LCOV - code coverage report
Current view: top level - alg - gdalwarpkernel.cpp (source / functions) Hit Total Coverage
Test: gdal_filtered.info Lines: 3464 4040 85.7 %
Date: 2026-02-28 20:27:10 Functions: 232 270 85.9 %

          Line data    Source code
       1             : /******************************************************************************
       2             :  *
       3             :  * Project:  High Performance Image Reprojector
       4             :  * Purpose:  Implementation of the GDALWarpKernel class.  Implements the actual
       5             :  *           image warping for a "chunk" of input and output imagery already
       6             :  *           loaded into memory.
       7             :  * Author:   Frank Warmerdam, warmerdam@pobox.com
       8             :  *
       9             :  ******************************************************************************
      10             :  * Copyright (c) 2003, Frank Warmerdam <warmerdam@pobox.com>
      11             :  * Copyright (c) 2008-2013, Even Rouault <even dot rouault at spatialys.com>
      12             :  *
      13             :  * SPDX-License-Identifier: MIT
      14             :  ****************************************************************************/
      15             : 
      16             : #include "cpl_port.h"
      17             : #include "gdalwarper.h"
      18             : 
      19             : #include <cfloat>
      20             : #include <cmath>
      21             : #include <cstddef>
      22             : #include <cstdlib>
      23             : #include <cstring>
      24             : 
      25             : #include <algorithm>
      26             : #include <limits>
      27             : #include <mutex>
      28             : #include <new>
      29             : #include <utility>
      30             : #include <vector>
      31             : 
      32             : #include "cpl_atomic_ops.h"
      33             : #include "cpl_conv.h"
      34             : #include "cpl_error.h"
      35             : #include "cpl_float.h"
      36             : #include "cpl_mask.h"
      37             : #include "cpl_multiproc.h"
      38             : #include "cpl_progress.h"
      39             : #include "cpl_string.h"
      40             : #include "cpl_vsi.h"
      41             : #include "cpl_worker_thread_pool.h"
      42             : #include "cpl_quad_tree.h"
      43             : #include "gdal.h"
      44             : #include "gdal_alg.h"
      45             : #include "gdal_alg_priv.h"
      46             : #include "gdal_thread_pool.h"
      47             : #include "gdalresamplingkernels.h"
      48             : 
      49             : // #define CHECK_SUM_WITH_GEOS
      50             : #ifdef CHECK_SUM_WITH_GEOS
      51             : #include "ogr_geometry.h"
      52             : #include "ogr_geos.h"
      53             : #endif
      54             : 
      55             : #ifdef USE_NEON_OPTIMIZATIONS
      56             : #include "include_sse2neon.h"
      57             : #define USE_SSE2
      58             : 
      59             : #include "gdalsse_priv.h"
      60             : 
      61             : // We restrict to 64bit processors because they are guaranteed to have SSE2.
      62             : // Could possibly be used too on 32bit, but we would need to check at runtime.
      63             : #elif defined(__x86_64) || defined(_M_X64)
      64             : #define USE_SSE2
      65             : 
      66             : #include "gdalsse_priv.h"
      67             : 
      68             : #if __SSE4_1__
      69             : #include <smmintrin.h>
      70             : #endif
      71             : 
      72             : #if __SSE3__
      73             : #include <pmmintrin.h>
      74             : #endif
      75             : 
      76             : #endif
      77             : 
      78             : constexpr double BAND_DENSITY_THRESHOLD = 0.0000000001;
      79             : constexpr float SRC_DENSITY_THRESHOLD_FLOAT = 0.000000001f;
      80             : constexpr double SRC_DENSITY_THRESHOLD_DOUBLE = 0.000000001;
      81             : 
      82             : // #define INSTANTIATE_FLOAT64_SSE2_IMPL
      83             : 
      84             : static const int anGWKFilterRadius[] = {
      85             :     0,  // Nearest neighbour
      86             :     1,  // Bilinear
      87             :     2,  // Cubic Convolution (Catmull-Rom)
      88             :     2,  // Cubic B-Spline
      89             :     3,  // Lanczos windowed sinc
      90             :     0,  // Average
      91             :     0,  // Mode
      92             :     0,  // Reserved GRA_Gauss=7
      93             :     0,  // Max
      94             :     0,  // Min
      95             :     0,  // Med
      96             :     0,  // Q1
      97             :     0,  // Q3
      98             :     0,  // Sum
      99             :     0,  // RMS
     100             : };
     101             : 
     102             : static double GWKBilinear(double dfX);
     103             : static double GWKCubic(double dfX);
     104             : static double GWKBSpline(double dfX);
     105             : static double GWKLanczosSinc(double dfX);
     106             : 
     107             : static const FilterFuncType apfGWKFilter[] = {
     108             :     nullptr,         // Nearest neighbour
     109             :     GWKBilinear,     // Bilinear
     110             :     GWKCubic,        // Cubic Convolution (Catmull-Rom)
     111             :     GWKBSpline,      // Cubic B-Spline
     112             :     GWKLanczosSinc,  // Lanczos windowed sinc
     113             :     nullptr,         // Average
     114             :     nullptr,         // Mode
     115             :     nullptr,         // Reserved GRA_Gauss=7
     116             :     nullptr,         // Max
     117             :     nullptr,         // Min
     118             :     nullptr,         // Med
     119             :     nullptr,         // Q1
     120             :     nullptr,         // Q3
     121             :     nullptr,         // Sum
     122             :     nullptr,         // RMS
     123             : };
     124             : 
     125             : // TODO(schwehr): Can we make these functions have a const * const arg?
     126             : static double GWKBilinear4Values(double *padfVals);
     127             : static double GWKCubic4Values(double *padfVals);
     128             : static double GWKBSpline4Values(double *padfVals);
     129             : static double GWKLanczosSinc4Values(double *padfVals);
     130             : 
     131             : static const FilterFunc4ValuesType apfGWKFilter4Values[] = {
     132             :     nullptr,                // Nearest neighbour
     133             :     GWKBilinear4Values,     // Bilinear
     134             :     GWKCubic4Values,        // Cubic Convolution (Catmull-Rom)
     135             :     GWKBSpline4Values,      // Cubic B-Spline
     136             :     GWKLanczosSinc4Values,  // Lanczos windowed sinc
     137             :     nullptr,                // Average
     138             :     nullptr,                // Mode
     139             :     nullptr,                // Reserved GRA_Gauss=7
     140             :     nullptr,                // Max
     141             :     nullptr,                // Min
     142             :     nullptr,                // Med
     143             :     nullptr,                // Q1
     144             :     nullptr,                // Q3
     145             :     nullptr,                // Sum
     146             :     nullptr,                // RMS
     147             : };
     148             : 
     149       13624 : int GWKGetFilterRadius(GDALResampleAlg eResampleAlg)
     150             : {
     151             :     static_assert(CPL_ARRAYSIZE(anGWKFilterRadius) == GRA_LAST_VALUE + 1,
     152             :                   "Bad size of anGWKFilterRadius");
     153       13624 :     return anGWKFilterRadius[eResampleAlg];
     154             : }
     155             : 
     156        5093 : FilterFuncType GWKGetFilterFunc(GDALResampleAlg eResampleAlg)
     157             : {
     158             :     static_assert(CPL_ARRAYSIZE(apfGWKFilter) == GRA_LAST_VALUE + 1,
     159             :                   "Bad size of apfGWKFilter");
     160        5093 :     return apfGWKFilter[eResampleAlg];
     161             : }
     162             : 
     163        5093 : FilterFunc4ValuesType GWKGetFilterFunc4Values(GDALResampleAlg eResampleAlg)
     164             : {
     165             :     static_assert(CPL_ARRAYSIZE(apfGWKFilter4Values) == GRA_LAST_VALUE + 1,
     166             :                   "Bad size of apfGWKFilter4Values");
     167        5093 :     return apfGWKFilter4Values[eResampleAlg];
     168             : }
     169             : 
     170             : static CPLErr GWKGeneralCase(GDALWarpKernel *);
     171             : static CPLErr GWKRealCase(GDALWarpKernel *poWK);
     172             : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     173             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     174             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     175             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
     176             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
     177             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
     178             : #endif
     179             : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     180             : static CPLErr GWKNearestByte(GDALWarpKernel *poWK);
     181             : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     182             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     183             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
     184             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
     185             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
     186             : #endif
     187             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     188             : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     189             : static CPLErr GWKNearestShort(GDALWarpKernel *poWK);
     190             : static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK);
     191             : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
     192             : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK);
     193             : static CPLErr GWKAverageOrMode(GDALWarpKernel *);
     194             : static CPLErr GWKSumPreserving(GDALWarpKernel *);
     195             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
     196             : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
     197             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
     198             : 
     199             : /************************************************************************/
     200             : /*                             GWKJobStruct                             */
     201             : /************************************************************************/
     202             : 
     203             : struct GWKJobStruct
     204             : {
     205             :     std::mutex &mutex;
     206             :     std::condition_variable &cv;
     207             :     int counterSingleThreaded = 0;
     208             :     int &counter;
     209             :     bool &stopFlag;
     210             :     GDALWarpKernel *poWK = nullptr;
     211             :     int iYMin = 0;
     212             :     int iYMax = 0;
     213             :     int (*pfnProgress)(GWKJobStruct *psJob) = nullptr;
     214             :     void *pTransformerArg = nullptr;
     215             :     // worker set by GWKRun(); ThreadFuncAdapter() calls it after assigning the proper pTransformerArg
     216             :     void (*pfnFunc)(void *) = nullptr;
     217             : 
     218        3135 :     GWKJobStruct(std::mutex &mutex_, std::condition_variable &cv_,
     219             :                  int &counter_, bool &stopFlag_)
     220        3135 :         : mutex(mutex_), cv(cv_), counter(counter_), stopFlag(stopFlag_)
     221             :     {
     222        3135 :     }
     223             : };
     224             : 
     225             : struct GWKThreadData
     226             : {
     227             :     std::unique_ptr<CPLJobQueue> poJobQueue{};
     228             :     std::unique_ptr<std::vector<GWKJobStruct>> threadJobs{};
     229             :     int nMaxThreads{0};
     230             :     int counter{0};
     231             :     bool stopFlag{false};
     232             :     std::mutex mutex{};
     233             :     std::condition_variable cv{};
     234             :     bool bTransformerArgInputAssignedToThread{false};
     235             :     void *pTransformerArgInput{
     236             :         nullptr};  // owned by calling layer. Not to be destroyed
     237             :     std::map<GIntBig, void *> mapThreadToTransformerArg{};
     238             :     int nTotalThreadCountForThisRun = 0;
     239             :     int nCurThreadCountForThisRun = 0;
     240             : };
     241             : 
     242             : /************************************************************************/
     243             : /*                         GWKProgressThread()                          */
     244             : /************************************************************************/
     245             : 
     246             : // Return TRUE if the computation must be interrupted.
     247          36 : static int GWKProgressThread(GWKJobStruct *psJob)
     248             : {
     249          36 :     bool stop = false;
     250             :     {
     251          36 :         std::lock_guard<std::mutex> lock(psJob->mutex);
     252          36 :         psJob->counter++;
     253          36 :         stop = psJob->stopFlag;
     254             :     }
     255          36 :     psJob->cv.notify_one();
     256             : 
     257          36 :     return stop;
     258             : }
     259             : 
     260             : /************************************************************************/
     261             : /*                       GWKProgressMonoThread()                        */
     262             : /************************************************************************/
     263             : 
     264             : // Return TRUE if the computation must be interrupted.
     265      417779 : static int GWKProgressMonoThread(GWKJobStruct *psJob)
     266             : {
     267      417779 :     GDALWarpKernel *poWK = psJob->poWK;
     268      417779 :     if (!poWK->pfnProgress(poWK->dfProgressBase +
     269      417779 :                                poWK->dfProgressScale *
     270      417779 :                                    (++psJob->counterSingleThreaded /
     271      417779 :                                     static_cast<double>(psJob->iYMax)),
     272             :                            "", poWK->pProgress))
     273             :     {
     274           1 :         CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
     275           1 :         psJob->stopFlag = true;
     276           1 :         return TRUE;
     277             :     }
     278      417778 :     return FALSE;
     279             : }
     280             : 
     281             : /************************************************************************/
     282             : /*                        GWKGenericMonoThread()                        */
     283             : /************************************************************************/
     284             : 
     285        3111 : static CPLErr GWKGenericMonoThread(GDALWarpKernel *poWK,
     286             :                                    void (*pfnFunc)(void *pUserData))
     287             : {
     288        3111 :     GWKThreadData td;
     289             : 
     290             :     // NOTE: the mutex is not used.
     291        3111 :     GWKJobStruct job(td.mutex, td.cv, td.counter, td.stopFlag);
     292        3111 :     job.poWK = poWK;
     293        3111 :     job.iYMin = 0;
     294        3111 :     job.iYMax = poWK->nDstYSize;
     295        3111 :     job.pfnProgress = GWKProgressMonoThread;
     296        3111 :     job.pTransformerArg = poWK->pTransformerArg;
     297        3111 :     job.counterSingleThreaded = td.counter;
     298        3111 :     pfnFunc(&job);
     299        3111 :     td.counter = job.counterSingleThreaded;
     300             : 
     301        6222 :     return td.stopFlag ? CE_Failure : CE_None;
     302             : }
     303             : 
     304             : /************************************************************************/
     305             : /*                          GWKThreadsCreate()                          */
     306             : /************************************************************************/
     307             : 
     308        1794 : void *GWKThreadsCreate(char **papszWarpOptions,
     309             :                        GDALTransformerFunc /* pfnTransformer */,
     310             :                        void *pTransformerArg)
     311             : {
     312        1794 :     const int nThreads = GDALGetNumThreads(papszWarpOptions, "NUM_THREADS",
     313             :                                            GDAL_DEFAULT_MAX_THREAD_COUNT,
     314             :                                            /* bDefaultAllCPUs = */ false);
     315        1794 :     GWKThreadData *psThreadData = new GWKThreadData();
     316             :     auto poThreadPool =
     317        1794 :         nThreads > 1 ? GDALGetGlobalThreadPool(nThreads) : nullptr;
     318        1794 :     if (poThreadPool)
     319             :     {
     320          24 :         psThreadData->nMaxThreads = nThreads;
     321          24 :         psThreadData->threadJobs.reset(new std::vector<GWKJobStruct>(
     322             :             nThreads,
     323          24 :             GWKJobStruct(psThreadData->mutex, psThreadData->cv,
     324          48 :                          psThreadData->counter, psThreadData->stopFlag)));
     325             : 
     326          24 :         psThreadData->poJobQueue = poThreadPool->CreateJobQueue();
     327          24 :         psThreadData->pTransformerArgInput = pTransformerArg;
     328             :     }
     329             : 
     330        1794 :     return psThreadData;
     331             : }
     332             : 
     333             : /************************************************************************/
     334             : /*                           GWKThreadsEnd()                            */
     335             : /************************************************************************/
     336             : 
     337        1794 : void GWKThreadsEnd(void *psThreadDataIn)
     338             : {
     339        1794 :     if (psThreadDataIn == nullptr)
     340           0 :         return;
     341             : 
     342        1794 :     GWKThreadData *psThreadData = static_cast<GWKThreadData *>(psThreadDataIn);
     343        1794 :     if (psThreadData->poJobQueue)
     344             :     {
     345             :         // cppcheck-suppress constVariableReference
     346          34 :         for (auto &pair : psThreadData->mapThreadToTransformerArg)
     347             :         {
     348          10 :             CPLAssert(pair.second != psThreadData->pTransformerArgInput);
     349          10 :             GDALDestroyTransformer(pair.second);
     350             :         }
     351          24 :         psThreadData->poJobQueue.reset();
     352             :     }
     353        1794 :     delete psThreadData;
     354             : }
     355             : 
     356             : /************************************************************************/
     357             : /*                         ThreadFuncAdapter()                          */
     358             : /************************************************************************/
     359             : 
     360          33 : static void ThreadFuncAdapter(void *pData)
     361             : {
     362          33 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
     363          33 :     GWKThreadData *psThreadData =
     364          33 :         static_cast<GWKThreadData *>(psJob->poWK->psThreadData);
     365             : 
     366             :     // Check whether we already have a per-thread transformer
     367          33 :     void *pTransformerArg = nullptr;
     368          33 :     const GIntBig nThreadId = CPLGetPID();
     369             : 
     370             :     {
     371          66 :         std::lock_guard<std::mutex> lock(psThreadData->mutex);
     372          33 :         ++psThreadData->nCurThreadCountForThisRun;
     373             : 
     374          33 :         auto oIter = psThreadData->mapThreadToTransformerArg.find(nThreadId);
     375          33 :         if (oIter != psThreadData->mapThreadToTransformerArg.end())
     376             :         {
     377           0 :             pTransformerArg = oIter->second;
     378             :         }
     379          33 :         else if (!psThreadData->bTransformerArgInputAssignedToThread &&
     380          33 :                  psThreadData->nCurThreadCountForThisRun ==
     381          33 :                      psThreadData->nTotalThreadCountForThisRun)
     382             :         {
     383             :             // If we are the last thread to be started, temporarily borrow the
     384             :             // original transformer
     385          23 :             psThreadData->bTransformerArgInputAssignedToThread = true;
     386          23 :             pTransformerArg = psThreadData->pTransformerArgInput;
     387          23 :             psThreadData->mapThreadToTransformerArg[nThreadId] =
     388             :                 pTransformerArg;
     389             :         }
     390             : 
     391          33 :         if (pTransformerArg == nullptr)
     392             :         {
     393          10 :             CPLAssert(psThreadData->pTransformerArgInput != nullptr);
     394          10 :             CPLAssert(!psThreadData->bTransformerArgInputAssignedToThread);
     395             :         }
     396             :     }
     397             : 
     398             :     // If no transformer assigned to current thread, instantiate one
     399          33 :     if (pTransformerArg == nullptr)
     400             :     {
     401             :         // This somehow assumes that GDALCloneTransformer() is thread-safe
     402             :         // which should normally be the case.
     403             :         pTransformerArg =
     404          10 :             GDALCloneTransformer(psThreadData->pTransformerArgInput);
     405             : 
     406             :         // Lock for the stop flag and the transformer map.
     407          10 :         std::lock_guard<std::mutex> lock(psThreadData->mutex);
     408          10 :         if (!pTransformerArg)
     409             :         {
     410           0 :             psJob->stopFlag = true;
     411           0 :             return;
     412             :         }
     413          10 :         psThreadData->mapThreadToTransformerArg[nThreadId] = pTransformerArg;
     414             :     }
     415             : 
     416          33 :     psJob->pTransformerArg = pTransformerArg;
     417          33 :     psJob->pfnFunc(pData);
     418             : 
     419             :     // Give back original transformer, if borrowed.
     420             :     {
     421          66 :         std::lock_guard<std::mutex> lock(psThreadData->mutex);
     422          33 :         if (psThreadData->bTransformerArgInputAssignedToThread &&
     423          26 :             pTransformerArg == psThreadData->pTransformerArgInput)
     424             :         {
     425             :             psThreadData->mapThreadToTransformerArg.erase(
     426          23 :                 psThreadData->mapThreadToTransformerArg.find(nThreadId));
     427          23 :             psThreadData->bTransformerArgInputAssignedToThread = false;
     428             :         }
     429             :     }
     430             : }
     431             : 
     432             : /************************************************************************/
     433             : /*                               GWKRun()                               */
     434             : /************************************************************************/
     435             : 
     436        3134 : static CPLErr GWKRun(GDALWarpKernel *poWK, const char *pszFuncName,
     437             :                      void (*pfnFunc)(void *pUserData))
     438             : 
     439             : {
     440        3134 :     const int nDstYSize = poWK->nDstYSize;
     441             : 
     442        3134 :     CPLDebug("GDAL",
     443             :              "GDALWarpKernel()::%s() "
     444             :              "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d",
     445             :              pszFuncName, poWK->nSrcXOff, poWK->nSrcYOff, poWK->nSrcXSize,
     446             :              poWK->nSrcYSize, poWK->nDstXOff, poWK->nDstYOff, poWK->nDstXSize,
     447             :              poWK->nDstYSize);
     448             : 
     449        3134 :     if (!poWK->pfnProgress(poWK->dfProgressBase, "", poWK->pProgress))
     450             :     {
     451           0 :         CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
     452           0 :         return CE_Failure;
     453             :     }
     454             : 
     455        3134 :     GWKThreadData *psThreadData =
     456             :         static_cast<GWKThreadData *>(poWK->psThreadData);
     457        3134 :     if (psThreadData == nullptr || psThreadData->poJobQueue == nullptr)
     458             :     {
     459        3111 :         return GWKGenericMonoThread(poWK, pfnFunc);
     460             :     }
     461             : 
     462          23 :     int nThreads = std::min(psThreadData->nMaxThreads, nDstYSize / 2);
     463             :     // Config option mostly useful for tests to be able to test multithreading
     464             :     // with small rasters
     465             :     const int nWarpChunkSize =
     466          23 :         atoi(CPLGetConfigOption("WARP_THREAD_CHUNK_SIZE", "65536"));
     467          23 :     if (nWarpChunkSize > 0)
     468             :     {
     469          21 :         GIntBig nChunks =
     470          21 :             static_cast<GIntBig>(nDstYSize) * poWK->nDstXSize / nWarpChunkSize;
     471          21 :         if (nThreads > nChunks)
     472          16 :             nThreads = static_cast<int>(nChunks);
     473             :     }
     474          23 :     if (nThreads <= 0)
     475          19 :         nThreads = 1;
     476             : 
     477          23 :     CPLDebug("WARP", "Using %d threads", nThreads);
     478             : 
     479          23 :     auto &jobs = *psThreadData->threadJobs;
     480          23 :     CPLAssert(static_cast<int>(jobs.size()) >= nThreads);
     481             :     // Fill-in job structures.
     482          56 :     for (int i = 0; i < nThreads; ++i)
     483             :     {
     484          33 :         auto &job = jobs[i];
     485          33 :         job.poWK = poWK;
     486          33 :         job.iYMin =
     487          33 :             static_cast<int>(static_cast<int64_t>(i) * nDstYSize / nThreads);
     488          33 :         job.iYMax = static_cast<int>(static_cast<int64_t>(i + 1) * nDstYSize /
     489          33 :                                      nThreads);
     490          33 :         if (poWK->pfnProgress != GDALDummyProgress)
     491           2 :             job.pfnProgress = GWKProgressThread;
     492          33 :         job.pfnFunc = pfnFunc;
     493             :     }
     494             : 
     495             :     bool bStopFlag;
     496             :     {
     497          23 :         std::unique_lock<std::mutex> lock(psThreadData->mutex);
     498             : 
     499          23 :         psThreadData->nTotalThreadCountForThisRun = nThreads;
     500             :         // coverity[missing_lock]
     501          23 :         psThreadData->nCurThreadCountForThisRun = 0;
     502             : 
     503             :         // Start jobs.
     504          56 :         for (int i = 0; i < nThreads; ++i)
     505             :         {
     506          33 :             auto &job = jobs[i];
     507          33 :             psThreadData->poJobQueue->SubmitJob(ThreadFuncAdapter,
     508             :                                                 static_cast<void *>(&job));
     509             :         }
     510             : 
     511             :         /* --------------------------------------------------------------------
     512             :          */
     513             :         /*      Report progress. */
     514             :         /* --------------------------------------------------------------------
     515             :          */
     516          23 :         if (poWK->pfnProgress != GDALDummyProgress)
     517             :         {
     518           4 :             while (psThreadData->counter < nDstYSize)
     519             :             {
     520           3 :                 psThreadData->cv.wait(lock);
     521           3 :                 if (!poWK->pfnProgress(poWK->dfProgressBase +
     522           3 :                                            poWK->dfProgressScale *
     523           3 :                                                (psThreadData->counter /
     524           3 :                                                 static_cast<double>(nDstYSize)),
     525             :                                        "", poWK->pProgress))
     526             :                 {
     527           1 :                     CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
     528           1 :                     psThreadData->stopFlag = true;
     529           1 :                     break;
     530             :                 }
     531             :             }
     532             :         }
     533             : 
     534          23 :         bStopFlag = psThreadData->stopFlag;
     535             :     }
     536             : 
     537             :     /* -------------------------------------------------------------------- */
     538             :     /*      Wait for all jobs to complete.                                  */
     539             :     /* -------------------------------------------------------------------- */
     540          23 :     psThreadData->poJobQueue->WaitCompletion();
     541             : 
     542          23 :     return bStopFlag ? CE_Failure : CE_None;
     543             : }
     544             : 
     545             : /************************************************************************/
     546             : /* ==================================================================== */
     547             : /*                            GDALWarpKernel                            */
     548             : /* ==================================================================== */
     549             : /************************************************************************/
     550             : 
     551             : /**
     552             :  * \class GDALWarpKernel "gdalwarper.h"
     553             :  *
     554             :  * Low level image warping class.
     555             :  *
     556             :  * This class is responsible for low level image warping for one
     557             :  * "chunk" of imagery.  The class is essentially a structure with all
     558             :  * data members public - primarily so that new special-case functions
     559             :  * can be added without changing the class declaration.
     560             :  *
     561             :  * Applications are normally intended to interact with warping facilities
     562             :  * through the GDALWarpOperation class, though the GDALWarpKernel can in
     563             :  * theory be used directly if great care is taken in setting up the
     564             :  * control data.
     565             :  *
     566             :  * <h3>Design Issues</h3>
     567             :  *
     568             :  * The intention is that PerformWarp() would analyze the setup in terms
     569             :  * of the datatype, resampling type, and validity/density mask usage and
     570             :  * pick one of many specific implementations of the warping algorithm over
     571             :  * a continuum of optimization vs. generality.  At one end there will be a
     572             :  * reference general purpose implementation of the algorithm that supports
     573             :  * any data type (working internally in double precision complex), all three
     574             :  * resampling types, and any or all of the validity/density masks.  At the
     575             :  * other end would be highly optimized algorithms for common cases like
     576             :  * nearest neighbour resampling on GDT_UInt8 data with no masks.
     577             :  *
     578             :  * The full set of optimized versions have not been decided but we should
     579             :  * expect to have at least:
     580             :  *  - One for each resampling algorithm for 8bit data with no masks.
     581             :  *  - One for each resampling algorithm for float data with no masks.
     582             :  *  - One for each resampling algorithm for float data with any/all masks
     583             :  *    (essentially the generic case for just float data).
     584             :  *  - One for each resampling algorithm for 8bit data with support for
     585             :  *    input validity masks (per band or per pixel).  This handles the common
     586             :  *    case of nodata masking.
     587             :  *  - One for each resampling algorithm for float data with support for
     588             :  *    input validity masks (per band or per pixel).  This handles the common
     589             :  *    case of nodata masking.
     590             :  *
     591             :  * Some of the specializations would operate on all bands in one pass
     592             :  * (especially the ones without masking would do this), while others might
     593             :  * process each band individually to reduce code complexity.
     594             :  *
     595             :  * <h3>Masking Semantics</h3>
     596             :  *
     597             :  * A detailed explanation of the semantics of the validity and density masks,
     598             :  * and their effects on resampling kernels is needed here.
     599             :  */
     600             : 
     601             : /************************************************************************/
     602             : /*                     GDALWarpKernel Data Members                      */
     603             : /************************************************************************/
     604             : 
     605             : /**
     606             :  * \var GDALResampleAlg GDALWarpKernel::eResample;
     607             :  *
     608             :  * Resampling algorithm.
     609             :  *
     610             :  * The resampling algorithm to use.  One of GRA_NearestNeighbour, GRA_Bilinear,
     611             :  * GRA_Cubic, GRA_CubicSpline, GRA_Lanczos, GRA_Average, GRA_RMS,
     612             :  * GRA_Mode or GRA_Sum.
     613             :  *
     614             :  * This field is required. GRA_NearestNeighbour may be used as a default
     615             :  * value.
     616             :  */
     617             : 
     618             : /**
     619             :  * \var GDALDataType GDALWarpKernel::eWorkingDataType;
     620             :  *
     621             :  * Working pixel data type.
     622             :  *
     623             :  * The datatype of pixels in the source image (papabySrcImage) and
     624             :  * destination image (papabyDstImage) buffers.  Note that operations on
     625             :  * some data types (such as GDT_UInt8) may be much better optimized than other
     626             :  * less common cases.
     627             :  *
     628             :  * This field is required.  It may not be GDT_Unknown.
     629             :  */
     630             : 
     631             : /**
     632             :  * \var int GDALWarpKernel::nBands;
     633             :  *
     634             :  * Number of bands.
     635             :  *
     636             :  * The number of bands (layers) of imagery being warped.  Determines the
     637             :  * number of entries in the papabySrcImage, papanBandSrcValid,
     638             :  * and papabyDstImage arrays.
     639             :  *
     640             :  * This field is required.
     641             :  */
     642             : 
     643             : /**
     644             :  * \var int GDALWarpKernel::nSrcXSize;
     645             :  *
     646             :  * Source image width in pixels.
     647             :  *
     648             :  * This field is required.
     649             :  */
     650             : 
     651             : /**
     652             :  * \var int GDALWarpKernel::nSrcYSize;
     653             :  *
     654             :  * Source image height in pixels.
     655             :  *
     656             :  * This field is required.
     657             :  */
     658             : 
     659             : /**
     660             :  * \var double GDALWarpKernel::dfSrcXExtraSize;
     661             :  *
     662             :  * Number of pixels included in nSrcXSize that are present on the edges of
     663             :  * the area of interest to take into account the width of the kernel.
     664             :  *
     665             :  * This field is required.
     666             :  */
     667             : 
     668             : /**
     669             :  * \var double GDALWarpKernel::dfSrcYExtraSize;
     670             :  *
     671             :  * Number of pixels included in nSrcYSize that are present on the edges of
     672             :  * the area of interest to take into account the height of the kernel.
     673             :  *
     674             :  * This field is required.
     675             :  */
     676             : 
     677             : /**
     678             :  * \var GByte **GDALWarpKernel::papabySrcImage;
     679             :  *
     680             :  * Array of source image band data.
     681             :  *
     682             :  * This is an array (of size GDALWarpKernel::nBands) of pointers
     683             :  * to image data.  Each individual band of image data is organized as a single
     684             :  * block of image data in left to right, then bottom to top order.  The actual
     685             :  * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
     686             :  *
     687             :  * To access the pixel value for the (x=3, y=4) pixel (zero based) of
     688             :  * the second band with eWorkingDataType set to GDT_Float32 use code like
     689             :  * this:
     690             :  *
     691             :  * \code
     692             :  *   float dfPixelValue;
     693             :  *   int   nBand = 2-1;  // Band indexes are zero based.
     694             :  *   int   nPixel = 3; // Zero based.
     695             :  *   int   nLine = 4;  // Zero based.
     696             :  *
     697             :  *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
     698             :  *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
     699             :  *   assert( nBand >= 0 && nBand < poKern->nBands );
     700             :  *   dfPixelValue = ((float *) poKern->papabySrcImage[nBand])
     701             :  *                                  [nPixel + nLine * poKern->nSrcXSize];
     702             :  * \endcode
     703             :  *
     704             :  * This field is required.
     705             :  */
     706             : 
     707             : /**
     708             :  * \var GUInt32 **GDALWarpKernel::papanBandSrcValid;
     709             :  *
     710             :  * Per band validity mask for source pixels.
     711             :  *
     712             :  * Array of pixel validity mask layers for each source band.   Each of
     713             :  * the mask layers is the same size (in pixels) as the source image with
     714             :  * one bit per pixel.  Note that it is legal (and common) for this to be
     715             :  * NULL indicating that none of the pixels are invalidated, or for some
     716             :  * band validity masks to be NULL in which case all pixels of the band are
     717             :  * valid.  The following code can be used to test the validity of a particular
     718             :  * pixel.
     719             :  *
     720             :  * \code
     721             :  *   int   bIsValid = TRUE;
     722             :  *   int   nBand = 2-1;  // Band indexes are zero based.
     723             :  *   int   nPixel = 3; // Zero based.
     724             :  *   int   nLine = 4;  // Zero based.
     725             :  *
     726             :  *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
     727             :  *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
     728             :  *   assert( nBand >= 0 && nBand < poKern->nBands );
     729             :  *
     730             :  *   if( poKern->papanBandSrcValid != NULL
     731             :  *       && poKern->papanBandSrcValid[nBand] != NULL )
     732             :  *   {
     733             :  *       GUInt32 *panBandMask = poKern->papanBandSrcValid[nBand];
     734             :  *       int    iPixelOffset = nPixel + nLine * poKern->nSrcXSize;
     735             :  *
     736             :  *       bIsValid = CPLMaskGet(panBandMask, iPixelOffset);
     737             :  *   }
     738             :  * \endcode
     739             :  */
     740             : 
     741             : /**
     742             :  * \var GUInt32 *GDALWarpKernel::panUnifiedSrcValid;
     743             :  *
     744             :  * Per pixel validity mask for source pixels.
     745             :  *
     746             :  * A single validity mask layer that applies to the pixels of all source
     747             :  * bands.  It is accessed similarly to papanBandSrcValid, but without the
     748             :  * extra level of band indirection.
     749             :  *
     750             :  * This pointer may be NULL indicating that all pixels are valid.
     751             :  *
     752             :  * Note that if both panUnifiedSrcValid, and papanBandSrcValid are available,
     753             :  * the pixel isn't considered to be valid unless both arrays indicate it is
     754             :  * valid.
     755             :  */
     756             : 
     757             : /**
     758             :  * \var float *GDALWarpKernel::pafUnifiedSrcDensity;
     759             :  *
     760             :  * Per pixel density mask for source pixels.
     761             :  *
     762             :  * A single density mask layer that applies to the pixels of all source
     763             :  * bands.  It contains values between 0.0 and 1.0 indicating the degree to
     764             :  * which this pixel should be allowed to contribute to the output result.
     765             :  *
     766             :  * This pointer may be NULL indicating that all pixels have a density of 1.0.
     767             :  *
     768             :  * The density for a pixel may be accessed like this:
     769             :  *
     770             :  * \code
     771             :  *   float fDensity = 1.0;
     772             :  *   int nPixel = 3;  // Zero based.
     773             :  *   int nLine = 4;   // Zero based.
     774             :  *
     775             :  *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
     776             :  *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
     777             :  *   if( poKern->pafUnifiedSrcDensity != NULL )
     778             :  *     fDensity = poKern->pafUnifiedSrcDensity
     779             :  *                                  [nPixel + nLine * poKern->nSrcXSize];
     780             :  * \endcode
     781             :  */
     782             : 
     783             : /**
     784             :  * \var int GDALWarpKernel::nDstXSize;
     785             :  *
     786             :  * Width of destination image in pixels.
     787             :  *
     788             :  * This field is required.
     789             :  */
     790             : 
     791             : /**
     792             :  * \var int GDALWarpKernel::nDstYSize;
     793             :  *
     794             :  * Height of destination image in pixels.
     795             :  *
     796             :  * This field is required.
     797             :  */
     798             : 
     799             : /**
     800             :  * \var GByte **GDALWarpKernel::papabyDstImage;
     801             :  *
     802             :  * Array of destination image band data.
     803             :  *
     804             :  * This is an array (of size GDALWarpKernel::nBands) of pointers
     805             :  * to image data.  Each individual band of image data is organized as a single
     806             :  * block of image data in left to right, then bottom to top order.  The actual
     807             :  * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
     808             :  *
     809             :  * To access the pixel value for the (x=3, y=4) pixel (zero based) of
     810             :  * the second band with eWorkingDataType set to GDT_Float32 use code like
     811             :  * this:
     812             :  *
     813             :  * \code
     814             :  *   float dfPixelValue;
     815             :  *   int   nBand = 2-1;  // Band indexes are zero based.
     816             :  *   int   nPixel = 3; // Zero based.
     817             :  *   int   nLine = 4;  // Zero based.
     818             :  *
     819             :  *   assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
     820             :  *   assert( nLine >= 0 && nLine < poKern->nDstYSize );
     821             :  *   assert( nBand >= 0 && nBand < poKern->nBands );
     822             :  *   dfPixelValue = ((float *) poKern->papabyDstImage[nBand])
     823             :  *                                  [nPixel + nLine * poKern->nDstXSize];
     824             :  * \endcode
     825             :  *
     826             :  * This field is required.
     827             :  */
     828             : 
     829             : /**
     830             :  * \var GUInt32 *GDALWarpKernel::panDstValid;
     831             :  *
     832             :  * Per pixel validity mask for destination pixels.
     833             :  *
     834             :  * A single validity mask layer that applies to the pixels of all destination
     835             :  * bands.  It is accessed similarly to panUnifiedSrcValid, but based
     836             :  * on the size of the destination image.
     837             :  *
     838             :  * This pointer may be NULL indicating that all pixels are valid.
     839             :  */
     840             : 
     841             : /**
     842             :  * \var float *GDALWarpKernel::pafDstDensity;
     843             :  *
     844             :  * Per pixel density mask for destination pixels.
     845             :  *
     846             :  * A single density mask layer that applies to the pixels of all destination
     847             :  * bands.  It contains values between 0.0 and 1.0.
     848             :  *
     849             :  * This pointer may be NULL indicating that all pixels have a density of 1.0.
     850             :  *
     851             :  * The density for a pixel may be accessed like this:
     852             :  *
     853             :  * \code
     854             :  *   float fDensity = 1.0;
     855             :  *   int   nPixel = 3; // Zero based.
     856             :  *   int   nLine = 4;  // Zero based.
     857             :  *
     858             :  *   assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
     859             :  *   assert( nLine >= 0 && nLine < poKern->nDstYSize );
     860             :  *   if( poKern->pafDstDensity != NULL )
     861             :  *     fDensity = poKern->pafDstDensity[nPixel + nLine * poKern->nDstXSize];
     862             :  * \endcode
     863             :  */
     864             : 
     865             : /**
     866             :  * \var int GDALWarpKernel::nSrcXOff;
     867             :  *
     868             :  * X offset to source pixel coordinates for transformation.
     869             :  *
     870             :  * See pfnTransformer.
     871             :  *
     872             :  * This field is required.
     873             :  */
     874             : 
     875             : /**
     876             :  * \var int GDALWarpKernel::nSrcYOff;
     877             :  *
     878             :  * Y offset to source pixel coordinates for transformation.
     879             :  *
     880             :  * See pfnTransformer.
     881             :  *
     882             :  * This field is required.
     883             :  */
     884             : 
     885             : /**
     886             :  * \var int GDALWarpKernel::nDstXOff;
     887             :  *
     888             :  * X offset to destination pixel coordinates for transformation.
     889             :  *
     890             :  * See pfnTransformer.
     891             :  *
     892             :  * This field is required.
     893             :  */
     894             : 
     895             : /**
     896             :  * \var int GDALWarpKernel::nDstYOff;
     897             :  *
     898             :  * Y offset to destination pixel coordinates for transformation.
     899             :  *
     900             :  * See pfnTransformer.
     901             :  *
     902             :  * This field is required.
     903             :  */
     904             : 
     905             : /**
     906             :  * \var GDALTransformerFunc GDALWarpKernel::pfnTransformer;
     907             :  *
     908             :  * Source/destination location transformer.
     909             :  *
     910             :  * The function to call to transform coordinates between source image
     911             :  * pixel/line coordinates and destination image pixel/line coordinates.
     912             :  * See GDALTransformerFunc() for details of the semantics of this function.
     913             :  *
     914             :  * The GDALWarpKernel algorithm will only ever use this transformer in
     915             :  * "destination to source" mode (bDstToSrc=TRUE), and will always pass
     916             :  * partial or complete scanlines of points in the destination image as
     917             :  * input.  This means, among other things, that it is safe to use the
     918             :  * approximating transform GDALApproxTransform() as the transformation
     919             :  * function.
     920             :  *
     921             :  * Source and destination images may be subsets of a larger overall image.
     922             :  * The transformation algorithms will expect and return pixel/line coordinates
     923             :  * in terms of this larger image, so coordinates need to be offset by
     924             :  * the offsets specified in nSrcXOff, nSrcYOff, nDstXOff, and nDstYOff before
     925             :  * passing to pfnTransformer, and after return from it.
     926             :  *
     927             :  * The GDALWarpKernel::pTransformerArg value will be passed as the callback
     928             :  * data to this function when it is called.
     929             :  *
     930             :  * This field is required.
     931             :  */
     932             : 
     933             : /**
     934             :  * \var void *GDALWarpKernel::pTransformerArg;
     935             :  *
     936             :  * Callback data for pfnTransformer.
     937             :  *
     938             :  * This field may be NULL if not required for the pfnTransformer being used.
     939             :  */
     940             : 
     941             : /**
     942             :  * \var GDALProgressFunc GDALWarpKernel::pfnProgress;
     943             :  *
     944             :  * The function to call to report progress of the algorithm, and to check
     945             :  * for a requested termination of the operation.  It operates according to
     946             :  * GDALProgressFunc() semantics.
     947             :  *
     948             :  * Generally speaking the progress function will be invoked for each
     949             :  * scanline of the destination buffer that has been processed.
     950             :  *
     951             :  * This field may be NULL (internally set to GDALDummyProgress()).
     952             :  */
     953             : 
     954             : /**
     955             :  * \var void *GDALWarpKernel::pProgress;
     956             :  *
     957             :  * Callback data for pfnProgress.
     958             :  *
     959             :  * This field may be NULL if not required for the pfnProgress being used.
     960             :  */
     961             : 
     962             : /************************************************************************/
     963             : /*                           GDALWarpKernel()                           */
     964             : /************************************************************************/
     965             : 
     966        3739 : GDALWarpKernel::GDALWarpKernel()
     967             :     : papszWarpOptions(nullptr), eResample(GRA_NearestNeighbour),
     968             :       eWorkingDataType(GDT_Unknown), nBands(0), nSrcXSize(0), nSrcYSize(0),
     969             :       dfSrcXExtraSize(0.0), dfSrcYExtraSize(0.0), papabySrcImage(nullptr),
     970             :       papanBandSrcValid(nullptr), panUnifiedSrcValid(nullptr),
     971             :       pafUnifiedSrcDensity(nullptr), nDstXSize(0), nDstYSize(0),
     972             :       papabyDstImage(nullptr), panDstValid(nullptr), pafDstDensity(nullptr),
     973             :       dfXScale(1.0), dfYScale(1.0), dfXFilter(0.0), dfYFilter(0.0), nXRadius(0),
     974             :       nYRadius(0), nFiltInitX(0), nFiltInitY(0), nSrcXOff(0), nSrcYOff(0),
     975             :       nDstXOff(0), nDstYOff(0), pfnTransformer(nullptr),
     976             :       pTransformerArg(nullptr), pfnProgress(GDALDummyProgress),
     977             :       pProgress(nullptr), dfProgressBase(0.0), dfProgressScale(1.0),
     978             :       padfDstNoDataReal(nullptr), psThreadData(nullptr),
     979        3739 :       eTieStrategy(GWKTS_First)
     980             : {
     981        3739 : }
     982             : 
     983             : /************************************************************************/
     984             : /*                          ~GDALWarpKernel()                           */
     985             : /************************************************************************/
     986             : 
     987        3739 : GDALWarpKernel::~GDALWarpKernel()
     988             : {
     989        3739 : }
     990             : 
     991             : /************************************************************************/
     992             : /*                              getArea()                               */
     993             : /************************************************************************/
     994             : 
     995             : typedef std::pair<double, double> XYPair;
     996             : 
     997             : typedef std::vector<XYPair> XYPoly;
     998             : 
     999             : // poly may or may not be closed.
    1000      565793 : static double getArea(const XYPoly &poly)
    1001             : {
    1002             :     // CPLAssert(poly.size() >= 2);
    1003      565793 :     const size_t nPointCount = poly.size();
    1004             :     double dfAreaSum =
    1005      565793 :         poly[0].first * (poly[1].second - poly[nPointCount - 1].second);
    1006             : 
    1007     1786950 :     for (size_t i = 1; i < nPointCount - 1; i++)
    1008             :     {
    1009     1221160 :         dfAreaSum += poly[i].first * (poly[i + 1].second - poly[i - 1].second);
    1010             :     }
    1011             : 
    1012      565793 :     dfAreaSum += poly[nPointCount - 1].first *
    1013      565793 :                  (poly[0].second - poly[nPointCount - 2].second);
    1014             : 
    1015      565793 :     return 0.5 * std::fabs(dfAreaSum);
    1016             : }
    1017             : 
    1018             : /************************************************************************/
    1019             : /*                       CanUse4SamplesFormula()                        */
    1020             : /************************************************************************/
    1021             : 
    1022        4620 : static bool CanUse4SamplesFormula(const GDALWarpKernel *poWK)
    1023             : {
    1024        4620 :     if (poWK->eResample == GRA_Bilinear || poWK->eResample == GRA_Cubic)
    1025             :     {
    1026             :         // Use 4-sample formula if we are not downsampling by more than a
    1027             :         // factor of 1:2
    1028        2637 :         if (poWK->dfXScale > 0.5 && poWK->dfYScale > 0.5)
    1029        2197 :             return true;
    1030         440 :         CPLDebugOnce("WARP",
    1031             :                      "Not using 4-sample bilinear/bicubic formula because "
    1032             :                      "XSCALE(=%f) and/or YSCALE(=%f) <= 0.5",
    1033             :                      poWK->dfXScale, poWK->dfYScale);
    1034             :     }
    1035        2423 :     return false;
    1036             : }
    1037             : 
    1038             : /************************************************************************/
    1039             : /*                            PerformWarp()                             */
    1040             : /************************************************************************/
    1041             : 
    1042             : /**
    1043             :  * \fn CPLErr GDALWarpKernel::PerformWarp();
    1044             :  *
    1045             :  * This method performs the warp described in the GDALWarpKernel.
    1046             :  *
    1047             :  * @return CE_None on success or CE_Failure if an error occurs.
    1048             :  */
    1049             : 
    1050        3735 : CPLErr GDALWarpKernel::PerformWarp()
    1051             : 
    1052             : {
    1053        3735 :     const CPLErr eErr = Validate();
    1054             : 
    1055        3735 :     if (eErr != CE_None)
    1056           1 :         return eErr;
    1057             : 
    1058             :     // See #2445 and #3079.
    1059        3734 :     if (nSrcXSize <= 0 || nSrcYSize <= 0)
    1060             :     {
    1061         600 :         if (!pfnProgress(dfProgressBase + dfProgressScale, "", pProgress))
    1062             :         {
    1063           0 :             CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
    1064           0 :             return CE_Failure;
    1065             :         }
    1066         600 :         return CE_None;
    1067             :     }
    1068             : 
    1069             :     /* -------------------------------------------------------------------- */
    1070             :     /*      Pre-calculate resampling scales and window sizes for filtering. */
    1071             :     /* -------------------------------------------------------------------- */
    1072             : 
    1073        3134 :     dfXScale = 0.0;
    1074        3134 :     dfYScale = 0.0;
    1075             : 
    1076             :     // XSCALE and YSCALE per warping chunk is not necessarily ideal, in case of
    1077             :     // heterogeneous change in shapes.
    1078             :     // Best would probably be a per-pixel scale computation.
    1079        3134 :     const char *pszXScale = CSLFetchNameValue(papszWarpOptions, "XSCALE");
    1080        3134 :     const char *pszYScale = CSLFetchNameValue(papszWarpOptions, "YSCALE");
    1081        3134 :     if (!pszXScale || !pszYScale)
    1082             :     {
    1083             :         // Sample points along a grid in the destination space
    1084        3133 :         constexpr int MAX_POINTS_PER_DIM = 10;
    1085        3133 :         const int nPointsX = std::min(MAX_POINTS_PER_DIM, nDstXSize);
    1086        3133 :         const int nPointsY = std::min(MAX_POINTS_PER_DIM, nDstYSize);
    1087        3133 :         constexpr int CORNER_COUNT_PER_SQUARE = 4;
    1088        3133 :         const int nPoints = CORNER_COUNT_PER_SQUARE * nPointsX * nPointsY;
    1089        6266 :         std::vector<double> adfX;
    1090        6266 :         std::vector<double> adfY;
    1091        3133 :         adfX.reserve(CORNER_COUNT_PER_SQUARE * nPoints);
    1092        3133 :         adfY.reserve(CORNER_COUNT_PER_SQUARE * nPoints);
    1093        6266 :         std::vector<double> adfZ(CORNER_COUNT_PER_SQUARE * nPoints);
    1094        6266 :         std::vector<int> abSuccess(CORNER_COUNT_PER_SQUARE * nPoints);
    1095       30070 :         for (int iY = 0; iY < nPointsY; iY++)
    1096             :         {
    1097       26937 :             const double dfYShift = (iY > 0 && iY == nPointsY - 1) ? -1.0 : 0.0;
    1098       26937 :             const double dfY =
    1099       26937 :                 dfYShift + (nPointsY == 1 ? 0.0
    1100       26721 :                                           : static_cast<double>(iY) *
    1101       26721 :                                                 nDstYSize / (nPointsY - 1));
    1102             : 
    1103      285962 :             for (int iX = 0; iX < nPointsX; iX++)
    1104             :             {
    1105      259025 :                 const double dfXShift =
    1106      259025 :                     (iX > 0 && iX == nPointsX - 1) ? -1.0 : 0.0;
    1107             : 
    1108      259025 :                 const double dfX =
    1109      259025 :                     dfXShift + (nPointsX == 1 ? 0.0
    1110      258823 :                                               : static_cast<double>(iX) *
    1111      258823 :                                                     nDstXSize / (nPointsX - 1));
    1112             : 
    1113             :                 // Reproject a unit square at each sample point
    1114      259025 :                 adfX.push_back(dfX);
    1115      259025 :                 adfY.push_back(dfY);
    1116             : 
    1117      259025 :                 adfX.push_back(dfX + 1);
    1118      259025 :                 adfY.push_back(dfY);
    1119             : 
    1120      259025 :                 adfX.push_back(dfX);
    1121      259025 :                 adfY.push_back(dfY + 1);
    1122             : 
    1123      259025 :                 adfX.push_back(dfX + 1);
    1124      259025 :                 adfY.push_back(dfY + 1);
    1125             :             }
    1126             :         }
    1127        3133 :         pfnTransformer(pTransformerArg, TRUE, static_cast<int>(adfX.size()),
    1128             :                        adfX.data(), adfY.data(), adfZ.data(), abSuccess.data());
    1129             : 
    1130        6266 :         std::vector<XYPair> adfXYScales;
    1131        3133 :         adfXYScales.reserve(nPoints);
    1132      262158 :         for (int i = 0; i < nPoints; i += CORNER_COUNT_PER_SQUARE)
    1133             :         {
    1134      516934 :             if (abSuccess[i + 0] && abSuccess[i + 1] && abSuccess[i + 2] &&
    1135      257909 :                 abSuccess[i + 3])
    1136             :             {
    1137     2063260 :                 const auto square = [](double x) { return x * x; };
    1138             : 
    1139      257907 :                 const double vx01 = adfX[i + 1] - adfX[i + 0];
    1140      257907 :                 const double vy01 = adfY[i + 1] - adfY[i + 0];
    1141      257907 :                 const double len01_sq = square(vx01) + square(vy01);
    1142             : 
    1143      257907 :                 const double vx23 = adfX[i + 3] - adfX[i + 2];
    1144      257907 :                 const double vy23 = adfY[i + 3] - adfY[i + 2];
    1145      257907 :                 const double len23_sq = square(vx23) + square(vy23);
    1146             : 
    1147      257907 :                 const double vx02 = adfX[i + 2] - adfX[i + 0];
    1148      257907 :                 const double vy02 = adfY[i + 2] - adfY[i + 0];
    1149      257907 :                 const double len02_sq = square(vx02) + square(vy02);
    1150             : 
    1151      257907 :                 const double vx13 = adfX[i + 3] - adfX[i + 1];
    1152      257907 :                 const double vy13 = adfY[i + 3] - adfY[i + 1];
    1153      257907 :                 const double len13_sq = square(vx13) + square(vy13);
    1154             : 
    1155             :                 // ~ 20 degree, heuristic
    1156      257907 :                 constexpr double TAN_MODEST_ANGLE = 0.35;
    1157             : 
    1158             :                 // 10%, heuristic
    1159      257907 :                 constexpr double LENGTH_RELATIVE_TOLERANCE = 0.1;
    1160             : 
    1161             :                 // Security margin to avoid division by zero (would only
    1162             :                 // happen in case of degenerated coordinate transformation,
    1163             :                 // or insane upsampling)
    1164      257907 :                 constexpr double EPSILON = 1e-10;
    1165             : 
    1166             :                 // Does the transformed square look like an almost non-rotated
    1167             :                 // quasi-rectangle?
    1168      257907 :                 if (std::fabs(vy01) < TAN_MODEST_ANGLE * vx01 &&
    1169      250790 :                     std::fabs(vy23) < TAN_MODEST_ANGLE * vx23 &&
    1170      250763 :                     std::fabs(len01_sq - len23_sq) <
    1171      250763 :                         LENGTH_RELATIVE_TOLERANCE * std::fabs(len01_sq) &&
    1172      250650 :                     std::fabs(len02_sq - len13_sq) <
    1173      250650 :                         LENGTH_RELATIVE_TOLERANCE * std::fabs(len02_sq))
    1174             :                 {
    1175             :                     // Using a geometric average of the squared lengths of two
    1176             :                     // opposite sides, hence a sqrt(), and as this is still a squared value,
    1177             :                     // we need another sqrt() to get a distance.
    1178             :                     const double dfXLength =
    1179      250635 :                         std::sqrt(std::sqrt(len01_sq * len23_sq));
    1180             :                     const double dfYLength =
    1181      250635 :                         std::sqrt(std::sqrt(len02_sq * len13_sq));
    1182      250635 :                     if (dfXLength > EPSILON && dfYLength > EPSILON)
    1183             :                     {
    1184      250635 :                         const double dfThisXScale = 1.0 / dfXLength;
    1185      250635 :                         const double dfThisYScale = 1.0 / dfYLength;
    1186      250635 :                         adfXYScales.push_back({dfThisXScale, dfThisYScale});
    1187      250635 :                     }
    1188             :                 }
    1189             :                 else
    1190             :                 {
    1191             :                     // If not, then consider the area of the transformed unit
    1192             :                     // square to determine the X/Y scales.
    1193        7272 :                     const XYPoly poly{{adfX[i + 0], adfY[i + 0]},
    1194        7272 :                                       {adfX[i + 1], adfY[i + 1]},
    1195        7272 :                                       {adfX[i + 3], adfY[i + 3]},
    1196       29088 :                                       {adfX[i + 2], adfY[i + 2]}};
    1197        7272 :                     const double dfSrcArea = getArea(poly);
    1198        7272 :                     const double dfFactor = std::sqrt(dfSrcArea);
    1199        7272 :                     if (dfFactor > EPSILON)
    1200             :                     {
    1201        7272 :                         const double dfThisXScale = 1.0 / dfFactor;
    1202        7272 :                         const double dfThisYScale = dfThisXScale;
    1203        7272 :                         adfXYScales.push_back({dfThisXScale, dfThisYScale});
    1204             :                     }
    1205             :                 }
    1206             :             }
    1207             :         }
    1208             : 
    1209        3133 :         if (!adfXYScales.empty())
    1210             :         {
    1211             :             // Sort by increasing xscale * yscale
    1212        3133 :             std::sort(adfXYScales.begin(), adfXYScales.end(),
    1213     1401590 :                       [](const XYPair &a, const XYPair &b)
    1214     1401590 :                       { return a.first * a.second < b.first * b.second; });
    1215             : 
    1216             :             // Compute the per-axis maximum of scale
    1217        3133 :             double dfXMax = 0;
    1218        3133 :             double dfYMax = 0;
    1219      261040 :             for (const auto &[dfX, dfY] : adfXYScales)
    1220             :             {
    1221      257907 :                 dfXMax = std::max(dfXMax, dfX);
    1222      257907 :                 dfYMax = std::max(dfYMax, dfY);
    1223             :             }
    1224             : 
    1225             :             // Now eliminate outliers, defined as ones whose value is < 10% of
    1226             :             // the maximum value, typically found at a polar discontinuity, and
    1227             :             // compute the average of non-outlier values.
    1228        3133 :             dfXScale = 0;
    1229        3133 :             dfYScale = 0;
    1230        3133 :             int i = 0;
    1231        3133 :             constexpr double THRESHOLD = 0.1;  // 10%, rather arbitrary
    1232      261040 :             for (const auto &[dfX, dfY] : adfXYScales)
    1233             :             {
    1234      257907 :                 if (dfX > THRESHOLD * dfXMax && dfY > THRESHOLD * dfYMax)
    1235             :                 {
    1236      255719 :                     ++i;
    1237      255719 :                     const double dfXDelta = dfX - dfXScale;
    1238      255719 :                     const double dfYDelta = dfY - dfYScale;
    1239      255719 :                     const double dfInvI = 1.0 / i;
    1240      255719 :                     dfXScale += dfXDelta * dfInvI;
    1241      255719 :                     dfYScale += dfYDelta * dfInvI;
    1242             :                 }
    1243             :             }
    1244             :         }
    1245             :     }
    1246             : 
    1247             :     // Round to closest integer reciprocal scale if we are very close to it
    1248             :     const auto RoundToClosestIntegerReciprocalScaleIfCloseEnough =
    1249        6268 :         [](double dfScale)
    1250             :     {
    1251        6268 :         if (dfScale < 1.0)
    1252             :         {
    1253        2521 :             double dfReciprocalScale = 1.0 / dfScale;
    1254        2521 :             const int nReciprocalScale =
    1255        2521 :                 static_cast<int>(dfReciprocalScale + 0.5);
    1256        2521 :             if (fabs(dfReciprocalScale - nReciprocalScale) < 0.05)
    1257        2152 :                 dfScale = 1.0 / nReciprocalScale;
    1258             :         }
    1259        6268 :         return dfScale;
    1260             :     };
    1261             : 
    1262        3134 :     if (dfXScale <= 0)
    1263           1 :         dfXScale = 1.0;
    1264        3134 :     if (dfYScale <= 0)
    1265           1 :         dfYScale = 1.0;
    1266             : 
    1267        3134 :     dfXScale = RoundToClosestIntegerReciprocalScaleIfCloseEnough(dfXScale);
    1268        3134 :     dfYScale = RoundToClosestIntegerReciprocalScaleIfCloseEnough(dfYScale);
    1269             : 
    1270        3134 :     if (pszXScale != nullptr)
    1271           1 :         dfXScale = CPLAtof(pszXScale);
    1272        3134 :     if (pszYScale != nullptr)
    1273           1 :         dfYScale = CPLAtof(pszYScale);
    1274             : 
    1275        3134 :     if (!pszXScale || !pszYScale)
    1276        3133 :         CPLDebug("WARP", "dfXScale = %f, dfYScale = %f", dfXScale, dfYScale);
    1277             : 
    1278        3134 :     const int bUse4SamplesFormula = CanUse4SamplesFormula(this);
    1279             : 
    1280             :     // Safety check for callers that would use GDALWarpKernel without using
    1281             :     // GDALWarpOperation.
    1282        3071 :     if ((eResample == GRA_CubicSpline || eResample == GRA_Lanczos ||
    1283        3006 :          ((eResample == GRA_Cubic || eResample == GRA_Bilinear) &&
    1284        6268 :           !bUse4SamplesFormula)) &&
    1285         348 :         atoi(CSLFetchNameValueDef(papszWarpOptions, "EXTRA_ELTS", "0")) !=
    1286             :             WARP_EXTRA_ELTS)
    1287             :     {
    1288           0 :         CPLError(CE_Failure, CPLE_AppDefined,
    1289             :                  "Source arrays must have WARP_EXTRA_ELTS extra elements at "
    1290             :                  "their end. "
    1291             :                  "See GDALWarpKernel class definition. If this condition is "
    1292             :                  "fulfilled, define a EXTRA_ELTS=%d warp options",
    1293             :                  WARP_EXTRA_ELTS);
    1294           0 :         return CE_Failure;
    1295             :     }
    1296             : 
    1297        3134 :     dfXFilter = anGWKFilterRadius[eResample];
    1298        3134 :     dfYFilter = anGWKFilterRadius[eResample];
    1299             : 
    1300        3134 :     nXRadius = dfXScale < 1.0 ? static_cast<int>(ceil(dfXFilter / dfXScale))
    1301        2581 :                               : static_cast<int>(dfXFilter);
    1302        3134 :     nYRadius = dfYScale < 1.0 ? static_cast<int>(ceil(dfYFilter / dfYScale))
    1303        2585 :                               : static_cast<int>(dfYFilter);
    1304             : 
    1305             :     // Filter window offset depends on the parity of the kernel radius.
    1306        3134 :     nFiltInitX = ((anGWKFilterRadius[eResample] + 1) % 2) - nXRadius;
    1307        3134 :     nFiltInitY = ((anGWKFilterRadius[eResample] + 1) % 2) - nYRadius;
    1308             : 
    1309        3134 :     bApplyVerticalShift =
    1310        3134 :         CPLFetchBool(papszWarpOptions, "APPLY_VERTICAL_SHIFT", false);
    1311        3134 :     dfMultFactorVerticalShift = CPLAtof(CSLFetchNameValueDef(
    1312        3134 :         papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT", "1.0"));
    1313             : 
    1314             :     /* -------------------------------------------------------------------- */
    1315             :     /*      Set up resampling functions.                                    */
    1316             :     /* -------------------------------------------------------------------- */
    1317        3134 :     if (CPLFetchBool(papszWarpOptions, "USE_GENERAL_CASE", false))
    1318          12 :         return GWKGeneralCase(this);
    1319             : 
    1320        3122 :     const bool bNoMasksOrDstDensityOnly =
    1321        3115 :         papanBandSrcValid == nullptr && panUnifiedSrcValid == nullptr &&
    1322        6237 :         pafUnifiedSrcDensity == nullptr && panDstValid == nullptr;
    1323             : 
    1324        3122 :     if (eWorkingDataType == GDT_UInt8 && eResample == GRA_NearestNeighbour &&
    1325             :         bNoMasksOrDstDensityOnly)
    1326         953 :         return GWKNearestNoMasksOrDstDensityOnlyByte(this);
    1327             : 
    1328        2169 :     if (eWorkingDataType == GDT_UInt8 && eResample == GRA_Bilinear &&
    1329             :         bNoMasksOrDstDensityOnly)
    1330         128 :         return GWKBilinearNoMasksOrDstDensityOnlyByte(this);
    1331             : 
    1332        2041 :     if (eWorkingDataType == GDT_UInt8 && eResample == GRA_Cubic &&
    1333             :         bNoMasksOrDstDensityOnly)
    1334         850 :         return GWKCubicNoMasksOrDstDensityOnlyByte(this);
    1335             : 
    1336        1191 :     if (eWorkingDataType == GDT_UInt8 && eResample == GRA_CubicSpline &&
    1337             :         bNoMasksOrDstDensityOnly)
    1338          12 :         return GWKCubicSplineNoMasksOrDstDensityOnlyByte(this);
    1339             : 
    1340        1179 :     if (eWorkingDataType == GDT_UInt8 && eResample == GRA_NearestNeighbour)
    1341         360 :         return GWKNearestByte(this);
    1342             : 
    1343         819 :     if ((eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16) &&
    1344         168 :         eResample == GRA_NearestNeighbour && bNoMasksOrDstDensityOnly)
    1345          14 :         return GWKNearestNoMasksOrDstDensityOnlyShort(this);
    1346             : 
    1347         805 :     if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Cubic &&
    1348             :         bNoMasksOrDstDensityOnly)
    1349           5 :         return GWKCubicNoMasksOrDstDensityOnlyShort(this);
    1350             : 
    1351         800 :     if ((eWorkingDataType == GDT_Int16) && eResample == GRA_CubicSpline &&
    1352             :         bNoMasksOrDstDensityOnly)
    1353           6 :         return GWKCubicSplineNoMasksOrDstDensityOnlyShort(this);
    1354             : 
    1355         794 :     if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Bilinear &&
    1356             :         bNoMasksOrDstDensityOnly)
    1357           5 :         return GWKBilinearNoMasksOrDstDensityOnlyShort(this);
    1358             : 
    1359         789 :     if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Cubic &&
    1360             :         bNoMasksOrDstDensityOnly)
    1361          14 :         return GWKCubicNoMasksOrDstDensityOnlyUShort(this);
    1362             : 
    1363         775 :     if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_CubicSpline &&
    1364             :         bNoMasksOrDstDensityOnly)
    1365           5 :         return GWKCubicSplineNoMasksOrDstDensityOnlyUShort(this);
    1366             : 
    1367         770 :     if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Bilinear &&
    1368             :         bNoMasksOrDstDensityOnly)
    1369           6 :         return GWKBilinearNoMasksOrDstDensityOnlyUShort(this);
    1370             : 
    1371         764 :     if (eWorkingDataType == GDT_Int16 && eResample == GRA_NearestNeighbour)
    1372          48 :         return GWKNearestShort(this);
    1373             : 
    1374         716 :     if (eWorkingDataType == GDT_UInt16 && eResample == GRA_NearestNeighbour)
    1375          10 :         return GWKNearestUnsignedShort(this);
    1376             : 
    1377         706 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour &&
    1378             :         bNoMasksOrDstDensityOnly)
    1379          11 :         return GWKNearestNoMasksOrDstDensityOnlyFloat(this);
    1380             : 
    1381         695 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour)
    1382          50 :         return GWKNearestFloat(this);
    1383             : 
    1384         645 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_Bilinear &&
    1385             :         bNoMasksOrDstDensityOnly)
    1386           4 :         return GWKBilinearNoMasksOrDstDensityOnlyFloat(this);
    1387             : 
    1388         641 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_Cubic &&
    1389             :         bNoMasksOrDstDensityOnly)
    1390           9 :         return GWKCubicNoMasksOrDstDensityOnlyFloat(this);
    1391             : 
    1392             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    1393             :     if (eWorkingDataType == GDT_Float64 && eResample == GRA_Bilinear &&
    1394             :         bNoMasksOrDstDensityOnly)
    1395             :         return GWKBilinearNoMasksOrDstDensityOnlyDouble(this);
    1396             : 
    1397             :     if (eWorkingDataType == GDT_Float64 && eResample == GRA_Cubic &&
    1398             :         bNoMasksOrDstDensityOnly)
    1399             :         return GWKCubicNoMasksOrDstDensityOnlyDouble(this);
    1400             : #endif
    1401             : 
    1402         632 :     if (eResample == GRA_Average)
    1403          77 :         return GWKAverageOrMode(this);
    1404             : 
    1405         555 :     if (eResample == GRA_RMS)
    1406           9 :         return GWKAverageOrMode(this);
    1407             : 
    1408         546 :     if (eResample == GRA_Mode)
    1409          45 :         return GWKAverageOrMode(this);
    1410             : 
    1411         501 :     if (eResample == GRA_Max)
    1412           6 :         return GWKAverageOrMode(this);
    1413             : 
    1414         495 :     if (eResample == GRA_Min)
    1415           5 :         return GWKAverageOrMode(this);
    1416             : 
    1417         490 :     if (eResample == GRA_Med)
    1418           6 :         return GWKAverageOrMode(this);
    1419             : 
    1420         484 :     if (eResample == GRA_Q1)
    1421          10 :         return GWKAverageOrMode(this);
    1422             : 
    1423         474 :     if (eResample == GRA_Q3)
    1424           5 :         return GWKAverageOrMode(this);
    1425             : 
    1426         469 :     if (eResample == GRA_Sum)
    1427          19 :         return GWKSumPreserving(this);
    1428             : 
    1429         450 :     if (!GDALDataTypeIsComplex(eWorkingDataType))
    1430             :     {
    1431         223 :         return GWKRealCase(this);
    1432             :     }
    1433             : 
    1434         227 :     return GWKGeneralCase(this);
    1435             : }
    1436             : 
    1437             : /************************************************************************/
    1438             : /*                              Validate()                              */
    1439             : /************************************************************************/
    1440             : 
    1441             : /**
    1442             :  * \fn CPLErr GDALWarpKernel::Validate()
    1443             :  *
    1444             :  * Check the settings in the GDALWarpKernel, and issue a CPLError()
    1445             :  * (and return CE_Failure) if the configuration is considered to be
    1446             :  * invalid: eResample outside the anGWKFilterRadius table, or an EXCLUDED_VALUES option whose token count is not a multiple of nBands.
    1447             :  *
    1448             :  * On success, each EXCLUDED_VALUES tuple (nBands values) is parsed with CPLAtof() and appended to m_aadfExcludedValues.
    1449             :  * NOTE(review): this text used to mention defaulting pfnProgress to GDALDummyProgress(); the body below does not do that - confirm.
    1450             :  *
    1451             :  * @return CE_None on success or CE_Failure if an error is detected.
    1452             :  */
    1453             : 
    1454        3735 : CPLErr GDALWarpKernel::Validate()
    1455             : 
    1456             : {
    1457        3735 :     if (static_cast<size_t>(eResample) >=  // eResample is used as an index into anGWKFilterRadius; the size_t cast also rejects negative values
    1458             :         (sizeof(anGWKFilterRadius) / sizeof(anGWKFilterRadius[0])))
    1459             :     {
    1460           0 :         CPLError(CE_Failure, CPLE_AppDefined,
    1461             :                  "Unsupported resampling method %d.",
    1462           0 :                  static_cast<int>(eResample));
    1463           0 :         return CE_Failure;
    1464             :     }
    1465             : 
    1466             :     // Tuples of values (e.g. "<R>,<G>,<B>" or "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>)") that must
    1467             :     // be ignored as contributing source pixels during resampling. Only taken into account by
    1468             :     // Average currently
    1469             :     const char *pszExcludedValues =
    1470        3735 :         CSLFetchNameValue(papszWarpOptions, "EXCLUDED_VALUES");
    1471        3735 :     if (pszExcludedValues)
    1472             :     {
    1473             :         const CPLStringList aosTokens(
    1474          14 :             CSLTokenizeString2(pszExcludedValues, "(,)", 0));  // '(', ',' and ')' are passed as the delimiter set
    1475          14 :         if ((aosTokens.size() % nBands) != 0)  // NOTE(review): assumes nBands > 0 (modulo by zero otherwise) - confirm callers guarantee it
    1476             :         {
    1477           1 :             CPLError(CE_Failure, CPLE_AppDefined,
    1478             :                      "EXCLUDED_VALUES should contain one or several tuples of "
    1479             :                      "%d values formatted like <R>,<G>,<B> or "
    1480             :                      "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>) if there are multiple "
    1481             :                      "tuples",
    1482             :                      nBands);
    1483           1 :             return CE_Failure;
    1484             :         }
    1485          26 :         std::vector<double> adfTuple;  // values of the tuple currently being assembled
    1486          52 :         for (int i = 0; i < aosTokens.size(); ++i)
    1487             :         {
    1488          39 :             adfTuple.push_back(CPLAtof(aosTokens[i]));
    1489          39 :             if (((i + 1) % nBands) == 0)  // every nBands-th token completes a tuple
    1490             :             {
    1491          13 :                 m_aadfExcludedValues.push_back(adfTuple);  // NOTE(review): appended without clearing first; repeated Validate() calls would accumulate duplicates - confirm
    1492          13 :                 adfTuple.clear();
    1493             :             }
    1494             :         }
    1495             :     }
    1496             : 
    1497        3734 :     return CE_None;
    1498             : }
    1499             : 
    1500             : /************************************************************************/
    1501             : /*                         GWKOverlayDensity()                          */
    1502             : /*                                                                      */
    1503             : /*      Compute the final density for the destination pixel.  This      */
    1504             : /*      is a function of the overlay density (passed in) and the        */
    1505             : /*      original density.                                               */
    1506             : /************************************************************************/
    1507             : 
    1508    14990200 : static void GWKOverlayDensity(const GDALWarpKernel *poWK, GPtrDiff_t iDstOffset,  // iDstOffset: index into poWK->pafDstDensity
    1509             :                               double dfDensity)  // dfDensity: overlay density to merge into the stored destination density
    1510             : {
    1511    14990200 :     if (dfDensity < 0.0001 || poWK->pafDstDensity == nullptr)  // nothing to do for a near-zero overlay or when no destination density array exists
    1512    13024100 :         return;
    1513             : 
    1514     1966070 :     poWK->pafDstDensity[iDstOffset] =  // new = 1 - (1 - overlay) * (1 - old), computed in single precision
    1515     1966070 :         1.0f -
    1516     1966070 :         (1.0f - float(dfDensity)) * (1.0f - poWK->pafDstDensity[iDstOffset]);
    1517             : }
    1518             : 
    1519             : /************************************************************************/
    1520             : /*                           GWKRoundValueT()                           */
    1521             : /************************************************************************/
    1522             : 
    1523             : template <class T, class U, bool is_signed> struct sGWKRoundValueT  // rounding helper selected on the signedness of the target type T
    1524             : {
    1525             :     static T eval(U);  // declared only; both values of is_signed are covered by the partial specializations below
    1526             : };
    1527             : 
    1528             : template <class T, class U> struct sGWKRoundValueT<T, U, true> /* signed */
    1529             : {
    1530      791525 :     static T eval(U value)
    1531             :     {
    1532      791525 :         return static_cast<T>(floor(value + U(0.5)));  // floor(value + 0.5): the explicit floor() keeps negative values from being truncated toward zero by the cast
    1533             :     }
    1534             : };
    1535             : 
    1536             : template <class T, class U> struct sGWKRoundValueT<T, U, false> /* unsigned */
    1537             : {
    1538   152028497 :     static T eval(U value)
    1539             :     {
    1540   152028497 :         return static_cast<T>(value + U(0.5));  // add 0.5 and let the cast drop the fractional part; no floor() call in the unsigned case
    1541             :     }
    1542             : };
    1543             : 
    1544   152820022 : template <class T, class U> static T GWKRoundValueT(U value)  // convert value (type U) to T, rounding through sGWKRoundValueT
    1545             : {
    1546   152820022 :     return sGWKRoundValueT<T, U, cpl::NumericLimits<T>::is_signed>::eval(value);  // dispatch on whether T is signed
    1547             : }
    1548             : 
    1549      268974 : template <> float GWKRoundValueT<float, double>(double value)  // double -> float specialization: plain cast, no rounding to integer
    1550             : {
    1551      268974 :     return static_cast<float>(value);
    1552             : }
    1553             : 
    1554             : #ifdef notused
    1555             : template <> double GWKRoundValueT<double, double>(double value)  // identity specialization, only compiled when notused is defined
    1556             : {
    1557             :     return value;
    1558             : }
    1559             : #endif
    1560             : 
    1561             : /************************************************************************/
    1562             : /*                           GWKClampValueT()                           */
    1563             : /************************************************************************/
    1564             : 
    1565   145649582 : template <class T, class U> static CPL_INLINE T GWKClampValueT(U value)  // convert value (type U) to T, saturating at T's limits and rounding otherwise
    1566             : {
    1567   145649582 :     if (value < static_cast<U>(cpl::NumericLimits<T>::min()))  // below T's minimum: saturate low
    1568      568589 :         return cpl::NumericLimits<T>::min();
    1569   145080984 :     else if (value > static_cast<U>(cpl::NumericLimits<T>::max()))  // above T's maximum: saturate high
    1570      773151 :         return cpl::NumericLimits<T>::max();
    1571             :     else
    1572   144308044 :         return GWKRoundValueT<T, U>(value);  // in range: round to T
    1573             : }
    1574             : 
    1575      718914 : template <> float GWKClampValueT<float, double>(double dfValue)  // double -> float specialization: plain cast, no clamping
    1576             : {
    1577      718914 :     return static_cast<float>(dfValue);
    1578             : }
    1579             : 
    1580             : #ifdef notused
    1581             : template <> double GWKClampValueT<double, double>(double dfValue)  // identity specialization, only compiled when notused is defined
    1582             : {
    1583             :     return dfValue;
    1584             : }
    1585             : #endif
    1586             : 
    1587             : /************************************************************************/
    1588             : /*                            AvoidNoData()                             */
    1589             : /************************************************************************/
    1590             : 
    1591        1283 : template <class T> inline void AvoidNoData(T *pDst, GPtrDiff_t iDstOffset)  // replace pDst[iDstOffset] in place with an adjacent representable value
    1592             : {
    1593             :     if constexpr (cpl::NumericLimits<T>::is_integer)  // integer types: step by one unit
    1594             :     {
    1595        1027 :         if (pDst[iDstOffset] == static_cast<T>(cpl::NumericLimits<T>::lowest()))  // already at the lowest value, so it cannot be decremented
    1596             :         {
    1597         515 :             pDst[iDstOffset] =
    1598         515 :                 static_cast<T>(cpl::NumericLimits<T>::lowest() + 1);  // move one unit up instead
    1599             :         }
    1600             :         else
    1601         512 :             pDst[iDstOffset]--;  // otherwise move one unit down
    1602             :     }
    1603             :     else  // non-integer types: step to the neighbouring representable value with nextafter()
    1604             :     {
    1605         256 :         if (pDst[iDstOffset] == cpl::NumericLimits<T>::max())  // at the maximum: the only direction left is toward zero
    1606             :         {
    1607             :             using std::nextafter;
    1608           0 :             pDst[iDstOffset] = nextafter(pDst[iDstOffset], static_cast<T>(0));
    1609             :         }
    1610             :         else
    1611             :         {
    1612             :             using std::nextafter;
    1613         256 :             pDst[iDstOffset] =
    1614         256 :                 nextafter(pDst[iDstOffset], cpl::NumericLimits<T>::max());  // otherwise step in the direction of the maximum
    1615             :         }
    1616             :     }
    1617        1283 : }
    1618             : 
    1619             : /************************************************************************/
    1620             : /*                            AvoidNoData()                             */
    1621             : /************************************************************************/
    1622             : 
    1623             : template <class T>  // T: element type used to reinterpret the destination band buffer
    1624    18505930 : inline void AvoidNoData(const GDALWarpKernel *poWK, int iBand,  // nudge the destination pixel of band iBand if it equals that band's destination nodata value
    1625             :                         GPtrDiff_t iDstOffset)  // iDstOffset: element index of the pixel within the band buffer
    1626             : {
    1627    18505930 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    1628    18505930 :     T *pDst = reinterpret_cast<T *>(pabyDst);  // view the raw band bytes as an array of T
    1629             : 
    1630    18505930 :     if (poWK->padfDstNoDataReal != nullptr &&  // only relevant when destination nodata values are set
    1631    11380638 :         poWK->padfDstNoDataReal[iBand] == static_cast<double>(pDst[iDstOffset]))  // the written value collides with the nodata value
    1632             :     {
    1633         640 :         AvoidNoData(pDst, iDstOffset);  // replace it with an adjacent value (pointer overload)
    1634             : 
    1635         640 :         if (!poWK->bWarnedAboutDstNoDataReplacement)  // flag is set below, so later replacements through this kernel skip the warning
    1636             :         {
    1637          40 :             const_cast<GDALWarpKernel *>(poWK)  // poWK is const here; the cast is needed to record that the warning was issued
    1638             :                 ->bWarnedAboutDstNoDataReplacement = true;
    1639          40 :             CPLError(CE_Warning, CPLE_AppDefined,
    1640             :                      "Value %g in the source dataset has been changed to %g "
    1641             :                      "in the destination dataset to avoid being treated as "
    1642             :                      "NoData. To avoid this, select a different NoData value "
    1643             :                      "for the destination dataset.",
    1644          40 :                      poWK->padfDstNoDataReal[iBand],  // first %g: the nodata value, which is what the pixel held before replacement
    1645          40 :                      static_cast<double>(pDst[iDstOffset]));  // second %g: the value after replacement
    1646             :         }
    1647             :     }
    1648    18505930 : }
    1649             : 
    1650             : /************************************************************************/
    1651             : /*                      GWKAvoidNoDataMultiBand()                       */
    1652             : /************************************************************************/
    1653             : 
    1654             : template <class T>
    1655      524573 : static void GWKAvoidNoDataMultiBand(const GDALWarpKernel *poWK,
    1656             :                                     GPtrDiff_t iDstOffset)
    1657             : {
    1658      524573 :     T **ppDst = reinterpret_cast<T **>(poWK->papabyDstImage);
    1659      524573 :     if (poWK->padfDstNoDataReal != nullptr)
    1660             :     {
    1661      208615 :         for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    1662             :         {
    1663      208294 :             if (poWK->padfDstNoDataReal[iBand] !=
    1664      208294 :                 static_cast<double>(ppDst[iBand][iDstOffset]))
    1665      205830 :                 return;
    1666             :         }
    1667         964 :         for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    1668             :         {
    1669         643 :             AvoidNoData(ppDst[iBand], iDstOffset);
    1670             :         }
    1671             : 
    1672         321 :         if (!poWK->bWarnedAboutDstNoDataReplacement)
    1673             :         {
    1674          21 :             const_cast<GDALWarpKernel *>(poWK)
    1675             :                 ->bWarnedAboutDstNoDataReplacement = true;
    1676          42 :             std::string valueSrc, valueDst;
    1677          64 :             for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    1678             :             {
    1679          43 :                 if (!valueSrc.empty())
    1680             :                 {
    1681          22 :                     valueSrc += ',';
    1682          22 :                     valueDst += ',';
    1683             :                 }
    1684          43 :                 valueSrc += CPLSPrintf("%g", poWK->padfDstNoDataReal[iBand]);
    1685          43 :                 valueDst += CPLSPrintf(
    1686          43 :                     "%g", static_cast<double>(ppDst[iBand][iDstOffset]));
    1687             :             }
    1688          21 :             CPLError(CE_Warning, CPLE_AppDefined,
    1689             :                      "Value %s in the source dataset has been changed to %s "
    1690             :                      "in the destination dataset to avoid being treated as "
    1691             :                      "NoData. To avoid this, select a different NoData value "
    1692             :                      "for the destination dataset.",
    1693             :                      valueSrc.c_str(), valueDst.c_str());
    1694             :         }
    1695             :     }
    1696             : }
    1697             : 
    1698             : /************************************************************************/
    1699             : /*                      GWKAvoidNoDataMultiBand()                       */
    1700             : /************************************************************************/
    1701             : 
    1702      524573 : static void GWKAvoidNoDataMultiBand(const GDALWarpKernel *poWK,
    1703             :                                     GPtrDiff_t iDstOffset)
    1704             : {
    1705      524573 :     switch (poWK->eWorkingDataType)
    1706             :     {
    1707      523997 :         case GDT_UInt8:
    1708      523997 :             GWKAvoidNoDataMultiBand<std::uint8_t>(poWK, iDstOffset);
    1709      523997 :             break;
    1710             : 
    1711           0 :         case GDT_Int8:
    1712           0 :             GWKAvoidNoDataMultiBand<std::int8_t>(poWK, iDstOffset);
    1713           0 :             break;
    1714             : 
    1715         128 :         case GDT_Int16:
    1716         128 :             GWKAvoidNoDataMultiBand<std::int16_t>(poWK, iDstOffset);
    1717         128 :             break;
    1718             : 
    1719          64 :         case GDT_UInt16:
    1720          64 :             GWKAvoidNoDataMultiBand<std::uint16_t>(poWK, iDstOffset);
    1721          64 :             break;
    1722             : 
    1723          64 :         case GDT_Int32:
    1724          64 :             GWKAvoidNoDataMultiBand<std::int32_t>(poWK, iDstOffset);
    1725          64 :             break;
    1726             : 
    1727          64 :         case GDT_UInt32:
    1728          64 :             GWKAvoidNoDataMultiBand<std::uint32_t>(poWK, iDstOffset);
    1729          64 :             break;
    1730             : 
    1731          64 :         case GDT_Int64:
    1732          64 :             GWKAvoidNoDataMultiBand<std::int64_t>(poWK, iDstOffset);
    1733          64 :             break;
    1734             : 
    1735          64 :         case GDT_UInt64:
    1736          64 :             GWKAvoidNoDataMultiBand<std::uint64_t>(poWK, iDstOffset);
    1737          64 :             break;
    1738             : 
    1739           0 :         case GDT_Float16:
    1740           0 :             GWKAvoidNoDataMultiBand<GFloat16>(poWK, iDstOffset);
    1741           0 :             break;
    1742             : 
    1743          64 :         case GDT_Float32:
    1744          64 :             GWKAvoidNoDataMultiBand<float>(poWK, iDstOffset);
    1745          64 :             break;
    1746             : 
    1747          64 :         case GDT_Float64:
    1748          64 :             GWKAvoidNoDataMultiBand<double>(poWK, iDstOffset);
    1749          64 :             break;
    1750             : 
    1751           0 :         case GDT_CInt16:
    1752             :         case GDT_CInt32:
    1753             :         case GDT_CFloat16:
    1754             :         case GDT_CFloat32:
    1755             :         case GDT_CFloat64:
    1756             :         case GDT_Unknown:
    1757             :         case GDT_TypeCount:
    1758           0 :             break;
    1759             :     }
    1760      524573 : }
    1761             : 
    1762             : /************************************************************************/
    1763             : /*                       GWKSetPixelValueRealT()                        */
    1764             : /************************************************************************/
    1765             : 
    1766             : template <class T>
    1767    14953877 : static bool GWKSetPixelValueRealT(const GDALWarpKernel *poWK, int iBand,
    1768             :                                   GPtrDiff_t iDstOffset, double dfDensity,
    1769             :                                   T value, bool bAvoidNoDataSingleBand)
    1770             : {
    1771    14953877 :     T *pDst = reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
    1772             : 
    1773             :     /* -------------------------------------------------------------------- */
    1774             :     /*      If the source density is less than 100% we need to fetch the    */
    1775             :     /*      existing destination value, and mix it with the source to       */
    1776             :     /*      get the new "to apply" value.  Also compute composite           */
    1777             :     /*      density.                                                        */
    1778             :     /*                                                                      */
    1779             :     /*      We avoid mixing if density is very near one or risk mixing      */
    1780             :     /*      in very extreme nodata values and causing odd results (#1610)   */
    1781             :     /* -------------------------------------------------------------------- */
    1782    14953877 :     if (dfDensity < 0.9999)
    1783             :     {
    1784      945508 :         if (dfDensity < 0.0001)
    1785           0 :             return true;
    1786             : 
    1787      945508 :         double dfDstDensity = 1.0;
    1788             : 
    1789      945508 :         if (poWK->pafDstDensity != nullptr)
    1790      944036 :             dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
    1791        1472 :         else if (poWK->panDstValid != nullptr &&
    1792           0 :                  !CPLMaskGet(poWK->panDstValid, iDstOffset))
    1793           0 :             dfDstDensity = 0.0;
    1794             : 
    1795             :         // It seems like we also ought to be testing panDstValid[] here!
    1796             : 
    1797      945508 :         const double dfDstReal = static_cast<double>(pDst[iDstOffset]);
    1798             : 
    1799             :         // The destination density is really only relative to the portion
    1800             :         // not occluded by the overlay.
    1801      945508 :         const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
    1802             : 
    1803      945508 :         const double dfReal =
    1804      945508 :             (double(value) * dfDensity + dfDstReal * dfDstInfluence) /
    1805      945508 :             (dfDensity + dfDstInfluence);
    1806             : 
    1807             :         /* --------------------------------------------------------------------
    1808             :          */
    1809             :         /*      Actually apply the destination value. */
    1810             :         /*                                                                      */
    1811             :         /*      Avoid using the destination nodata value for integer datatypes
    1812             :          */
    1813             :         /*      if by chance it is equal to the computed pixel value. */
    1814             :         /* --------------------------------------------------------------------
    1815             :          */
    1816      945508 :         pDst[iDstOffset] = GWKClampValueT<T>(dfReal);
    1817             :     }
    1818             :     else
    1819             :     {
    1820    14008336 :         pDst[iDstOffset] = value;
    1821             :     }
    1822             : 
    1823    14953877 :     if (bAvoidNoDataSingleBand)
    1824    13681221 :         AvoidNoData<T>(poWK, iBand, iDstOffset);
    1825             : 
    1826    14953877 :     return true;
    1827             : }
    1828             : 
    1829             : /************************************************************************/
    1830             : /*                      ClampRoundAndAvoidNoData()                      */
    1831             : /************************************************************************/
    1832             : 
    1833             : template <class T>
    1834     5125155 : inline void ClampRoundAndAvoidNoData(const GDALWarpKernel *poWK, int iBand,
    1835             :                                      GPtrDiff_t iDstOffset, double dfReal,
    1836             :                                      bool bAvoidNoDataSingleBand)
    1837             : {
    1838     5125155 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    1839     5125155 :     T *pDst = reinterpret_cast<T *>(pabyDst);
    1840             : 
    1841             :     if constexpr (cpl::NumericLimits<T>::is_integer)
    1842             :     {
    1843             :         using std::floor;
    1844     4628025 :         if (dfReal < static_cast<double>(cpl::NumericLimits<T>::lowest()))
    1845        5298 :             pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::lowest());
    1846     4622725 :         else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
    1847       23635 :             pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::max());
    1848             :         else if constexpr (cpl::NumericLimits<T>::is_signed)
    1849       10410 :             pDst[iDstOffset] = static_cast<T>(floor(dfReal + 0.5));
    1850             :         else
    1851     4588685 :             pDst[iDstOffset] = static_cast<T>(dfReal + 0.5);
    1852             :     }
    1853             :     else
    1854             :     {
    1855      497130 :         pDst[iDstOffset] = static_cast<T>(dfReal);
    1856             :     }
    1857             : 
    1858     5125155 :     if (bAvoidNoDataSingleBand)
    1859     4824749 :         AvoidNoData<T>(poWK, iBand, iDstOffset);
    1860     5125155 : }
    1861             : 
    1862             : /************************************************************************/
    1863             : /*                          GWKSetPixelValue()                          */
    1864             : /************************************************************************/
    1865             : 
    1866     4012410 : static bool GWKSetPixelValue(const GDALWarpKernel *poWK, int iBand,
    1867             :                              GPtrDiff_t iDstOffset, double dfDensity,
    1868             :                              double dfReal, double dfImag,
    1869             :                              bool bAvoidNoDataSingleBand)
    1870             : 
    1871             : {
    1872     4012410 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    1873             : 
    1874             :     /* -------------------------------------------------------------------- */
    1875             :     /*      If the source density is less than 100% we need to fetch the    */
    1876             :     /*      existing destination value, and mix it with the source to       */
    1877             :     /*      get the new "to apply" value.  Also compute composite           */
    1878             :     /*      density.                                                        */
    1879             :     /*                                                                      */
    1880             :     /*      We avoid mixing if density is very near one or risk mixing      */
    1881             :     /*      in very extreme nodata values and causing odd results (#1610)   */
    1882             :     /* -------------------------------------------------------------------- */
    1883     4012410 :     if (dfDensity < 0.9999)
    1884             :     {
    1885         800 :         if (dfDensity < 0.0001)
    1886           0 :             return true;
    1887             : 
    1888         800 :         double dfDstDensity = 1.0;
    1889         800 :         if (poWK->pafDstDensity != nullptr)
    1890         800 :             dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
    1891           0 :         else if (poWK->panDstValid != nullptr &&
    1892           0 :                  !CPLMaskGet(poWK->panDstValid, iDstOffset))
    1893           0 :             dfDstDensity = 0.0;
    1894             : 
    1895         800 :         double dfDstReal = 0.0;
    1896         800 :         double dfDstImag = 0.0;
    1897             :         // It seems like we also ought to be testing panDstValid[] here!
    1898             : 
    1899             :         // TODO(schwehr): Factor out this repreated type of set.
    1900         800 :         switch (poWK->eWorkingDataType)
    1901             :         {
    1902           0 :             case GDT_UInt8:
    1903           0 :                 dfDstReal = pabyDst[iDstOffset];
    1904           0 :                 dfDstImag = 0.0;
    1905           0 :                 break;
    1906             : 
    1907           0 :             case GDT_Int8:
    1908           0 :                 dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
    1909           0 :                 dfDstImag = 0.0;
    1910           0 :                 break;
    1911             : 
    1912         400 :             case GDT_Int16:
    1913         400 :                 dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
    1914         400 :                 dfDstImag = 0.0;
    1915         400 :                 break;
    1916             : 
    1917         400 :             case GDT_UInt16:
    1918         400 :                 dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
    1919         400 :                 dfDstImag = 0.0;
    1920         400 :                 break;
    1921             : 
    1922           0 :             case GDT_Int32:
    1923           0 :                 dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
    1924           0 :                 dfDstImag = 0.0;
    1925           0 :                 break;
    1926             : 
    1927           0 :             case GDT_UInt32:
    1928           0 :                 dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
    1929           0 :                 dfDstImag = 0.0;
    1930           0 :                 break;
    1931             : 
    1932           0 :             case GDT_Int64:
    1933           0 :                 dfDstReal = static_cast<double>(
    1934           0 :                     reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
    1935           0 :                 dfDstImag = 0.0;
    1936           0 :                 break;
    1937             : 
    1938           0 :             case GDT_UInt64:
    1939           0 :                 dfDstReal = static_cast<double>(
    1940           0 :                     reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
    1941           0 :                 dfDstImag = 0.0;
    1942           0 :                 break;
    1943             : 
    1944           0 :             case GDT_Float16:
    1945           0 :                 dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
    1946           0 :                 dfDstImag = 0.0;
    1947           0 :                 break;
    1948             : 
    1949           0 :             case GDT_Float32:
    1950           0 :                 dfDstReal =
    1951           0 :                     double(reinterpret_cast<float *>(pabyDst)[iDstOffset]);
    1952           0 :                 dfDstImag = 0.0;
    1953           0 :                 break;
    1954             : 
    1955           0 :             case GDT_Float64:
    1956           0 :                 dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
    1957           0 :                 dfDstImag = 0.0;
    1958           0 :                 break;
    1959             : 
    1960           0 :             case GDT_CInt16:
    1961           0 :                 dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2];
    1962           0 :                 dfDstImag =
    1963           0 :                     reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2 + 1];
    1964           0 :                 break;
    1965             : 
    1966           0 :             case GDT_CInt32:
    1967           0 :                 dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2];
    1968           0 :                 dfDstImag =
    1969           0 :                     reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2 + 1];
    1970           0 :                 break;
    1971             : 
    1972           0 :             case GDT_CFloat16:
    1973             :                 dfDstReal =
    1974           0 :                     reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2];
    1975             :                 dfDstImag =
    1976           0 :                     reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1];
    1977           0 :                 break;
    1978             : 
    1979           0 :             case GDT_CFloat32:
    1980           0 :                 dfDstReal =
    1981           0 :                     double(reinterpret_cast<float *>(pabyDst)[iDstOffset * 2]);
    1982           0 :                 dfDstImag = double(
    1983           0 :                     reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1]);
    1984           0 :                 break;
    1985             : 
    1986           0 :             case GDT_CFloat64:
    1987           0 :                 dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset * 2];
    1988           0 :                 dfDstImag =
    1989           0 :                     reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1];
    1990           0 :                 break;
    1991             : 
    1992           0 :             case GDT_Unknown:
    1993             :             case GDT_TypeCount:
    1994           0 :                 CPLAssert(false);
    1995             :                 return false;
    1996             :         }
    1997             : 
    1998             :         // The destination density is really only relative to the portion
    1999             :         // not occluded by the overlay.
    2000         800 :         const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
    2001             : 
    2002         800 :         dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
    2003         800 :                  (dfDensity + dfDstInfluence);
    2004             : 
    2005         800 :         dfImag = (dfImag * dfDensity + dfDstImag * dfDstInfluence) /
    2006         800 :                  (dfDensity + dfDstInfluence);
    2007             :     }
    2008             : 
    2009             :     /* -------------------------------------------------------------------- */
    2010             :     /*      Actually apply the destination value.                           */
    2011             :     /*                                                                      */
    2012             :     /*      Avoid using the destination nodata value for integer datatypes  */
    2013             :     /*      if by chance it is equal to the computed pixel value.           */
    2014             :     /* -------------------------------------------------------------------- */
    2015             : 
    2016     4012410 :     switch (poWK->eWorkingDataType)
    2017             :     {
    2018     3290010 :         case GDT_UInt8:
    2019     3290010 :             ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal,
    2020             :                                             bAvoidNoDataSingleBand);
    2021     3290010 :             break;
    2022             : 
    2023           0 :         case GDT_Int8:
    2024           0 :             ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal,
    2025             :                                             bAvoidNoDataSingleBand);
    2026           0 :             break;
    2027             : 
    2028        7472 :         case GDT_Int16:
    2029        7472 :             ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal,
    2030             :                                              bAvoidNoDataSingleBand);
    2031        7472 :             break;
    2032             : 
    2033         464 :         case GDT_UInt16:
    2034         464 :             ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal,
    2035             :                                               bAvoidNoDataSingleBand);
    2036         464 :             break;
    2037             : 
    2038          63 :         case GDT_UInt32:
    2039          63 :             ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal,
    2040             :                                               bAvoidNoDataSingleBand);
    2041          63 :             break;
    2042             : 
    2043          63 :         case GDT_Int32:
    2044          63 :             ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal,
    2045             :                                              bAvoidNoDataSingleBand);
    2046          63 :             break;
    2047             : 
    2048           0 :         case GDT_UInt64:
    2049           0 :             ClampRoundAndAvoidNoData<std::uint64_t>(
    2050             :                 poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
    2051           0 :             break;
    2052             : 
    2053           0 :         case GDT_Int64:
    2054           0 :             ClampRoundAndAvoidNoData<std::int64_t>(
    2055             :                 poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
    2056           0 :             break;
    2057             : 
    2058           0 :         case GDT_Float16:
    2059           0 :             ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal,
    2060             :                                                bAvoidNoDataSingleBand);
    2061           0 :             break;
    2062             : 
    2063      478957 :         case GDT_Float32:
    2064      478957 :             ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal,
    2065             :                                             bAvoidNoDataSingleBand);
    2066      478957 :             break;
    2067             : 
    2068         149 :         case GDT_Float64:
    2069         149 :             ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal,
    2070             :                                              bAvoidNoDataSingleBand);
    2071         149 :             break;
    2072             : 
    2073      234079 :         case GDT_CInt16:
    2074             :         {
    2075             :             typedef GInt16 T;
    2076      234079 :             if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
    2077           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    2078           0 :                     cpl::NumericLimits<T>::min();
    2079      234079 :             else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
    2080           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    2081           0 :                     cpl::NumericLimits<T>::max();
    2082             :             else
    2083      234079 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    2084      234079 :                     static_cast<T>(floor(dfReal + 0.5));
    2085      234079 :             if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
    2086           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    2087           0 :                     cpl::NumericLimits<T>::min();
    2088      234079 :             else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
    2089           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    2090           0 :                     cpl::NumericLimits<T>::max();
    2091             :             else
    2092      234079 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    2093      234079 :                     static_cast<T>(floor(dfImag + 0.5));
    2094      234079 :             break;
    2095             :         }
    2096             : 
    2097         379 :         case GDT_CInt32:
    2098             :         {
    2099             :             typedef GInt32 T;
    2100         379 :             if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
    2101           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    2102           0 :                     cpl::NumericLimits<T>::min();
    2103         379 :             else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
    2104           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    2105           0 :                     cpl::NumericLimits<T>::max();
    2106             :             else
    2107         379 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    2108         379 :                     static_cast<T>(floor(dfReal + 0.5));
    2109         379 :             if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
    2110           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    2111           0 :                     cpl::NumericLimits<T>::min();
    2112         379 :             else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
    2113           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    2114           0 :                     cpl::NumericLimits<T>::max();
    2115             :             else
    2116         379 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    2117         379 :                     static_cast<T>(floor(dfImag + 0.5));
    2118         379 :             break;
    2119             :         }
    2120             : 
    2121           0 :         case GDT_CFloat16:
    2122           0 :             reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2] =
    2123           0 :                 static_cast<GFloat16>(dfReal);
    2124           0 :             reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1] =
    2125           0 :                 static_cast<GFloat16>(dfImag);
    2126           0 :             break;
    2127             : 
    2128         394 :         case GDT_CFloat32:
    2129         394 :             reinterpret_cast<float *>(pabyDst)[iDstOffset * 2] =
    2130         394 :                 static_cast<float>(dfReal);
    2131         394 :             reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1] =
    2132         394 :                 static_cast<float>(dfImag);
    2133         394 :             break;
    2134             : 
    2135         380 :         case GDT_CFloat64:
    2136         380 :             reinterpret_cast<double *>(pabyDst)[iDstOffset * 2] = dfReal;
    2137         380 :             reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1] = dfImag;
    2138         380 :             break;
    2139             : 
    2140           0 :         case GDT_Unknown:
    2141             :         case GDT_TypeCount:
    2142           0 :             return false;
    2143             :     }
    2144             : 
    2145     4012410 :     return true;
    2146             : }
    2147             : 
    2148             : /************************************************************************/
    2149             : /*                        GWKSetPixelValueReal()                        */
    2150             : /************************************************************************/
    2151             : 
    2152     1347980 : static bool GWKSetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
    2153             :                                  GPtrDiff_t iDstOffset, double dfDensity,
    2154             :                                  double dfReal, bool bAvoidNoDataSingleBand)
    2155             : 
    2156             : {
    2157     1347980 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    2158             : 
    2159             :     /* -------------------------------------------------------------------- */
    2160             :     /*      If the source density is less than 100% we need to fetch the    */
    2161             :     /*      existing destination value, and mix it with the source to       */
    2162             :     /*      get the new "to apply" value.  Also compute composite           */
    2163             :     /*      density.                                                        */
    2164             :     /*                                                                      */
    2165             :     /*      We avoid mixing if density is very near one or risk mixing      */
    2166             :     /*      in very extreme nodata values and causing odd results (#1610)   */
    2167             :     /* -------------------------------------------------------------------- */
    2168     1347980 :     if (dfDensity < 0.9999)
    2169             :     {
    2170         600 :         if (dfDensity < 0.0001)
    2171           0 :             return true;
    2172             : 
    2173         600 :         double dfDstReal = 0.0;
    2174         600 :         double dfDstDensity = 1.0;
    2175             : 
    2176         600 :         if (poWK->pafDstDensity != nullptr)
    2177         600 :             dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
    2178           0 :         else if (poWK->panDstValid != nullptr &&
    2179           0 :                  !CPLMaskGet(poWK->panDstValid, iDstOffset))
    2180           0 :             dfDstDensity = 0.0;
    2181             : 
    2182             :         // It seems like we also ought to be testing panDstValid[] here!
    2183             : 
    2184         600 :         switch (poWK->eWorkingDataType)
    2185             :         {
    2186           0 :             case GDT_UInt8:
    2187           0 :                 dfDstReal = pabyDst[iDstOffset];
    2188           0 :                 break;
    2189             : 
    2190           0 :             case GDT_Int8:
    2191           0 :                 dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
    2192           0 :                 break;
    2193             : 
    2194         300 :             case GDT_Int16:
    2195         300 :                 dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
    2196         300 :                 break;
    2197             : 
    2198         300 :             case GDT_UInt16:
    2199         300 :                 dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
    2200         300 :                 break;
    2201             : 
    2202           0 :             case GDT_Int32:
    2203           0 :                 dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
    2204           0 :                 break;
    2205             : 
    2206           0 :             case GDT_UInt32:
    2207           0 :                 dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
    2208           0 :                 break;
    2209             : 
    2210           0 :             case GDT_Int64:
    2211           0 :                 dfDstReal = static_cast<double>(
    2212           0 :                     reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
    2213           0 :                 break;
    2214             : 
    2215           0 :             case GDT_UInt64:
    2216           0 :                 dfDstReal = static_cast<double>(
    2217           0 :                     reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
    2218           0 :                 break;
    2219             : 
    2220           0 :             case GDT_Float16:
    2221           0 :                 dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
    2222           0 :                 break;
    2223             : 
    2224           0 :             case GDT_Float32:
    2225           0 :                 dfDstReal =
    2226           0 :                     double(reinterpret_cast<float *>(pabyDst)[iDstOffset]);
    2227           0 :                 break;
    2228             : 
    2229           0 :             case GDT_Float64:
    2230           0 :                 dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
    2231           0 :                 break;
    2232             : 
    2233           0 :             case GDT_CInt16:
    2234             :             case GDT_CInt32:
    2235             :             case GDT_CFloat16:
    2236             :             case GDT_CFloat32:
    2237             :             case GDT_CFloat64:
    2238             :             case GDT_Unknown:
    2239             :             case GDT_TypeCount:
    2240           0 :                 CPLAssert(false);
    2241             :                 return false;
    2242             :         }
    2243             : 
    2244             :         // The destination density is really only relative to the portion
    2245             :         // not occluded by the overlay.
    2246         600 :         const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
    2247             : 
    2248         600 :         dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
    2249         600 :                  (dfDensity + dfDstInfluence);
    2250             :     }
    2251             : 
    2252             :     /* -------------------------------------------------------------------- */
    2253             :     /*      Actually apply the destination value.                           */
    2254             :     /*                                                                      */
    2255             :     /*      Avoid using the destination nodata value for integer datatypes  */
    2256             :     /*      if by chance it is equal to the computed pixel value.           */
    2257             :     /* -------------------------------------------------------------------- */
    2258             : 
    2259     1347980 :     switch (poWK->eWorkingDataType)
    2260             :     {
    2261     1325840 :         case GDT_UInt8:
    2262     1325840 :             ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal,
    2263             :                                             bAvoidNoDataSingleBand);
    2264     1325840 :             break;
    2265             : 
    2266           0 :         case GDT_Int8:
    2267           0 :             ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal,
    2268             :                                             bAvoidNoDataSingleBand);
    2269           0 :             break;
    2270             : 
    2271        1309 :         case GDT_Int16:
    2272        1309 :             ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal,
    2273             :                                              bAvoidNoDataSingleBand);
    2274        1309 :             break;
    2275             : 
    2276         475 :         case GDT_UInt16:
    2277         475 :             ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal,
    2278             :                                               bAvoidNoDataSingleBand);
    2279         475 :             break;
    2280             : 
    2281         539 :         case GDT_UInt32:
    2282         539 :             ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal,
    2283             :                                               bAvoidNoDataSingleBand);
    2284         539 :             break;
    2285             : 
    2286        1342 :         case GDT_Int32:
    2287        1342 :             ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal,
    2288             :                                              bAvoidNoDataSingleBand);
    2289        1342 :             break;
    2290             : 
    2291         224 :         case GDT_UInt64:
    2292         224 :             ClampRoundAndAvoidNoData<std::uint64_t>(
    2293             :                 poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
    2294         224 :             break;
    2295             : 
    2296         224 :         case GDT_Int64:
    2297         224 :             ClampRoundAndAvoidNoData<std::int64_t>(
    2298             :                 poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
    2299         224 :             break;
    2300             : 
    2301           0 :         case GDT_Float16:
    2302           0 :             ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal,
    2303             :                                                bAvoidNoDataSingleBand);
    2304           0 :             break;
    2305             : 
    2306        3538 :         case GDT_Float32:
    2307        3538 :             ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal,
    2308             :                                             bAvoidNoDataSingleBand);
    2309        3538 :             break;
    2310             : 
    2311       14486 :         case GDT_Float64:
    2312       14486 :             ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal,
    2313             :                                              bAvoidNoDataSingleBand);
    2314       14486 :             break;
    2315             : 
    2316           0 :         case GDT_CInt16:
    2317             :         case GDT_CInt32:
    2318             :         case GDT_CFloat16:
    2319             :         case GDT_CFloat32:
    2320             :         case GDT_CFloat64:
    2321           0 :             return false;
    2322             : 
    2323           0 :         case GDT_Unknown:
    2324             :         case GDT_TypeCount:
    2325           0 :             CPLAssert(false);
    2326             :             return false;
    2327             :     }
    2328             : 
    2329     1347980 :     return true;
    2330             : }
    2331             : 
    2332             : /************************************************************************/
    2333             : /*                          GWKGetPixelValue()                          */
    2334             : /************************************************************************/
    2335             : 
    2336             : /* It is assumed that panUnifiedSrcValid has been checked before */
    2337             : 
    2338    30268000 : static bool GWKGetPixelValue(const GDALWarpKernel *poWK, int iBand,
    2339             :                              GPtrDiff_t iSrcOffset, double *pdfDensity,
    2340             :                              double *pdfReal, double *pdfImag)
    2341             : 
    2342             : {
    2343    30268000 :     GByte *pabySrc = poWK->papabySrcImage[iBand];
    2344             : 
    2345    60536000 :     if (poWK->papanBandSrcValid != nullptr &&
    2346    30268000 :         poWK->papanBandSrcValid[iBand] != nullptr &&
    2347           0 :         !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
    2348             :     {
    2349           0 :         *pdfDensity = 0.0;
    2350           0 :         return false;
    2351             :     }
    2352             : 
    2353    30268000 :     *pdfReal = 0.0;
    2354    30268000 :     *pdfImag = 0.0;
    2355             : 
    2356             :     // TODO(schwehr): Fix casting.
    2357    30268000 :     switch (poWK->eWorkingDataType)
    2358             :     {
    2359    29191100 :         case GDT_UInt8:
    2360    29191100 :             *pdfReal = pabySrc[iSrcOffset];
    2361    29191100 :             *pdfImag = 0.0;
    2362    29191100 :             break;
    2363             : 
    2364           0 :         case GDT_Int8:
    2365           0 :             *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
    2366           0 :             *pdfImag = 0.0;
    2367           0 :             break;
    2368             : 
    2369       28232 :         case GDT_Int16:
    2370       28232 :             *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
    2371       28232 :             *pdfImag = 0.0;
    2372       28232 :             break;
    2373             : 
    2374         166 :         case GDT_UInt16:
    2375         166 :             *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
    2376         166 :             *pdfImag = 0.0;
    2377         166 :             break;
    2378             : 
    2379          63 :         case GDT_Int32:
    2380          63 :             *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
    2381          63 :             *pdfImag = 0.0;
    2382          63 :             break;
    2383             : 
    2384          63 :         case GDT_UInt32:
    2385          63 :             *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
    2386          63 :             *pdfImag = 0.0;
    2387          63 :             break;
    2388             : 
    2389           0 :         case GDT_Int64:
    2390           0 :             *pdfReal = static_cast<double>(
    2391           0 :                 reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
    2392           0 :             *pdfImag = 0.0;
    2393           0 :             break;
    2394             : 
    2395           0 :         case GDT_UInt64:
    2396           0 :             *pdfReal = static_cast<double>(
    2397           0 :                 reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
    2398           0 :             *pdfImag = 0.0;
    2399           0 :             break;
    2400             : 
    2401           0 :         case GDT_Float16:
    2402           0 :             *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
    2403           0 :             *pdfImag = 0.0;
    2404           0 :             break;
    2405             : 
    2406     1047220 :         case GDT_Float32:
    2407     1047220 :             *pdfReal = double(reinterpret_cast<float *>(pabySrc)[iSrcOffset]);
    2408     1047220 :             *pdfImag = 0.0;
    2409     1047220 :             break;
    2410             : 
    2411         587 :         case GDT_Float64:
    2412         587 :             *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
    2413         587 :             *pdfImag = 0.0;
    2414         587 :             break;
    2415             : 
    2416         133 :         case GDT_CInt16:
    2417         133 :             *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2];
    2418         133 :             *pdfImag = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2 + 1];
    2419         133 :             break;
    2420             : 
    2421         133 :         case GDT_CInt32:
    2422         133 :             *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2];
    2423         133 :             *pdfImag = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2 + 1];
    2424         133 :             break;
    2425             : 
    2426           0 :         case GDT_CFloat16:
    2427           0 :             *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2];
    2428           0 :             *pdfImag =
    2429           0 :                 reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2 + 1];
    2430           0 :             break;
    2431             : 
    2432         194 :         case GDT_CFloat32:
    2433         194 :             *pdfReal =
    2434         194 :                 double(reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2]);
    2435         194 :             *pdfImag =
    2436         194 :                 double(reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2 + 1]);
    2437         194 :             break;
    2438             : 
    2439         138 :         case GDT_CFloat64:
    2440         138 :             *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2];
    2441         138 :             *pdfImag = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2 + 1];
    2442         138 :             break;
    2443             : 
    2444           0 :         case GDT_Unknown:
    2445             :         case GDT_TypeCount:
    2446           0 :             CPLAssert(false);
    2447             :             *pdfDensity = 0.0;
    2448             :             return false;
    2449             :     }
    2450             : 
    2451    30268000 :     if (poWK->pafUnifiedSrcDensity != nullptr)
    2452     4194800 :         *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    2453             :     else
    2454    26073200 :         *pdfDensity = 1.0;
    2455             : 
    2456    30268000 :     return *pdfDensity != 0.0;
    2457             : }
    2458             : 
    2459             : /************************************************************************/
    2460             : /*                        GWKGetPixelValueReal()                        */
    2461             : /************************************************************************/
    2462             : 
    2463       15516 : static bool GWKGetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
    2464             :                                  GPtrDiff_t iSrcOffset, double *pdfDensity,
    2465             :                                  double *pdfReal)
    2466             : 
    2467             : {
    2468       15516 :     GByte *pabySrc = poWK->papabySrcImage[iBand];
    2469             : 
    2470       31034 :     if (poWK->papanBandSrcValid != nullptr &&
    2471       15518 :         poWK->papanBandSrcValid[iBand] != nullptr &&
    2472           2 :         !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
    2473             :     {
    2474           0 :         *pdfDensity = 0.0;
    2475           0 :         return false;
    2476             :     }
    2477             : 
    2478       15516 :     switch (poWK->eWorkingDataType)
    2479             :     {
    2480           1 :         case GDT_UInt8:
    2481           1 :             *pdfReal = pabySrc[iSrcOffset];
    2482           1 :             break;
    2483             : 
    2484           0 :         case GDT_Int8:
    2485           0 :             *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
    2486           0 :             break;
    2487             : 
    2488           1 :         case GDT_Int16:
    2489           1 :             *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
    2490           1 :             break;
    2491             : 
    2492           1 :         case GDT_UInt16:
    2493           1 :             *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
    2494           1 :             break;
    2495             : 
    2496         982 :         case GDT_Int32:
    2497         982 :             *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
    2498         982 :             break;
    2499             : 
    2500         179 :         case GDT_UInt32:
    2501         179 :             *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
    2502         179 :             break;
    2503             : 
    2504         112 :         case GDT_Int64:
    2505         112 :             *pdfReal = static_cast<double>(
    2506         112 :                 reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
    2507         112 :             break;
    2508             : 
    2509         112 :         case GDT_UInt64:
    2510         112 :             *pdfReal = static_cast<double>(
    2511         112 :                 reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
    2512         112 :             break;
    2513             : 
    2514           0 :         case GDT_Float16:
    2515           0 :             *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
    2516           0 :             break;
    2517             : 
    2518           2 :         case GDT_Float32:
    2519           2 :             *pdfReal = double(reinterpret_cast<float *>(pabySrc)[iSrcOffset]);
    2520           2 :             break;
    2521             : 
    2522       14126 :         case GDT_Float64:
    2523       14126 :             *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
    2524       14126 :             break;
    2525             : 
    2526           0 :         case GDT_CInt16:
    2527             :         case GDT_CInt32:
    2528             :         case GDT_CFloat16:
    2529             :         case GDT_CFloat32:
    2530             :         case GDT_CFloat64:
    2531             :         case GDT_Unknown:
    2532             :         case GDT_TypeCount:
    2533           0 :             CPLAssert(false);
    2534             :             return false;
    2535             :     }
    2536             : 
    2537       15516 :     if (poWK->pafUnifiedSrcDensity != nullptr)
    2538           0 :         *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    2539             :     else
    2540       15516 :         *pdfDensity = 1.0;
    2541             : 
    2542       15516 :     return *pdfDensity != 0.0;
    2543             : }
    2544             : 
    2545             : /************************************************************************/
    2546             : /*                           GWKGetPixelRow()                           */
    2547             : /************************************************************************/
    2548             : 
    2549             : /* It is assumed that adfImag[] is set to 0 by caller code for non-complex */
    2550             : /* data-types. */
    2551             : 
    2552     2369710 : static bool GWKGetPixelRow(const GDALWarpKernel *poWK, int iBand,
    2553             :                            GPtrDiff_t iSrcOffset, int nHalfSrcLen,
    2554             :                            double *padfDensity, double adfReal[],
    2555             :                            double *padfImag)
    2556             : {
    2557             :     // We know that nSrcLen is even, so we can *always* unroll loops 2x.
    2558     2369710 :     const int nSrcLen = nHalfSrcLen * 2;
    2559     2369710 :     bool bHasValid = false;
    2560             : 
    2561     2369710 :     if (padfDensity != nullptr)
    2562             :     {
    2563             :         // Init the density.
    2564     3384030 :         for (int i = 0; i < nSrcLen; i += 2)
    2565             :         {
    2566     2211910 :             padfDensity[i] = 1.0;
    2567     2211910 :             padfDensity[i + 1] = 1.0;
    2568             :         }
    2569             : 
    2570     1172120 :         if (poWK->panUnifiedSrcValid != nullptr)
    2571             :         {
    2572     3281460 :             for (int i = 0; i < nSrcLen; i += 2)
    2573             :             {
    2574     2142070 :                 if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i))
    2575     2067740 :                     bHasValid = true;
    2576             :                 else
    2577       74323 :                     padfDensity[i] = 0.0;
    2578             : 
    2579     2142070 :                 if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i + 1))
    2580     2068400 :                     bHasValid = true;
    2581             :                 else
    2582       73668 :                     padfDensity[i + 1] = 0.0;
    2583             :             }
    2584             : 
    2585             :             // Reset or fail as needed.
    2586     1139400 :             if (bHasValid)
    2587     1116590 :                 bHasValid = false;
    2588             :             else
    2589       22806 :                 return false;
    2590             :         }
    2591             : 
    2592     1149320 :         if (poWK->papanBandSrcValid != nullptr &&
    2593           0 :             poWK->papanBandSrcValid[iBand] != nullptr)
    2594             :         {
    2595           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2596             :             {
    2597           0 :                 if (CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset + i))
    2598           0 :                     bHasValid = true;
    2599             :                 else
    2600           0 :                     padfDensity[i] = 0.0;
    2601             : 
    2602           0 :                 if (CPLMaskGet(poWK->papanBandSrcValid[iBand],
    2603           0 :                                iSrcOffset + i + 1))
    2604           0 :                     bHasValid = true;
    2605             :                 else
    2606           0 :                     padfDensity[i + 1] = 0.0;
    2607             :             }
    2608             : 
    2609             :             // Reset or fail as needed.
    2610           0 :             if (bHasValid)
    2611           0 :                 bHasValid = false;
    2612             :             else
    2613           0 :                 return false;
    2614             :         }
    2615             :     }
    2616             : 
    2617             :     // TODO(schwehr): Fix casting.
    2618             :     // Fetch data.
    2619     2346910 :     switch (poWK->eWorkingDataType)
    2620             :     {
    2621     1136680 :         case GDT_UInt8:
    2622             :         {
    2623     1136680 :             GByte *pSrc =
    2624     1136680 :                 reinterpret_cast<GByte *>(poWK->papabySrcImage[iBand]);
    2625     1136680 :             pSrc += iSrcOffset;
    2626     3281570 :             for (int i = 0; i < nSrcLen; i += 2)
    2627             :             {
    2628     2144890 :                 adfReal[i] = pSrc[i];
    2629     2144890 :                 adfReal[i + 1] = pSrc[i + 1];
    2630             :             }
    2631     1136680 :             break;
    2632             :         }
    2633             : 
    2634           0 :         case GDT_Int8:
    2635             :         {
    2636           0 :             GInt8 *pSrc =
    2637           0 :                 reinterpret_cast<GInt8 *>(poWK->papabySrcImage[iBand]);
    2638           0 :             pSrc += iSrcOffset;
    2639           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2640             :             {
    2641           0 :                 adfReal[i] = pSrc[i];
    2642           0 :                 adfReal[i + 1] = pSrc[i + 1];
    2643             :             }
    2644           0 :             break;
    2645             :         }
    2646             : 
    2647        5950 :         case GDT_Int16:
    2648             :         {
    2649        5950 :             GInt16 *pSrc =
    2650        5950 :                 reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
    2651        5950 :             pSrc += iSrcOffset;
    2652       22164 :             for (int i = 0; i < nSrcLen; i += 2)
    2653             :             {
    2654       16214 :                 adfReal[i] = pSrc[i];
    2655       16214 :                 adfReal[i + 1] = pSrc[i + 1];
    2656             :             }
    2657        5950 :             break;
    2658             :         }
    2659             : 
    2660        4310 :         case GDT_UInt16:
    2661             :         {
    2662        4310 :             GUInt16 *pSrc =
    2663        4310 :                 reinterpret_cast<GUInt16 *>(poWK->papabySrcImage[iBand]);
    2664        4310 :             pSrc += iSrcOffset;
    2665       18884 :             for (int i = 0; i < nSrcLen; i += 2)
    2666             :             {
    2667       14574 :                 adfReal[i] = pSrc[i];
    2668       14574 :                 adfReal[i + 1] = pSrc[i + 1];
    2669             :             }
    2670        4310 :             break;
    2671             :         }
    2672             : 
    2673         946 :         case GDT_Int32:
    2674             :         {
    2675         946 :             GInt32 *pSrc =
    2676         946 :                 reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
    2677         946 :             pSrc += iSrcOffset;
    2678        2624 :             for (int i = 0; i < nSrcLen; i += 2)
    2679             :             {
    2680        1678 :                 adfReal[i] = pSrc[i];
    2681        1678 :                 adfReal[i + 1] = pSrc[i + 1];
    2682             :             }
    2683         946 :             break;
    2684             :         }
    2685             : 
    2686         946 :         case GDT_UInt32:
    2687             :         {
    2688         946 :             GUInt32 *pSrc =
    2689         946 :                 reinterpret_cast<GUInt32 *>(poWK->papabySrcImage[iBand]);
    2690         946 :             pSrc += iSrcOffset;
    2691        2624 :             for (int i = 0; i < nSrcLen; i += 2)
    2692             :             {
    2693        1678 :                 adfReal[i] = pSrc[i];
    2694        1678 :                 adfReal[i + 1] = pSrc[i + 1];
    2695             :             }
    2696         946 :             break;
    2697             :         }
    2698             : 
    2699         196 :         case GDT_Int64:
    2700             :         {
    2701         196 :             auto pSrc =
    2702         196 :                 reinterpret_cast<std::int64_t *>(poWK->papabySrcImage[iBand]);
    2703         196 :             pSrc += iSrcOffset;
    2704         392 :             for (int i = 0; i < nSrcLen; i += 2)
    2705             :             {
    2706         196 :                 adfReal[i] = static_cast<double>(pSrc[i]);
    2707         196 :                 adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
    2708             :             }
    2709         196 :             break;
    2710             :         }
    2711             : 
    2712         196 :         case GDT_UInt64:
    2713             :         {
    2714         196 :             auto pSrc =
    2715         196 :                 reinterpret_cast<std::uint64_t *>(poWK->papabySrcImage[iBand]);
    2716         196 :             pSrc += iSrcOffset;
    2717         392 :             for (int i = 0; i < nSrcLen; i += 2)
    2718             :             {
    2719         196 :                 adfReal[i] = static_cast<double>(pSrc[i]);
    2720         196 :                 adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
    2721             :             }
    2722         196 :             break;
    2723             :         }
    2724             : 
    2725           0 :         case GDT_Float16:
    2726             :         {
    2727           0 :             GFloat16 *pSrc =
    2728           0 :                 reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
    2729           0 :             pSrc += iSrcOffset;
    2730           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2731             :             {
    2732           0 :                 adfReal[i] = pSrc[i];
    2733           0 :                 adfReal[i + 1] = pSrc[i + 1];
    2734             :             }
    2735           0 :             break;
    2736             :         }
    2737             : 
    2738       25270 :         case GDT_Float32:
    2739             :         {
    2740       25270 :             float *pSrc =
    2741       25270 :                 reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
    2742       25270 :             pSrc += iSrcOffset;
    2743      121739 :             for (int i = 0; i < nSrcLen; i += 2)
    2744             :             {
    2745       96469 :                 adfReal[i] = double(pSrc[i]);
    2746       96469 :                 adfReal[i + 1] = double(pSrc[i + 1]);
    2747             :             }
    2748       25270 :             break;
    2749             :         }
    2750             : 
    2751         946 :         case GDT_Float64:
    2752             :         {
    2753         946 :             double *pSrc =
    2754         946 :                 reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
    2755         946 :             pSrc += iSrcOffset;
    2756        2624 :             for (int i = 0; i < nSrcLen; i += 2)
    2757             :             {
    2758        1678 :                 adfReal[i] = pSrc[i];
    2759        1678 :                 adfReal[i + 1] = pSrc[i + 1];
    2760             :             }
    2761         946 :             break;
    2762             :         }
    2763             : 
    2764     1169220 :         case GDT_CInt16:
    2765             :         {
    2766     1169220 :             GInt16 *pSrc =
    2767     1169220 :                 reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
    2768     1169220 :             pSrc += 2 * iSrcOffset;
    2769     4676020 :             for (int i = 0; i < nSrcLen; i += 2)
    2770             :             {
    2771     3506800 :                 adfReal[i] = pSrc[2 * i];
    2772     3506800 :                 padfImag[i] = pSrc[2 * i + 1];
    2773             : 
    2774     3506800 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2775     3506800 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2776             :             }
    2777     1169220 :             break;
    2778             :         }
    2779             : 
    2780         750 :         case GDT_CInt32:
    2781             :         {
    2782         750 :             GInt32 *pSrc =
    2783         750 :                 reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
    2784         750 :             pSrc += 2 * iSrcOffset;
    2785        2232 :             for (int i = 0; i < nSrcLen; i += 2)
    2786             :             {
    2787        1482 :                 adfReal[i] = pSrc[2 * i];
    2788        1482 :                 padfImag[i] = pSrc[2 * i + 1];
    2789             : 
    2790        1482 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2791        1482 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2792             :             }
    2793         750 :             break;
    2794             :         }
    2795             : 
    2796           0 :         case GDT_CFloat16:
    2797             :         {
    2798           0 :             GFloat16 *pSrc =
    2799           0 :                 reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
    2800           0 :             pSrc += 2 * iSrcOffset;
    2801           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2802             :             {
    2803           0 :                 adfReal[i] = pSrc[2 * i];
    2804           0 :                 padfImag[i] = pSrc[2 * i + 1];
    2805             : 
    2806           0 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2807           0 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2808             :             }
    2809           0 :             break;
    2810             :         }
    2811             : 
    2812         750 :         case GDT_CFloat32:
    2813             :         {
    2814         750 :             float *pSrc =
    2815         750 :                 reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
    2816         750 :             pSrc += 2 * iSrcOffset;
    2817        2232 :             for (int i = 0; i < nSrcLen; i += 2)
    2818             :             {
    2819        1482 :                 adfReal[i] = double(pSrc[2 * i]);
    2820        1482 :                 padfImag[i] = double(pSrc[2 * i + 1]);
    2821             : 
    2822        1482 :                 adfReal[i + 1] = double(pSrc[2 * i + 2]);
    2823        1482 :                 padfImag[i + 1] = double(pSrc[2 * i + 3]);
    2824             :             }
    2825         750 :             break;
    2826             :         }
    2827             : 
    2828         750 :         case GDT_CFloat64:
    2829             :         {
    2830         750 :             double *pSrc =
    2831         750 :                 reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
    2832         750 :             pSrc += 2 * iSrcOffset;
    2833        2232 :             for (int i = 0; i < nSrcLen; i += 2)
    2834             :             {
    2835        1482 :                 adfReal[i] = pSrc[2 * i];
    2836        1482 :                 padfImag[i] = pSrc[2 * i + 1];
    2837             : 
    2838        1482 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2839        1482 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2840             :             }
    2841         750 :             break;
    2842             :         }
    2843             : 
    2844           0 :         case GDT_Unknown:
    2845             :         case GDT_TypeCount:
    2846           0 :             CPLAssert(false);
    2847             :             if (padfDensity)
    2848             :                 memset(padfDensity, 0, nSrcLen * sizeof(double));
    2849             :             return false;
    2850             :     }
    2851             : 
    2852     2346910 :     if (padfDensity == nullptr)
    2853     1197590 :         return true;
    2854             : 
    2855     1149320 :     if (poWK->pafUnifiedSrcDensity == nullptr)
    2856             :     {
    2857     3256740 :         for (int i = 0; i < nSrcLen; i += 2)
    2858             :         {
    2859             :             // Take into account earlier calcs.
    2860     2127390 :             if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2861             :             {
    2862     2087480 :                 padfDensity[i] = 1.0;
    2863     2087480 :                 bHasValid = true;
    2864             :             }
    2865             : 
    2866     2127390 :             if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2867             :             {
    2868     2088140 :                 padfDensity[i + 1] = 1.0;
    2869     2088140 :                 bHasValid = true;
    2870             :             }
    2871             :         }
    2872             :     }
    2873             :     else
    2874             :     {
    2875       70068 :         for (int i = 0; i < nSrcLen; i += 2)
    2876             :         {
    2877       50103 :             if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2878       50103 :                 padfDensity[i] =
    2879       50103 :                     double(poWK->pafUnifiedSrcDensity[iSrcOffset + i]);
    2880       50103 :             if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2881       49252 :                 bHasValid = true;
    2882             : 
    2883       50103 :             if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2884       50103 :                 padfDensity[i + 1] =
    2885       50103 :                     double(poWK->pafUnifiedSrcDensity[iSrcOffset + i + 1]);
    2886       50103 :             if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2887       49170 :                 bHasValid = true;
    2888             :         }
    2889             :     }
    2890             : 
    2891     1149320 :     return bHasValid;
    2892             : }
    2893             : 
    2894             : /************************************************************************/
    2895             : /*                            GWKGetPixelT()                            */
    2896             : /************************************************************************/
    2897             : 
    2898             : template <class T>
    2899    14964159 : static bool GWKGetPixelT(const GDALWarpKernel *poWK, int iBand,
    2900             :                          GPtrDiff_t iSrcOffset, double *pdfDensity, T *pValue)
    2901             : 
    2902             : {
    2903    14964159 :     T *pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
    2904             : 
    2905    33171143 :     if ((poWK->panUnifiedSrcValid != nullptr &&
    2906    29928318 :          !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset)) ||
    2907    14964159 :         (poWK->papanBandSrcValid != nullptr &&
    2908      589836 :          poWK->papanBandSrcValid[iBand] != nullptr &&
    2909      589836 :          !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset)))
    2910             :     {
    2911           9 :         *pdfDensity = 0.0;
    2912           9 :         return false;
    2913             :     }
    2914             : 
    2915    14964159 :     *pValue = pSrc[iSrcOffset];
    2916             : 
    2917    14964159 :     if (poWK->pafUnifiedSrcDensity == nullptr)
    2918    13841766 :         *pdfDensity = 1.0;
    2919             :     else
    2920     1122362 :         *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    2921             : 
    2922    14964159 :     return *pdfDensity != 0.0;
    2923             : }
    2924             : 
    2925             : /************************************************************************/
    2926             : /*                        GWKBilinearResample()                         */
    2927             : /*     Set of bilinear interpolators                                    */
    2928             : /************************************************************************/
    2929             : 
    2930       77448 : static bool GWKBilinearResample4Sample(const GDALWarpKernel *poWK, int iBand,
    2931             :                                        double dfSrcX, double dfSrcY,
    2932             :                                        double *pdfDensity, double *pdfReal,
    2933             :                                        double *pdfImag)
    2934             : 
    2935             : {
    2936             :     // Save as local variables to avoid following pointers.
    2937       77448 :     const int nSrcXSize = poWK->nSrcXSize;
    2938       77448 :     const int nSrcYSize = poWK->nSrcYSize;
    2939             : 
    2940       77448 :     int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    2941       77448 :     int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    2942       77448 :     double dfRatioX = 1.5 - (dfSrcX - iSrcX);
    2943       77448 :     double dfRatioY = 1.5 - (dfSrcY - iSrcY);
    2944       77448 :     bool bShifted = false;
    2945             : 
    2946       77448 :     if (iSrcX == -1)
    2947             :     {
    2948        1534 :         iSrcX = 0;
    2949        1534 :         dfRatioX = 1;
    2950             :     }
    2951       77448 :     if (iSrcY == -1)
    2952             :     {
    2953        7734 :         iSrcY = 0;
    2954        7734 :         dfRatioY = 1;
    2955             :     }
    2956       77448 :     GPtrDiff_t iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    2957             : 
    2958             :     // Shift so we don't overrun the array.
    2959       77448 :     if (static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize == iSrcOffset + 1 ||
    2960       77330 :         static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize ==
    2961       77330 :             iSrcOffset + nSrcXSize + 1)
    2962             :     {
    2963         230 :         bShifted = true;
    2964         230 :         --iSrcOffset;
    2965             :     }
    2966             : 
    2967       77448 :     double adfDensity[2] = {0.0, 0.0};
    2968       77448 :     double adfReal[2] = {0.0, 0.0};
    2969       77448 :     double adfImag[2] = {0.0, 0.0};
    2970       77448 :     double dfAccumulatorReal = 0.0;
    2971       77448 :     double dfAccumulatorImag = 0.0;
    2972       77448 :     double dfAccumulatorDensity = 0.0;
    2973       77448 :     double dfAccumulatorDivisor = 0.0;
    2974             : 
    2975       77448 :     const GPtrDiff_t nSrcPixels =
    2976       77448 :         static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize;
    2977             :     // Get pixel row.
    2978       77448 :     if (iSrcY >= 0 && iSrcY < nSrcYSize && iSrcOffset >= 0 &&
    2979      154896 :         iSrcOffset < nSrcPixels &&
    2980       77448 :         GWKGetPixelRow(poWK, iBand, iSrcOffset, 1, adfDensity, adfReal,
    2981             :                        adfImag))
    2982             :     {
    2983       71504 :         double dfMult1 = dfRatioX * dfRatioY;
    2984       71504 :         double dfMult2 = (1.0 - dfRatioX) * dfRatioY;
    2985             : 
    2986             :         // Shifting corrected.
    2987       71504 :         if (bShifted)
    2988             :         {
    2989         230 :             adfReal[0] = adfReal[1];
    2990         230 :             adfImag[0] = adfImag[1];
    2991         230 :             adfDensity[0] = adfDensity[1];
    2992             :         }
    2993             : 
    2994             :         // Upper Left Pixel.
    2995       71504 :         if (iSrcX >= 0 && iSrcX < nSrcXSize &&
    2996       71504 :             adfDensity[0] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2997             :         {
    2998       66050 :             dfAccumulatorDivisor += dfMult1;
    2999             : 
    3000       66050 :             dfAccumulatorReal += adfReal[0] * dfMult1;
    3001       66050 :             dfAccumulatorImag += adfImag[0] * dfMult1;
    3002       66050 :             dfAccumulatorDensity += adfDensity[0] * dfMult1;
    3003             :         }
    3004             : 
    3005             :         // Upper Right Pixel.
    3006       71504 :         if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
    3007       70609 :             adfDensity[1] > SRC_DENSITY_THRESHOLD_DOUBLE)
    3008             :         {
    3009       65335 :             dfAccumulatorDivisor += dfMult2;
    3010             : 
    3011       65335 :             dfAccumulatorReal += adfReal[1] * dfMult2;
    3012       65335 :             dfAccumulatorImag += adfImag[1] * dfMult2;
    3013       65335 :             dfAccumulatorDensity += adfDensity[1] * dfMult2;
    3014             :         }
    3015             :     }
    3016             : 
    3017             :     // Get pixel row.
    3018       77448 :     if (iSrcY + 1 >= 0 && iSrcY + 1 < nSrcYSize &&
    3019      228032 :         iSrcOffset + nSrcXSize >= 0 && iSrcOffset + nSrcXSize < nSrcPixels &&
    3020       73136 :         GWKGetPixelRow(poWK, iBand, iSrcOffset + nSrcXSize, 1, adfDensity,
    3021             :                        adfReal, adfImag))
    3022             :     {
    3023       67577 :         double dfMult1 = dfRatioX * (1.0 - dfRatioY);
    3024       67577 :         double dfMult2 = (1.0 - dfRatioX) * (1.0 - dfRatioY);
    3025             : 
    3026             :         // Shifting corrected
    3027       67577 :         if (bShifted)
    3028             :         {
    3029         112 :             adfReal[0] = adfReal[1];
    3030         112 :             adfImag[0] = adfImag[1];
    3031         112 :             adfDensity[0] = adfDensity[1];
    3032             :         }
    3033             : 
    3034             :         // Lower Left Pixel
    3035       67577 :         if (iSrcX >= 0 && iSrcX < nSrcXSize &&
    3036       67577 :             adfDensity[0] > SRC_DENSITY_THRESHOLD_DOUBLE)
    3037             :         {
    3038       62298 :             dfAccumulatorDivisor += dfMult1;
    3039             : 
    3040       62298 :             dfAccumulatorReal += adfReal[0] * dfMult1;
    3041       62298 :             dfAccumulatorImag += adfImag[0] * dfMult1;
    3042       62298 :             dfAccumulatorDensity += adfDensity[0] * dfMult1;
    3043             :         }
    3044             : 
    3045             :         // Lower Right Pixel.
    3046       67577 :         if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
    3047       66800 :             adfDensity[1] > SRC_DENSITY_THRESHOLD_DOUBLE)
    3048             :         {
    3049       61823 :             dfAccumulatorDivisor += dfMult2;
    3050             : 
    3051       61823 :             dfAccumulatorReal += adfReal[1] * dfMult2;
    3052       61823 :             dfAccumulatorImag += adfImag[1] * dfMult2;
    3053       61823 :             dfAccumulatorDensity += adfDensity[1] * dfMult2;
    3054             :         }
    3055             :     }
    3056             : 
    3057             :     /* -------------------------------------------------------------------- */
    3058             :     /*      Return result.                                                  */
    3059             :     /* -------------------------------------------------------------------- */
    3060       77448 :     if (dfAccumulatorDivisor == 1.0)
    3061             :     {
    3062       45929 :         *pdfReal = dfAccumulatorReal;
    3063       45929 :         *pdfImag = dfAccumulatorImag;
    3064       45929 :         *pdfDensity = dfAccumulatorDensity;
    3065       45929 :         return false;
    3066             :     }
    3067       31519 :     else if (dfAccumulatorDivisor < 0.00001)
    3068             :     {
    3069           0 :         *pdfReal = 0.0;
    3070           0 :         *pdfImag = 0.0;
    3071           0 :         *pdfDensity = 0.0;
    3072           0 :         return false;
    3073             :     }
    3074             :     else
    3075             :     {
    3076       31519 :         *pdfReal = dfAccumulatorReal / dfAccumulatorDivisor;
    3077       31519 :         *pdfImag = dfAccumulatorImag / dfAccumulatorDivisor;
    3078       31519 :         *pdfDensity = dfAccumulatorDensity / dfAccumulatorDivisor;
    3079       31519 :         return true;
    3080             :     }
    3081             : }
    3082             : 
    3083             : template <class T>
    3084     8780972 : static bool GWKBilinearResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
    3085             :                                                int iBand, double dfSrcX,
    3086             :                                                double dfSrcY, T *pValue)
    3087             : 
    3088             : {
    3089             : 
    3090     8780972 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    3091     8780972 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    3092     8780972 :     GPtrDiff_t iSrcOffset =
    3093     8780972 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3094     8780972 :     const double dfRatioX = 1.5 - (dfSrcX - iSrcX);
    3095     8780972 :     const double dfRatioY = 1.5 - (dfSrcY - iSrcY);
    3096             : 
    3097     8780972 :     const T *const pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
    3098             : 
    3099     8780972 :     if (iSrcX >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
    3100     6025939 :         iSrcY + 1 < poWK->nSrcYSize)
    3101             :     {
    3102     5834442 :         const double dfAccumulator =
    3103     5834442 :             (double(pSrc[iSrcOffset]) * dfRatioX +
    3104     5834442 :              double(pSrc[iSrcOffset + 1]) * (1.0 - dfRatioX)) *
    3105             :                 dfRatioY +
    3106     5834442 :             (double(pSrc[iSrcOffset + poWK->nSrcXSize]) * dfRatioX +
    3107     5834442 :              double(pSrc[iSrcOffset + 1 + poWK->nSrcXSize]) *
    3108     5834442 :                  (1.0 - dfRatioX)) *
    3109     5834442 :                 (1.0 - dfRatioY);
    3110             : 
    3111     5834442 :         *pValue = GWKRoundValueT<T>(dfAccumulator);
    3112             : 
    3113     5834442 :         return true;
    3114             :     }
    3115             : 
    3116     2946530 :     double dfAccumulatorDivisor = 0.0;
    3117     2946530 :     double dfAccumulator = 0.0;
    3118             : 
    3119             :     // Upper Left Pixel.
    3120     2946530 :     if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY >= 0 &&
    3121      564876 :         iSrcY < poWK->nSrcYSize)
    3122             :     {
    3123      564876 :         const double dfMult = dfRatioX * dfRatioY;
    3124             : 
    3125      564876 :         dfAccumulatorDivisor += dfMult;
    3126             : 
    3127      564876 :         dfAccumulator += double(pSrc[iSrcOffset]) * dfMult;
    3128             :     }
    3129             : 
    3130             :     // Upper Right Pixel.
    3131     2946530 :     if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
    3132     2261926 :         iSrcY < poWK->nSrcYSize)
    3133             :     {
    3134     2261926 :         const double dfMult = (1.0 - dfRatioX) * dfRatioY;
    3135             : 
    3136     2261926 :         dfAccumulatorDivisor += dfMult;
    3137             : 
    3138     2261926 :         dfAccumulator += double(pSrc[iSrcOffset + 1]) * dfMult;
    3139             :     }
    3140             : 
    3141             :     // Lower Right Pixel.
    3142     2946530 :     if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
    3143     2512924 :         iSrcY + 1 < poWK->nSrcYSize)
    3144             :     {
    3145     2261243 :         const double dfMult = (1.0 - dfRatioX) * (1.0 - dfRatioY);
    3146             : 
    3147     2261243 :         dfAccumulatorDivisor += dfMult;
    3148             : 
    3149     2261243 :         dfAccumulator +=
    3150     2261243 :             double(pSrc[iSrcOffset + 1 + poWK->nSrcXSize]) * dfMult;
    3151             :     }
    3152             : 
    3153             :     // Lower Left Pixel.
    3154     2946530 :     if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
    3155      815601 :         iSrcY + 1 < poWK->nSrcYSize)
    3156             :     {
    3157      563917 :         const double dfMult = dfRatioX * (1.0 - dfRatioY);
    3158             : 
    3159      563917 :         dfAccumulatorDivisor += dfMult;
    3160             : 
    3161      563917 :         dfAccumulator += double(pSrc[iSrcOffset + poWK->nSrcXSize]) * dfMult;
    3162             :     }
    3163             : 
    3164             :     /* -------------------------------------------------------------------- */
    3165             :     /*      Return result.                                                  */
    3166             :     /* -------------------------------------------------------------------- */
    3167     2946530 :     double dfValue = 0.0;
    3168             : 
    3169     2946530 :     if (dfAccumulatorDivisor < 0.00001)
    3170             :     {
    3171           0 :         *pValue = 0;
    3172           0 :         return false;
    3173             :     }
    3174     2946530 :     else if (dfAccumulatorDivisor == 1.0)
    3175             :     {
    3176       22176 :         dfValue = dfAccumulator;
    3177             :     }
    3178             :     else
    3179             :     {
    3180     2924358 :         dfValue = dfAccumulator / dfAccumulatorDivisor;
    3181             :     }
    3182             : 
    3183     2946530 :     *pValue = GWKRoundValueT<T>(dfValue);
    3184             : 
    3185     2946530 :     return true;
    3186             : }
    3187             : 
    3188             : /************************************************************************/
    3189             : /*                        GWKCubicResample()                            */
    3190             : /*     Set of bicubic interpolators using cubic convolution.            */
    3191             : /************************************************************************/
    3192             : 
    3193             : // http://verona.fi-p.unam.mx/boris/practicas/CubConvInterp.pdf Formula 18
    3194             : // or http://en.wikipedia.org/wiki/Cubic_Hermite_spline : CINTx(p_1,p0,p1,p2)
    3195             : // http://en.wikipedia.org/wiki/Bicubic_interpolation: matrix notation
    3196             : 
    3197             : template <typename T>
    3198     1810720 : static inline T CubicConvolution(T distance1, T distance2, T distance3, T f0,
    3199             :                                  T f1, T f2, T f3)
    3200             : {
    3201     1810720 :     return (f1 + T(0.5) * (distance1 * (f2 - f0) +
    3202     1810720 :                            distance2 * (2 * f0 - 5 * f1 + 4 * f2 - f3) +
    3203     1810720 :                            distance3 * (3 * (f1 - f2) + f3 - f0)));
    3204             : }
    3205             : 
    3206             : /************************************************************************/
    3207             : /*                       GWKCubicComputeWeights()                       */
    3208             : /************************************************************************/
    3209             : 
    3210             : // adfCoeffs[2] = 1.0 - (adfCoeffs[0] + adfCoeffs[1] - adfCoeffs[3]);
    3211             : 
    3212             : template <typename T>
    3213    97650960 : static inline void GWKCubicComputeWeights(T x, T coeffs[4])
    3214             : {
    3215    97650960 :     const T halfX = T(0.5) * x;
    3216    97650960 :     const T threeX = T(3.0) * x;
    3217    97650960 :     const T halfX2 = halfX * x;
    3218             : 
    3219    97650960 :     coeffs[0] = halfX * (-1 + x * (2 - x));
    3220    97650960 :     coeffs[1] = 1 + halfX2 * (-5 + threeX);
    3221    97650960 :     coeffs[2] = halfX * (1 + x * (4 - threeX));
    3222    97650960 :     coeffs[3] = halfX2 * (-1 + x);
    3223    97650960 : }
    3224             : 
    3225    14682546 : template <typename T> inline double CONVOL4(const double v1[4], const T v2[4])
    3226             : {
    3227    14682546 :     return v1[0] * double(v2[0]) + v1[1] * double(v2[1]) +
    3228    14682546 :            v1[2] * double(v2[2]) + v1[3] * double(v2[3]);
    3229             : }
    3230             : 
    3231             : #if 0
    3232             : // Optimal (in theory...) for max 2 convolutions: 14 multiplications
    3233             : // instead of 17.
    3234             : // TODO(schwehr): Use an inline function.
    3235             : #define GWKCubicComputeWeights_Optim2MAX(dfX_, adfCoeffs, dfHalfX)             \
    3236             :     {                                                                          \
    3237             :         const double dfX = dfX_;                                               \
    3238             :         dfHalfX = 0.5 * dfX;                                                   \
    3239             :         const double dfThreeX = 3.0 * dfX;                                     \
    3240             :         const double dfXMinus1 = dfX - 1;                                      \
    3241             :                                                                                \
    3242             :         adfCoeffs[0] = -1 + dfX * (2 - dfX);                                   \
    3243             :         adfCoeffs[1] = dfX * (-5 + dfThreeX);                                  \
    3244             :         /*adfCoeffs[2] = 1 + dfX * (4 - dfThreeX);*/                           \
    3245             :         adfCoeffs[2] = -dfXMinus1 - adfCoeffs[1];                              \
    3246             :         /*adfCoeffs[3] = dfX * (-1 + dfX); */                                  \
    3247             :         adfCoeffs[3] = dfXMinus1 - adfCoeffs[0];                               \
    3248             :     }
    3249             : 
    3250             : // TODO(schwehr): Use an inline function.
    3251             : #define CONVOL4_Optim2MAX(adfCoeffs, v, dfHalfX)                               \
    3252             :     ((v)[1] + (dfHalfX) * ((adfCoeffs)[0] * (v)[0] + (adfCoeffs)[1] * (v)[1] + \
    3253             :                            (adfCoeffs)[2] * (v)[2] + (adfCoeffs)[3] * (v)[3]))
    3254             : #endif
    3255             : 
    3256      302045 : static bool GWKCubicResample4Sample(const GDALWarpKernel *poWK, int iBand,
    3257             :                                     double dfSrcX, double dfSrcY,
    3258             :                                     double *pdfDensity, double *pdfReal,
    3259             :                                     double *pdfImag)
    3260             : 
    3261             : {
    3262      302045 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3263      302045 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3264      302045 :     GPtrDiff_t iSrcOffset =
    3265      302045 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3266      302045 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3267      302045 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3268      302045 :     double adfDensity[4] = {};
    3269      302045 :     double adfReal[4] = {};
    3270      302045 :     double adfImag[4] = {};
    3271             : 
    3272             :     // Get the bilinear interpolation at the image borders.
    3273      302045 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3274      286140 :         iSrcY + 2 >= poWK->nSrcYSize)
    3275       24670 :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3276       24670 :                                           pdfDensity, pdfReal, pdfImag);
    3277             : 
    3278      277375 :     double adfValueDens[4] = {};
    3279      277375 :     double adfValueReal[4] = {};
    3280      277375 :     double adfValueImag[4] = {};
    3281             : 
    3282      277375 :     double adfCoeffsX[4] = {};
    3283      277375 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
    3284             : 
    3285     1240570 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3286             :     {
    3287     1009640 :         if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
    3288      998035 :                             2, adfDensity, adfReal, adfImag) ||
    3289      998035 :             adfDensity[0] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3290      980395 :             adfDensity[1] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3291     2979770 :             adfDensity[2] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3292      972094 :             adfDensity[3] < SRC_DENSITY_THRESHOLD_DOUBLE)
    3293             :         {
    3294       46449 :             return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3295       46449 :                                               pdfDensity, pdfReal, pdfImag);
    3296             :         }
    3297             : 
    3298      963196 :         adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
    3299      963196 :         adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
    3300      963196 :         adfValueImag[i + 1] = CONVOL4(adfCoeffsX, adfImag);
    3301             :     }
    3302             : 
    3303             :     /* -------------------------------------------------------------------- */
    3304             :     /*      For now, if we have any pixels missing in the kernel area,      */
    3305             :     /*      we fallback on using bilinear interpolation.  Ideally we        */
    3306             :     /*      should do "weight adjustment" of our results similarly to       */
    3307             :     /*      what is done for the cubic spline and lanc. interpolators.      */
    3308             :     /* -------------------------------------------------------------------- */
    3309             : 
    3310      230926 :     double adfCoeffsY[4] = {};
    3311      230926 :     GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
    3312             : 
    3313      230926 :     *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
    3314      230926 :     *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
    3315      230926 :     *pdfImag = CONVOL4(adfCoeffsY, adfValueImag);
    3316             : 
    3317      230926 :     return true;
    3318             : }
    3319             : 
    3320             : #ifdef USE_SSE2
    3321             : 
    3322             : /************************************************************************/
    3323             : /*                           XMMLoad4Values()                           */
    3324             : /*                                                                      */
    3325             : /*  Load 4 packed byte or uint16, cast them to float and put them in a  */
    3326             : /*  m128 register.                                                      */
    3327             : /************************************************************************/
    3328             : 
    3329   566236000 : static CPL_INLINE __m128 XMMLoad4Values(const GByte *ptr)
    3330             : {
    3331             :     unsigned int i;
    3332   566236000 :     memcpy(&i, ptr, 4);
    3333  1132470000 :     __m128i xmm_i = _mm_cvtsi32_si128(i);
    3334             :     // Zero extend 4 packed unsigned 8-bit integers in a to packed
    3335             :     // 32-bit integers.
    3336             : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
    3337             :     xmm_i = _mm_cvtepu8_epi32(xmm_i);
    3338             : #else
    3339  1132470000 :     xmm_i = _mm_unpacklo_epi8(xmm_i, _mm_setzero_si128());
    3340  1132470000 :     xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
    3341             : #endif
    3342  1132470000 :     return _mm_cvtepi32_ps(xmm_i);
    3343             : }
    3344             : 
    3345     1108340 : static CPL_INLINE __m128 XMMLoad4Values(const GUInt16 *ptr)
    3346             : {
    3347             :     GUInt64 i;
    3348     1108340 :     memcpy(&i, ptr, 8);
    3349     2216690 :     __m128i xmm_i = _mm_cvtsi64_si128(i);
    3350             :     // Zero extend 4 packed unsigned 16-bit integers in a to packed
    3351             :     // 32-bit integers.
    3352             : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
    3353             :     xmm_i = _mm_cvtepu16_epi32(xmm_i);
    3354             : #else
    3355     2216690 :     xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
    3356             : #endif
    3357     2216690 :     return _mm_cvtepi32_ps(xmm_i);
    3358             : }
    3359             : 
    3360             : /************************************************************************/
    3361             : /*                           XMMHorizontalAdd()                         */
    3362             : /*                                                                      */
    3363             : /*  Return the sum of the 4 floating points of the register.            */
    3364             : /************************************************************************/
    3365             : 
    3366             : #if defined(__SSE3__) || defined(USE_NEON_OPTIMIZATIONS)
    3367             : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
    3368             : {
    3369             :     __m128 shuf = _mm_movehdup_ps(v);   // (v3   , v3   , v1   , v1)
    3370             :     __m128 sums = _mm_add_ps(v, shuf);  // (v3+v3, v3+v2, v1+v1, v1+v0)
    3371             :     shuf = _mm_movehl_ps(shuf, sums);   // (v3   , v3   , v3+v3, v3+v2)
    3372             :     sums = _mm_add_ss(sums, shuf);      // (v1+v0)+(v3+v2)
    3373             :     return _mm_cvtss_f32(sums);
    3374             : }
    3375             : #else
    3376   141836000 : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
    3377             : {
    3378   141836000 :     __m128 shuf = _mm_movehl_ps(v, v);     // (v3   , v2   , v3   , v2)
    3379   141836000 :     __m128 sums = _mm_add_ps(v, shuf);     // (v3+v3, v2+v2, v3+v1, v2+v0)
    3380   141836000 :     shuf = _mm_shuffle_ps(sums, sums, 1);  // (v2+v0, v2+v0, v2+v0, v3+v1)
    3381   141836000 :     sums = _mm_add_ss(sums, shuf);         // (v2+v0)+(v3+v1)
    3382   141836000 :     return _mm_cvtss_f32(sums);
    3383             : }
    3384             : #endif
    3385             : 
    3386             : #endif  // define USE_SSE2
    3387             : 
    3388             : /************************************************************************/
    3389             : /*            GWKCubicResampleSrcMaskIsDensity4SampleRealT()            */
    3390             : /************************************************************************/
    3391             : 
    3392             : // Note: if USE_SSE_CUBIC_IMPL, only instantiate that for Byte and UInt16,
    3393             : // because there are a few assumptions above those types.
    3394             : // We do not define USE_SSE_CUBIC_IMPL since in practice, it gives zero
    3395             : // perf benefit.
    3396             : 
    3397             : template <class T>
    3398      389755 : static CPL_INLINE bool GWKCubicResampleSrcMaskIsDensity4SampleRealT(
    3399             :     const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
    3400             :     double *pdfDensity, double *pdfReal)
    3401             : {
    3402      389755 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3403      389755 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3404      389755 :     const GPtrDiff_t iSrcOffset =
    3405      389755 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3406             : 
    3407             :     // Get the bilinear interpolation at the image borders.
    3408      389755 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3409      387271 :         iSrcY + 2 >= poWK->nSrcYSize)
    3410             :     {
    3411        2484 :         double adfImagIgnored[4] = {};
    3412        2484 :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3413        2484 :                                           pdfDensity, pdfReal, adfImagIgnored);
    3414             :     }
    3415             : 
    3416             : #if defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
    3417             :     const float fDeltaX = static_cast<float>(dfSrcX) - 0.5f - iSrcX;
    3418             :     const float fDeltaY = static_cast<float>(dfSrcY) - 0.5f - iSrcY;
    3419             : 
    3420             :     // TODO(schwehr): Explain the magic numbers.
    3421             :     float afTemp[4 + 4 + 4 + 1];
    3422             :     float *pafAligned =
    3423             :         reinterpret_cast<float *>(afTemp + ((size_t)afTemp & 0xf));
    3424             :     float *pafCoeffs = pafAligned;
    3425             :     float *pafDensity = pafAligned + 4;
    3426             :     float *pafValue = pafAligned + 8;
    3427             : 
    3428             :     const float fHalfDeltaX = 0.5f * fDeltaX;
    3429             :     const float fThreeDeltaX = 3.0f * fDeltaX;
    3430             :     const float fHalfDeltaX2 = fHalfDeltaX * fDeltaX;
    3431             : 
    3432             :     pafCoeffs[0] = fHalfDeltaX * (-1 + fDeltaX * (2 - fDeltaX));
    3433             :     pafCoeffs[1] = 1 + fHalfDeltaX2 * (-5 + fThreeDeltaX);
    3434             :     pafCoeffs[2] = fHalfDeltaX * (1 + fDeltaX * (4 - fThreeDeltaX));
    3435             :     pafCoeffs[3] = fHalfDeltaX2 * (-1 + fDeltaX);
    3436             :     __m128 xmmCoeffs = _mm_load_ps(pafCoeffs);
    3437             :     const __m128 xmmThreshold = _mm_load1_ps(&SRC_DENSITY_THRESHOLD_FLOAT);
    3438             : 
    3439             :     __m128 xmmMaskLowDensity = _mm_setzero_ps();
    3440             :     for (GPtrDiff_t i = -1, iOffset = iSrcOffset - poWK->nSrcXSize - 1; i < 3;
    3441             :          i++, iOffset += poWK->nSrcXSize)
    3442             :     {
    3443             :         const __m128 xmmDensity =
    3444             :             _mm_loadu_ps(poWK->pafUnifiedSrcDensity + iOffset);
    3445             :         xmmMaskLowDensity = _mm_or_ps(xmmMaskLowDensity,
    3446             :                                       _mm_cmplt_ps(xmmDensity, xmmThreshold));
    3447             :         pafDensity[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
    3448             : 
    3449             :         const __m128 xmmValues =
    3450             :             XMMLoad4Values(((T *)poWK->papabySrcImage[iBand]) + iOffset);
    3451             :         pafValue[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValues));
    3452             :     }
    3453             :     if (_mm_movemask_ps(xmmMaskLowDensity))
    3454             :     {
    3455             :         double adfImagIgnored[4] = {};
    3456             :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3457             :                                           pdfDensity, pdfReal, adfImagIgnored);
    3458             :     }
    3459             : 
    3460             :     const float fHalfDeltaY = 0.5f * fDeltaY;
    3461             :     const float fThreeDeltaY = 3.0f * fDeltaY;
    3462             :     const float fHalfDeltaY2 = fHalfDeltaY * fDeltaY;
    3463             : 
    3464             :     pafCoeffs[0] = fHalfDeltaY * (-1 + fDeltaY * (2 - fDeltaY));
    3465             :     pafCoeffs[1] = 1 + fHalfDeltaY2 * (-5 + fThreeDeltaY);
    3466             :     pafCoeffs[2] = fHalfDeltaY * (1 + fDeltaY * (4 - fThreeDeltaY));
    3467             :     pafCoeffs[3] = fHalfDeltaY2 * (-1 + fDeltaY);
    3468             : 
    3469             :     xmmCoeffs = _mm_load_ps(pafCoeffs);
    3470             : 
    3471             :     const __m128 xmmDensity = _mm_load_ps(pafDensity);
    3472             :     const __m128 xmmValue = _mm_load_ps(pafValue);
    3473             :     *pdfDensity = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
    3474             :     *pdfReal = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValue));
    3475             : 
    3476             :     // We did all above computations on float32 whereas the general case is
    3477             :     // float64. Not sure if one is fundamentally more correct than the other
    3478             :     // one, but we want our optimization to give the same result as the
    3479             :     // general case as much as possible, so if the resulting value is
    3480             :     // close to some_int_value + 0.5, redo the computation with the general
    3481             :     // case.
    3482             :     // Note: If other types than Byte or UInt16, will need changes.
    3483             :     if (fabs(*pdfReal - static_cast<int>(*pdfReal) - 0.5) > .007)
    3484             :         return true;
    3485             : 
    3486             : #endif  // defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
    3487             : 
    3488      387271 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3489      387271 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3490             : 
    3491      387271 :     double adfValueDens[4] = {};
    3492      387271 :     double adfValueReal[4] = {};
    3493             : 
    3494      387271 :     double adfCoeffsX[4] = {};
    3495      387271 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
    3496             : 
    3497      387271 :     double adfCoeffsY[4] = {};
    3498      387271 :     GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
    3499             : 
    3500     1930200 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3501             :     {
    3502     1544480 :         const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
    3503             : #if !(defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2))
    3504     1544480 :         if (poWK->pafUnifiedSrcDensity[iOffset + 0] <
    3505     1542940 :                 SRC_DENSITY_THRESHOLD_FLOAT ||
    3506     1542940 :             poWK->pafUnifiedSrcDensity[iOffset + 1] <
    3507     1542940 :                 SRC_DENSITY_THRESHOLD_FLOAT ||
    3508     1542940 :             poWK->pafUnifiedSrcDensity[iOffset + 2] <
    3509     1542940 :                 SRC_DENSITY_THRESHOLD_FLOAT ||
    3510     1542940 :             poWK->pafUnifiedSrcDensity[iOffset + 3] <
    3511             :                 SRC_DENSITY_THRESHOLD_FLOAT)
    3512             :         {
    3513        1551 :             double adfImagIgnored[4] = {};
    3514        1551 :             return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3515             :                                               pdfDensity, pdfReal,
    3516        1551 :                                               adfImagIgnored);
    3517             :         }
    3518             : #endif
    3519             : 
    3520     3085860 :         adfValueDens[i + 1] =
    3521     1542930 :             CONVOL4(adfCoeffsX, poWK->pafUnifiedSrcDensity + iOffset);
    3522             : 
    3523     1542930 :         adfValueReal[i + 1] = CONVOL4(
    3524             :             adfCoeffsX,
    3525     1542930 :             reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
    3526             :     }
    3527             : 
    3528      385720 :     *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
    3529      385720 :     *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
    3530             : 
    3531      385720 :     return true;
    3532             : }
    3533             : 
    3534             : /************************************************************************/
    3535             : /*              GWKCubicResampleSrcMaskIsDensity4SampleReal()             */
    3536             : /*     Bi-cubic when source has and only has pafUnifiedSrcDensity.      */
    3537             : /************************************************************************/
    3538             : 
    3539           0 : static bool GWKCubicResampleSrcMaskIsDensity4SampleReal(
    3540             :     const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
    3541             :     double *pdfDensity, double *pdfReal)
    3542             : 
    3543             : {
    3544           0 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3545           0 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3546           0 :     const GPtrDiff_t iSrcOffset =
    3547           0 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3548           0 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3549           0 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3550             : 
    3551             :     // Get the bilinear interpolation at the image borders.
    3552           0 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3553           0 :         iSrcY + 2 >= poWK->nSrcYSize)
    3554             :     {
    3555           0 :         double adfImagIgnored[4] = {};
    3556           0 :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3557           0 :                                           pdfDensity, pdfReal, adfImagIgnored);
    3558             :     }
    3559             : 
    3560           0 :     double adfCoeffsX[4] = {};
    3561           0 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
    3562             : 
    3563           0 :     double adfCoeffsY[4] = {};
    3564           0 :     GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
    3565             : 
    3566           0 :     double adfValueDens[4] = {};
    3567           0 :     double adfValueReal[4] = {};
    3568           0 :     double adfDensity[4] = {};
    3569           0 :     double adfReal[4] = {};
    3570           0 :     double adfImagIgnored[4] = {};
    3571             : 
    3572           0 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3573             :     {
    3574           0 :         if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
    3575           0 :                             2, adfDensity, adfReal, adfImagIgnored) ||
    3576           0 :             adfDensity[0] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3577           0 :             adfDensity[1] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3578           0 :             adfDensity[2] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3579           0 :             adfDensity[3] < SRC_DENSITY_THRESHOLD_DOUBLE)
    3580             :         {
    3581           0 :             return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3582             :                                               pdfDensity, pdfReal,
    3583           0 :                                               adfImagIgnored);
    3584             :         }
    3585             : 
    3586           0 :         adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
    3587           0 :         adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
    3588             :     }
    3589             : 
    3590           0 :     *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
    3591           0 :     *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
    3592             : 
    3593           0 :     return true;
    3594             : }
    3595             : 
    3596             : template <class T>
    3597     2300964 : static bool GWKCubicResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
    3598             :                                             int iBand, double dfSrcX,
    3599             :                                             double dfSrcY, T *pValue)
    3600             : 
    3601             : {
    3602     2300964 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3603     2300964 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3604     2300964 :     const GPtrDiff_t iSrcOffset =
    3605     2300964 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3606     2300964 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3607     2300964 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3608     2300964 :     const double dfDeltaY2 = dfDeltaY * dfDeltaY;
    3609     2300964 :     const double dfDeltaY3 = dfDeltaY2 * dfDeltaY;
    3610             : 
    3611             :     // Get the bilinear interpolation at the image borders.
    3612     2300964 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3613     1883033 :         iSrcY + 2 >= poWK->nSrcYSize)
    3614      490244 :         return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    3615      490244 :                                                   pValue);
    3616             : 
    3617     1810720 :     double adfCoeffs[4] = {};
    3618     1810720 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffs);
    3619             : 
    3620     1810720 :     double adfValue[4] = {};
    3621             : 
    3622     9053590 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3623             :     {
    3624     7242876 :         const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
    3625             : 
    3626     7242876 :         adfValue[i + 1] = CONVOL4(
    3627             :             adfCoeffs,
    3628     7242876 :             reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
    3629             :     }
    3630             : 
    3631             :     const double dfValue =
    3632     1810720 :         CubicConvolution(dfDeltaY, dfDeltaY2, dfDeltaY3, adfValue[0],
    3633             :                          adfValue[1], adfValue[2], adfValue[3]);
    3634             : 
    3635     1810720 :     *pValue = GWKClampValueT<T>(dfValue);
    3636             : 
    3637     1810720 :     return true;
    3638             : }
    3639             : 
    3640             : /************************************************************************/
    3641             : /*                           GWKLanczosSinc()                           */
    3642             : /************************************************************************/
    3643             : 
    3644             : /*
    3645             :  * Lanczos windowed sinc interpolation kernel with radius r.
    3646             :  *        /
    3647             :  *        | sinc(x) * sinc(x/r), if |x| < r
    3648             :  * L(x) = | 1, if x = 0                     ,
    3649             :  *        | 0, otherwise
    3650             :  *        \
    3651             :  *
    3652             :  * where sinc(x) = sin(PI * x) / (PI * x).
    3653             :  */
    3654             : 
    3655        1632 : static double GWKLanczosSinc(double dfX)
    3656             : {
    3657        1632 :     if (dfX == 0.0)
    3658           0 :         return 1.0;
    3659             : 
    3660        1632 :     const double dfPIX = M_PI * dfX;
    3661        1632 :     const double dfPIXoverR = dfPIX / 3;
    3662        1632 :     const double dfPIX2overR = dfPIX * dfPIXoverR;
    3663             :     // Given that sin(3x) = 3 sin(x) - 4 sin^3 (x)
    3664             :     // we can compute sin(dfSinPIX) from sin(dfPIXoverR)
    3665        1632 :     const double dfSinPIXoverR = sin(dfPIXoverR);
    3666        1632 :     const double dfSinPIXoverRSquared = dfSinPIXoverR * dfSinPIXoverR;
    3667        1632 :     const double dfSinPIXMulSinPIXoverR =
    3668        1632 :         (3 - 4 * dfSinPIXoverRSquared) * dfSinPIXoverRSquared;
    3669        1632 :     return dfSinPIXMulSinPIXoverR / dfPIX2overR;
    3670             : }
    3671             : 
    3672      106692 : static double GWKLanczosSinc4Values(double *padfValues)
    3673             : {
    3674      533460 :     for (int i = 0; i < 4; i++)
    3675             :     {
    3676      426768 :         if (padfValues[i] == 0.0)
    3677             :         {
    3678           0 :             padfValues[i] = 1.0;
    3679             :         }
    3680             :         else
    3681             :         {
    3682      426768 :             const double dfPIX = M_PI * padfValues[i];
    3683      426768 :             const double dfPIXoverR = dfPIX / 3;
    3684      426768 :             const double dfPIX2overR = dfPIX * dfPIXoverR;
    3685             :             // Given that sin(3x) = 3 sin(x) - 4 sin^3 (x)
    3686             :             // we can compute sin(dfSinPIX) from sin(dfPIXoverR)
    3687      426768 :             const double dfSinPIXoverR = sin(dfPIXoverR);
    3688      426768 :             const double dfSinPIXoverRSquared = dfSinPIXoverR * dfSinPIXoverR;
    3689      426768 :             const double dfSinPIXMulSinPIXoverR =
    3690      426768 :                 (3 - 4 * dfSinPIXoverRSquared) * dfSinPIXoverRSquared;
    3691      426768 :             padfValues[i] = dfSinPIXMulSinPIXoverR / dfPIX2overR;
    3692             :         }
    3693             :     }
    3694      106692 :     return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
    3695             : }
    3696             : 
    3697             : /************************************************************************/
    3698             : /*                            GWKBilinear()                             */
    3699             : /************************************************************************/
    3700             : 
    3701     6339310 : static double GWKBilinear(double dfX)
    3702             : {
    3703     6339310 :     double dfAbsX = fabs(dfX);
    3704     6339310 :     if (dfAbsX <= 1.0)
    3705     5869990 :         return 1 - dfAbsX;
    3706             :     else
    3707      469322 :         return 0.0;
    3708             : }
    3709             : 
    3710      236458 : static double GWKBilinear4Values(double *padfValues)
    3711             : {
    3712      236458 :     double dfAbsX0 = fabs(padfValues[0]);
    3713      236458 :     double dfAbsX1 = fabs(padfValues[1]);
    3714      236458 :     double dfAbsX2 = fabs(padfValues[2]);
    3715      236458 :     double dfAbsX3 = fabs(padfValues[3]);
    3716      236458 :     if (dfAbsX0 <= 1.0)
    3717      236458 :         padfValues[0] = 1 - dfAbsX0;
    3718             :     else
    3719           0 :         padfValues[0] = 0.0;
    3720      236458 :     if (dfAbsX1 <= 1.0)
    3721      236458 :         padfValues[1] = 1 - dfAbsX1;
    3722             :     else
    3723           0 :         padfValues[1] = 0.0;
    3724      236458 :     if (dfAbsX2 <= 1.0)
    3725      236458 :         padfValues[2] = 1 - dfAbsX2;
    3726             :     else
    3727           0 :         padfValues[2] = 0.0;
    3728      236458 :     if (dfAbsX3 <= 1.0)
    3729      236442 :         padfValues[3] = 1 - dfAbsX3;
    3730             :     else
    3731          16 :         padfValues[3] = 0.0;
    3732      236458 :     return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
    3733             : }
    3734             : 
    3735             : /************************************************************************/
    3736             : /*                              GWKCubic()                              */
    3737             : /************************************************************************/
    3738             : 
    3739       86934 : static double GWKCubic(double dfX)
    3740             : {
    3741       86934 :     return CubicKernel(dfX);
    3742             : }
    3743             : 
    3744     2963710 : static double GWKCubic4Values(double *padfValues)
    3745             : {
    3746     2963710 :     const double dfAbsX_0 = fabs(padfValues[0]);
    3747     2963710 :     const double dfAbsX_1 = fabs(padfValues[1]);
    3748     2963710 :     const double dfAbsX_2 = fabs(padfValues[2]);
    3749     2963710 :     const double dfAbsX_3 = fabs(padfValues[3]);
    3750     2963710 :     const double dfX2_0 = padfValues[0] * padfValues[0];
    3751     2963710 :     const double dfX2_1 = padfValues[1] * padfValues[1];
    3752     2963710 :     const double dfX2_2 = padfValues[2] * padfValues[2];
    3753     2963710 :     const double dfX2_3 = padfValues[3] * padfValues[3];
    3754             : 
    3755     2963710 :     double dfVal0 = 0.0;
    3756     2963710 :     if (dfAbsX_0 <= 1.0)
    3757     1117140 :         dfVal0 = dfX2_0 * (1.5 * dfAbsX_0 - 2.5) + 1.0;
    3758     1846570 :     else if (dfAbsX_0 <= 2.0)
    3759     1846400 :         dfVal0 = dfX2_0 * (-0.5 * dfAbsX_0 + 2.5) - 4.0 * dfAbsX_0 + 2.0;
    3760             : 
    3761     2963710 :     double dfVal1 = 0.0;
    3762     2963710 :     if (dfAbsX_1 <= 1.0)
    3763     1844850 :         dfVal1 = dfX2_1 * (1.5 * dfAbsX_1 - 2.5) + 1.0;
    3764     1118860 :     else if (dfAbsX_1 <= 2.0)
    3765     1118860 :         dfVal1 = dfX2_1 * (-0.5 * dfAbsX_1 + 2.5) - 4.0 * dfAbsX_1 + 2.0;
    3766             : 
    3767     2963710 :     double dfVal2 = 0.0;
    3768     2963710 :     if (dfAbsX_2 <= 1.0)
    3769     1855340 :         dfVal2 = dfX2_2 * (1.5 * dfAbsX_2 - 2.5) + 1.0;
    3770     1108360 :     else if (dfAbsX_2 <= 2.0)
    3771     1108360 :         dfVal2 = dfX2_2 * (-0.5 * dfAbsX_2 + 2.5) - 4.0 * dfAbsX_2 + 2.0;
    3772             : 
    3773     2963710 :     double dfVal3 = 0.0;
    3774     2963710 :     if (dfAbsX_3 <= 1.0)
    3775     1127350 :         dfVal3 = dfX2_3 * (1.5 * dfAbsX_3 - 2.5) + 1.0;
    3776     1836360 :     else if (dfAbsX_3 <= 2.0)
    3777     1836200 :         dfVal3 = dfX2_3 * (-0.5 * dfAbsX_3 + 2.5) - 4.0 * dfAbsX_3 + 2.0;
    3778             : 
    3779     2963710 :     padfValues[0] = dfVal0;
    3780     2963710 :     padfValues[1] = dfVal1;
    3781     2963710 :     padfValues[2] = dfVal2;
    3782     2963710 :     padfValues[3] = dfVal3;
    3783     2963710 :     return dfVal0 + dfVal1 + dfVal2 + dfVal3;
    3784             : }
    3785             : 
    3786             : /************************************************************************/
    3787             : /*                             GWKBSpline()                             */
    3788             : /************************************************************************/
    3789             : 
    3790             : // https://www.cs.utexas.edu/~fussell/courses/cs384g-fall2013/lectures/mitchell/Mitchell.pdf
    3791             : // Equation 8 with (B,C)=(1,0)
    3792             : // 1/6 * ( 3 * |x|^3 -  6 * |x|^2 + 4) |x| < 1
    3793             : // 1/6 * ( -|x|^3 + 6 |x|^2  - 12|x| + 8) |x| >= 1 and |x| < 2
    3794             : 
    3795      139200 : static double GWKBSpline(double x)
    3796             : {
    3797      139200 :     const double xp2 = x + 2.0;
    3798      139200 :     const double xp1 = x + 1.0;
    3799      139200 :     const double xm1 = x - 1.0;
    3800             : 
    3801             :     // This will most likely be used, so we'll compute it ahead of time to
    3802             :     // avoid stalling the processor.
    3803      139200 :     const double xp2c = xp2 * xp2 * xp2;
    3804             : 
    3805             :     // Note that the test is computed only if it is needed.
    3806             :     // TODO(schwehr): Make this easier to follow.
    3807             :     return xp2 > 0.0
    3808      278400 :                ? ((xp1 > 0.0)
    3809      139200 :                       ? ((x > 0.0)
    3810      124806 :                              ? ((xm1 > 0.0) ? -4.0 * xm1 * xm1 * xm1 : 0.0) +
    3811       90308 :                                    6.0 * x * x * x
    3812             :                              : 0.0) +
    3813      124806 :                             -4.0 * xp1 * xp1 * xp1
    3814             :                       : 0.0) +
    3815             :                      xp2c
    3816      139200 :                : 0.0;  // * 0.166666666666666666666
    3817             : }
    3818             : // Same formula as GWKBSpline(), applied in place to padfValues[0..3]; returns the sum of the four resulting weights (still without the 1/6 factor).
    3819     2220680 : static double GWKBSpline4Values(double *padfValues)
    3820             : {
    3821    11103400 :     for (int i = 0; i < 4; i++)
    3822             :     {
    3823     8882740 :         const double x = padfValues[i];
    3824     8882740 :         const double xp2 = x + 2.0;
    3825     8882740 :         const double xp1 = x + 1.0;
    3826     8882740 :         const double xm1 = x - 1.0;
    3827             :
    3828             :         // This will most likely be used, so we'll compute it ahead of time to
    3829             :         // avoid stalling the processor.
    3830     8882740 :         const double xp2c = xp2 * xp2 * xp2;
    3831             :
    3832             :         // Sum of (x+2)^3 - 4(x+1)^3 + 6x^3 - 4(x-1)^3, each term kept only if its base is > 0; an inner test is evaluated only if the outer one passed.
    3833             :         // TODO(schwehr): Make this easier to follow.
    3834     8882740 :         padfValues[i] =
    3835             :             (xp2 > 0.0)
    3836    17765500 :                 ? ((xp1 > 0.0)
    3837     8882740 :                        ? ((x > 0.0)
    3838     6661820 :                               ? ((xm1 > 0.0) ? -4.0 * xm1 * xm1 * xm1 : 0.0) +
    3839     4438260 :                                     6.0 * x * x * x
    3840             :                               : 0.0) +
    3841     6661820 :                              -4.0 * xp1 * xp1 * xp1
    3842             :                        : 0.0) +
    3843             :                       xp2c
    3844             :                 : 0.0;  // * 0.166666666666666666666
    3845             :     }
    3846     2220680 :     return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
    3847             : }
    3848             : /************************************************************************/
    3849             : /*                         GWKResampleWrkStruct                         */
    3850             : /************************************************************************/
    3851             : // Scratch state used by GWKResample() and GWKResampleOptimizedLanczos(): saved kernel weights plus buffers for one row of source samples.
    3852             : typedef struct _GWKResampleWrkStruct GWKResampleWrkStruct;
    3853             : // Common signature of the two resamplers, so that either can be stored in pfnGWKResample.
    3854             : typedef bool (*pfnGWKResampleType)(const GDALWarpKernel *poWK, int iBand,
    3855             :                                    double dfSrcX, double dfSrcY,
    3856             :                                    double *pdfDensity, double *pdfReal,
    3857             :                                    double *pdfImag,
    3858             :                                    GWKResampleWrkStruct *psWrkStruct);
    3859             :
    3860             : struct _GWKResampleWrkStruct
    3861             : {
    3862             :     pfnGWKResampleType pfnGWKResample;
    3863             :
    3864             :     // Space for saved X weights. In GWKResample(), pabCalcX[k] tells whether padfWeightsX[k] has already been computed during the current call.
    3865             :     double *padfWeightsX;
    3866             :     bool *pabCalcX;
    3867             :
    3868             :     double *padfWeightsY;       // Only used by GWKResampleOptimizedLanczos.
    3869             :     int iLastSrcX;              // Only used by GWKResampleOptimizedLanczos.
    3870             :     int iLastSrcY;              // Only used by GWKResampleOptimizedLanczos.
    3871             :     double dfLastDeltaX;        // Only used by GWKResampleOptimizedLanczos.
    3872             :     double dfLastDeltaY;        // Only used by GWKResampleOptimizedLanczos.
    3873             :     double dfCosPiXScale;       // Only used by GWKResampleOptimizedLanczos.
    3874             :     double dfSinPiXScale;       // Only used by GWKResampleOptimizedLanczos.
    3875             :     double dfCosPiXScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3876             :     double dfSinPiXScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3877             :     double dfCosPiYScale;       // Only used by GWKResampleOptimizedLanczos.
    3878             :     double dfSinPiYScale;       // Only used by GWKResampleOptimizedLanczos.
    3879             :     double dfCosPiYScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3880             :     double dfSinPiYScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3881             :
    3882             :     // Space for saving a row of pixels. padfRowDensity is null when pafUnifiedSrcDensity, panUnifiedSrcValid and papanBandSrcValid are all null on the kernel.
    3883             :     double *padfRowDensity;
    3884             :     double *padfRowReal;
    3885             :     double *padfRowImag;
    3886             : };
    3887             : 
    3888             : /************************************************************************/
    3889             : /*                     GWKResampleCreateWrkStruct()                     */
    3890             : /************************************************************************/
    3891             : // Forward declarations: both resamplers are defined later in the file, but GWKResampleCreateWrkStruct() stores their addresses.
    3892             : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
    3893             :                         double dfSrcY, double *pdfDensity, double *pdfReal,
    3894             :                         double *pdfImag, GWKResampleWrkStruct *psWrkStruct);
    3895             :
    3896             : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
    3897             :                                         double dfSrcX, double dfSrcY,
    3898             :                                         double *pdfDensity, double *pdfReal,
    3899             :                                         double *pdfImag,
    3900             :                                         GWKResampleWrkStruct *psWrkStruct);
    3901             : // Allocates and initializes the scratch structure for poWK; release it with GWKResampleDeleteWrkStruct().
    3902         401 : static GWKResampleWrkStruct *GWKResampleCreateWrkStruct(GDALWarpKernel *poWK)
    3903             : {
    3904         401 :     const int nXDist = (poWK->nXRadius + 1) * 2;
    3905         401 :     const int nYDist = (poWK->nYRadius + 1) * 2;
    3906             :
    3907             :     GWKResampleWrkStruct *psWrkStruct = static_cast<GWKResampleWrkStruct *>(
    3908         401 :         CPLCalloc(1, sizeof(GWKResampleWrkStruct)));
    3909             :
    3910             :     // Alloc space for saved X weights. pabCalcX comes from CPLMalloc rather than CPLCalloc; GWKResample() memsets it at the start of every call.
    3911         401 :     psWrkStruct->padfWeightsX =
    3912         401 :         static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3913         401 :     psWrkStruct->pabCalcX =
    3914         401 :         static_cast<bool *>(CPLMalloc(nXDist * sizeof(bool)));
    3915             :
    3916         401 :     psWrkStruct->padfWeightsY =
    3917         401 :         static_cast<double *>(CPLCalloc(nYDist, sizeof(double)));
    3918         401 :     psWrkStruct->iLastSrcX = -10;  // Seed values for the "last position" fields that GWKResampleOptimizedLanczos() compares against before recomputing weights.
    3919         401 :     psWrkStruct->iLastSrcY = -10;
    3920         401 :     psWrkStruct->dfLastDeltaX = -10;
    3921         401 :     psWrkStruct->dfLastDeltaY = -10;
    3922             :
    3923             :     // Alloc space for saving a row of pixels. The density row is only allocated when at least one density/validity array is set on the kernel.
    3924         401 :     if (poWK->pafUnifiedSrcDensity == nullptr &&
    3925         365 :         poWK->panUnifiedSrcValid == nullptr &&
    3926         342 :         poWK->papanBandSrcValid == nullptr)
    3927             :     {
    3928         342 :         psWrkStruct->padfRowDensity = nullptr;
    3929             :     }
    3930             :     else
    3931             :     {
    3932          59 :         psWrkStruct->padfRowDensity =
    3933          59 :             static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3934             :     }
    3935         401 :     psWrkStruct->padfRowReal =
    3936         401 :         static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3937         401 :     psWrkStruct->padfRowImag =
    3938         401 :         static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3939             : // Select the resampler. For Lanczos, also precompute cos/sin of PI * scale and PI / 3 * scale for each axis whose scale is < 1.
    3940         401 :     if (poWK->eResample == GRA_Lanczos)
    3941             :     {
    3942          65 :         psWrkStruct->pfnGWKResample = GWKResampleOptimizedLanczos;
    3943             :
    3944          65 :         if (poWK->dfXScale < 1)
    3945             :         {
    3946           4 :             psWrkStruct->dfCosPiXScaleOver3 = cos(M_PI / 3 * poWK->dfXScale);
    3947           4 :             psWrkStruct->dfSinPiXScaleOver3 =
    3948           4 :                 sqrt(1 - psWrkStruct->dfCosPiXScaleOver3 *
    3949           4 :                              psWrkStruct->dfCosPiXScaleOver3);
    3950             :             // "Naive":
    3951             :             // const double dfCosPiXScale = cos(  M_PI * dfXScale );
    3952             :             // const double dfSinPiXScale = sin(  M_PI * dfXScale );
    3953             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
    3954           4 :             psWrkStruct->dfCosPiXScale = (4 * psWrkStruct->dfCosPiXScaleOver3 *
    3955           4 :                                               psWrkStruct->dfCosPiXScaleOver3 -
    3956           4 :                                           3) *
    3957           4 :                                          psWrkStruct->dfCosPiXScaleOver3;
    3958           4 :             psWrkStruct->dfSinPiXScale = sqrt(
    3959           4 :                 1 - psWrkStruct->dfCosPiXScale * psWrkStruct->dfCosPiXScale);
    3960             :         }
    3961             :
    3962          65 :         if (poWK->dfYScale < 1)
    3963             :         {
    3964          13 :             psWrkStruct->dfCosPiYScaleOver3 = cos(M_PI / 3 * poWK->dfYScale);
    3965          13 :             psWrkStruct->dfSinPiYScaleOver3 =
    3966          13 :                 sqrt(1 - psWrkStruct->dfCosPiYScaleOver3 *
    3967          13 :                              psWrkStruct->dfCosPiYScaleOver3);
    3968             :             // "Naive":
    3969             :             // const double dfCosPiYScale = cos(  M_PI * dfYScale );
    3970             :             // const double dfSinPiYScale = sin(  M_PI * dfYScale );
    3971             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
    3972          13 :             psWrkStruct->dfCosPiYScale = (4 * psWrkStruct->dfCosPiYScaleOver3 *
    3973          13 :                                               psWrkStruct->dfCosPiYScaleOver3 -
    3974          13 :                                           3) *
    3975          13 :                                          psWrkStruct->dfCosPiYScaleOver3;
    3976          13 :             psWrkStruct->dfSinPiYScale = sqrt(
    3977          13 :                 1 - psWrkStruct->dfCosPiYScale * psWrkStruct->dfCosPiYScale);
    3978             :         }
    3979             :     }
    3980             :     else
    3981         336 :         psWrkStruct->pfnGWKResample = GWKResample;
    3982             :
    3983         401 :     return psWrkStruct;
    3984             : }
    3985             : 
    3986             : /************************************************************************/
    3987             : /*                     GWKResampleDeleteWrkStruct()                     */
    3988             : /************************************************************************/
    3989             : // Frees every buffer allocated by GWKResampleCreateWrkStruct(), then psWrkStruct itself.
    3990         401 : static void GWKResampleDeleteWrkStruct(GWKResampleWrkStruct *psWrkStruct)
    3991             : {
    3992         401 :     CPLFree(psWrkStruct->padfWeightsX);
    3993         401 :     CPLFree(psWrkStruct->padfWeightsY);
    3994         401 :     CPLFree(psWrkStruct->pabCalcX);
    3995         401 :     CPLFree(psWrkStruct->padfRowDensity);
    3996         401 :     CPLFree(psWrkStruct->padfRowReal);
    3997         401 :     CPLFree(psWrkStruct->padfRowImag);
    3998         401 :     CPLFree(psWrkStruct);
    3999         401 : }
    4000             : 
    4001             : /************************************************************************/
    4002             : /*                            GWKResample()                             */
    4003             : /************************************************************************/
    4004             : // Generic resampler: accumulates source samples of band iBand around (dfSrcX, dfSrcY), weighted in X and Y by the filter apfGWKFilter[poWK->eResample]. Returns false with *pdfDensity = 0 when the total weight (or, if a density row is used, the total density) is below 0.000001; otherwise writes *pdfReal, *pdfImag and *pdfDensity and returns true.
    4005      239383 : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
    4006             :                         double dfSrcY, double *pdfDensity, double *pdfReal,
    4007             :                         double *pdfImag, GWKResampleWrkStruct *psWrkStruct)
    4008             :
    4009             : {
    4010             :     // Save as local variables to avoid following pointers in loops.
    4011      239383 :     const int nSrcXSize = poWK->nSrcXSize;
    4012      239383 :     const int nSrcYSize = poWK->nSrcYSize;
    4013             :
    4014      239383 :     double dfAccumulatorReal = 0.0;
    4015      239383 :     double dfAccumulatorImag = 0.0;
    4016      239383 :     double dfAccumulatorDensity = 0.0;
    4017      239383 :     double dfAccumulatorWeight = 0.0;
    4018      239383 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    4019      239383 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    4020      239383 :     const GPtrDiff_t iSrcOffset =
    4021      239383 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    4022      239383 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    4023      239383 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    4024             :
    4025      239383 :     const double dfXScale = poWK->dfXScale;
    4026      239383 :     const double dfYScale = poWK->dfYScale;
    4027             :
    4028      239383 :     const int nXDist = (poWK->nXRadius + 1) * 2;
    4029             :
    4030             :     // Space for saved X weights.
    4031      239383 :     double *padfWeightsX = psWrkStruct->padfWeightsX;
    4032      239383 :     bool *pabCalcX = psWrkStruct->pabCalcX;
    4033             :
    4034             :     // Space for saving a row of pixels.
    4035      239383 :     double *padfRowDensity = psWrkStruct->padfRowDensity;
    4036      239383 :     double *padfRowReal = psWrkStruct->padfRowReal;
    4037      239383 :     double *padfRowImag = psWrkStruct->padfRowImag;
    4038             :
    4039             :     // Mark as needing calculation (don't calculate the weights yet,
    4040             :     // because a mask may render it unnecessary).
    4041      239383 :     memset(pabCalcX, false, nXDist * sizeof(bool));
    4042             :
    4043      239383 :     FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample];
    4044      239383 :     CPLAssert(pfnGetWeight);
    4045             :
    4046             :     // Skip sampling over edge of image: clamp the row range [j, jMax] and the column range [iMin, iMax] so that they stay inside the source raster.
    4047      239383 :     int j = poWK->nFiltInitY;
    4048      239383 :     int jMax = poWK->nYRadius;
    4049      239383 :     if (iSrcY + j < 0)
    4050         566 :         j = -iSrcY;
    4051      239383 :     if (iSrcY + jMax >= nSrcYSize)
    4052         662 :         jMax = nSrcYSize - iSrcY - 1;
    4053             :
    4054      239383 :     int iMin = poWK->nFiltInitX;
    4055      239383 :     int iMax = poWK->nXRadius;
    4056      239383 :     if (iSrcX + iMin < 0)
    4057         566 :         iMin = -iSrcX;
    4058      239383 :     if (iSrcX + iMax >= nSrcXSize)
    4059         659 :         iMax = nSrcXSize - iSrcX - 1;
    4060             :
    4061      239383 :     const int bXScaleBelow1 = (dfXScale < 1.0);
    4062      239383 :     const int bYScaleBelow1 = (dfYScale < 1.0);
    4063             : // iRowOffset starts one row before row j because the loop adds nSrcXSize before each use.
    4064      239383 :     GPtrDiff_t iRowOffset =
    4065      239383 :         iSrcOffset + static_cast<GPtrDiff_t>(j - 1) * nSrcXSize + iMin;
    4066             :
    4067             :     // Loop over pixel rows in the kernel.
    4068     1445930 :     for (; j <= jMax; ++j)
    4069             :     {
    4070     1206540 :         iRowOffset += nSrcXSize;
    4071             :
    4072             :         // Get pixel values.
    4073             :         // We can potentially read extra elements after the "normal" end of the
    4074             :         // source arrays, but the contract of papabySrcImage[iBand],
    4075             :         // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
    4076             :         // is to have WARP_EXTRA_ELTS reserved at their end. NOTE(review): the 4th argument is half the row length rounded up; presumably GWKGetPixelRow() works on pairs of pixels - confirm.
    4077     1206540 :         if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
    4078             :                             padfRowDensity, padfRowReal, padfRowImag))
    4079          72 :             continue;
    4080             :
    4081             :         // Calculate the Y weight; the filter argument is multiplied by dfYScale when dfYScale < 1.
    4082             :         double dfWeight1 = (bYScaleBelow1)
    4083     1206470 :                                ? pfnGetWeight((j - dfDeltaY) * dfYScale)
    4084        1600 :                                : pfnGetWeight(j - dfDeltaY);
    4085             :
    4086             :         // Iterate over pixels in row.
    4087     1206470 :         double dfAccumulatorRealLocal = 0.0;
    4088     1206470 :         double dfAccumulatorImagLocal = 0.0;
    4089     1206470 :         double dfAccumulatorDensityLocal = 0.0;
    4090     1206470 :         double dfAccumulatorWeightLocal = 0.0;
    4091             :
    4092     7317420 :         for (int i = iMin; i <= iMax; ++i)
    4093             :         {
    4094             :             // Skip sampling if pixel has zero density.
    4095     6110940 :             if (padfRowDensity != nullptr &&
    4096       77277 :                 padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD_DOUBLE)
    4097         546 :                 continue;
    4098             :
    4099     6110400 :             double dfWeight2 = 0.0;
    4100             :
    4101             :             // Make or use a cached set of weights for this row. The X weight depends only on the column, so it is computed the first time a column is used and reused on later rows.
    4102     6110400 :             if (pabCalcX[i - iMin])
    4103             :             {
    4104             :                 // Use saved weight value instead of recomputing it.
    4105     4903920 :                 dfWeight2 = padfWeightsX[i - iMin];
    4106             :             }
    4107             :             else
    4108             :             {
    4109             :                 // Calculate & save the X weight.
    4110     1206480 :                 padfWeightsX[i - iMin] = dfWeight2 =
    4111     1206480 :                     (bXScaleBelow1) ? pfnGetWeight((i - dfDeltaX) * dfXScale)
    4112        1600 :                                     : pfnGetWeight(i - dfDeltaX);
    4113             :
    4114     1206480 :                 pabCalcX[i - iMin] = true;
    4115             :             }
    4116             :
    4117             :             // Accumulate!
    4118     6110400 :             dfAccumulatorRealLocal += padfRowReal[i - iMin] * dfWeight2;
    4119     6110400 :             dfAccumulatorImagLocal += padfRowImag[i - iMin] * dfWeight2;
    4120     6110400 :             if (padfRowDensity != nullptr)
    4121       76731 :                 dfAccumulatorDensityLocal +=
    4122       76731 :                     padfRowDensity[i - iMin] * dfWeight2;
    4123     6110400 :             dfAccumulatorWeightLocal += dfWeight2;
    4124             :         }
    4125             : // Fold the row sums into the totals, scaled by this row's Y weight.
    4126     1206470 :         dfAccumulatorReal += dfAccumulatorRealLocal * dfWeight1;
    4127     1206470 :         dfAccumulatorImag += dfAccumulatorImagLocal * dfWeight1;
    4128     1206470 :         dfAccumulatorDensity += dfAccumulatorDensityLocal * dfWeight1;
    4129     1206470 :         dfAccumulatorWeight += dfAccumulatorWeightLocal * dfWeight1;
    4130             :     }
    4131             :
    4132      239383 :     if (dfAccumulatorWeight < 0.000001 ||
    4133        1887 :         (padfRowDensity != nullptr && dfAccumulatorDensity < 0.000001))
    4134             :     {
    4135           0 :         *pdfDensity = 0.0;
    4136           0 :         return false;
    4137             :     }
    4138             :
    4139             :     // Calculate the output taking into account weighting: divide by the total weight unless it lies within [0.99999, 1.00001].
    4140      239383 :     if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
    4141             :     {
    4142      239380 :         *pdfReal = dfAccumulatorReal / dfAccumulatorWeight;
    4143      239380 :         *pdfImag = dfAccumulatorImag / dfAccumulatorWeight;
    4144      239380 :         if (padfRowDensity != nullptr)
    4145        1884 :             *pdfDensity = dfAccumulatorDensity / dfAccumulatorWeight;
    4146             :         else
    4147      237496 :             *pdfDensity = 1.0;
    4148             :     }
    4149             :     else
    4150             :     {
    4151           3 :         *pdfReal = dfAccumulatorReal;
    4152           3 :         *pdfImag = dfAccumulatorImag;
    4153           3 :         if (padfRowDensity != nullptr)
    4154           3 :             *pdfDensity = dfAccumulatorDensity;
    4155             :         else
    4156           0 :             *pdfDensity = 1.0;
    4157             :     }
    4158             :
    4159      239383 :     return true;
    4160             : }
    4161             : 
    4162             : /************************************************************************/
    4163             : /*                    GWKResampleOptimizedLanczos()                     */
    4164             : /************************************************************************/
    4165             : 
    4166      634574 : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
    4167             :                                         double dfSrcX, double dfSrcY,
    4168             :                                         double *pdfDensity, double *pdfReal,
    4169             :                                         double *pdfImag,
    4170             :                                         GWKResampleWrkStruct *psWrkStruct)
    4171             : 
    4172             : {
    4173             :     // Save as local variables to avoid following pointers in loops.
    4174      634574 :     const int nSrcXSize = poWK->nSrcXSize;
    4175      634574 :     const int nSrcYSize = poWK->nSrcYSize;
    4176             : 
    4177      634574 :     double dfAccumulatorReal = 0.0;
    4178      634574 :     double dfAccumulatorImag = 0.0;
    4179      634574 :     double dfAccumulatorDensity = 0.0;
    4180      634574 :     double dfAccumulatorWeight = 0.0;
    4181      634574 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    4182      634574 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    4183      634574 :     const GPtrDiff_t iSrcOffset =
    4184      634574 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    4185      634574 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    4186      634574 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    4187             : 
    4188      634574 :     const double dfXScale = poWK->dfXScale;
    4189      634574 :     const double dfYScale = poWK->dfYScale;
    4190             : 
    4191             :     // Space for saved X weights.
    4192      634574 :     double *const padfWeightsXShifted =
    4193      634574 :         psWrkStruct->padfWeightsX - poWK->nFiltInitX;
    4194      634574 :     double *const padfWeightsYShifted =
    4195      634574 :         psWrkStruct->padfWeightsY - poWK->nFiltInitY;
    4196             : 
    4197             :     // Space for saving a row of pixels.
    4198      634574 :     double *const padfRowDensity = psWrkStruct->padfRowDensity;
    4199      634574 :     double *const padfRowReal = psWrkStruct->padfRowReal;
    4200      634574 :     double *const padfRowImag = psWrkStruct->padfRowImag;
    4201             : 
    4202             :     // Skip sampling over edge of image.
    4203      634574 :     int jMin = poWK->nFiltInitY;
    4204      634574 :     int jMax = poWK->nYRadius;
    4205      634574 :     if (iSrcY + jMin < 0)
    4206       19638 :         jMin = -iSrcY;
    4207      634574 :     if (iSrcY + jMax >= nSrcYSize)
    4208        7942 :         jMax = nSrcYSize - iSrcY - 1;
    4209             : 
    4210      634574 :     int iMin = poWK->nFiltInitX;
    4211      634574 :     int iMax = poWK->nXRadius;
    4212      634574 :     if (iSrcX + iMin < 0)
    4213       18827 :         iMin = -iSrcX;
    4214      634574 :     if (iSrcX + iMax >= nSrcXSize)
    4215        6817 :         iMax = nSrcXSize - iSrcX - 1;
    4216             : 
    4217      634574 :     if (dfXScale < 1.0)
    4218             :     {
    4219      403041 :         while ((iMin - dfDeltaX) * dfXScale < -3.0)
    4220      200179 :             iMin++;
    4221      202862 :         while ((iMax - dfDeltaX) * dfXScale > 3.0)
    4222           0 :             iMax--;
    4223             : 
    4224             :         // clang-format off
    4225             :         /*
    4226             :         Naive version:
    4227             :         for (int i = iMin; i <= iMax; ++i)
    4228             :         {
    4229             :             psWrkStruct->padfWeightsXShifted[i] =
    4230             :                 GWKLanczosSinc((i - dfDeltaX) * dfXScale);
    4231             :         }
    4232             : 
    4233             :         but given that:
    4234             : 
    4235             :         GWKLanczosSinc(x):
    4236             :             if (dfX == 0.0)
    4237             :                 return 1.0;
    4238             : 
    4239             :             const double dfPIX = M_PI * dfX;
    4240             :             const double dfPIXoverR = dfPIX / 3;
    4241             :             const double dfPIX2overR = dfPIX * dfPIXoverR;
    4242             :             return sin(dfPIX) * sin(dfPIXoverR) / dfPIX2overR;
    4243             : 
    4244             :         and
    4245             :             sin (a + b) = sin a cos b + cos a sin b.
    4246             :             cos (a + b) = cos a cos b - sin a sin b.
    4247             : 
    4248             :         we can skip any sin() computation within the loop
    4249             :         */
    4250             :         // clang-format on
    4251             : 
    4252      202862 :         if (iSrcX != psWrkStruct->iLastSrcX ||
    4253      131072 :             dfDeltaX != psWrkStruct->dfLastDeltaX)
    4254             :         {
    4255       71790 :             double dfX = (iMin - dfDeltaX) * dfXScale;
    4256             : 
    4257       71790 :             double dfPIXover3 = M_PI / 3 * dfX;
    4258       71790 :             double dfCosOver3 = cos(dfPIXover3);
    4259       71790 :             double dfSinOver3 = sin(dfPIXover3);
    4260             : 
    4261             :             // "Naive":
    4262             :             // double dfSin = sin( M_PI * dfX );
    4263             :             // double dfCos = cos( M_PI * dfX );
    4264             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
    4265       71790 :             double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
    4266       71790 :             double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
    4267             : 
    4268       71790 :             const double dfCosPiXScaleOver3 = psWrkStruct->dfCosPiXScaleOver3;
    4269       71790 :             const double dfSinPiXScaleOver3 = psWrkStruct->dfSinPiXScaleOver3;
    4270       71790 :             const double dfCosPiXScale = psWrkStruct->dfCosPiXScale;
    4271       71790 :             const double dfSinPiXScale = psWrkStruct->dfSinPiXScale;
    4272       71790 :             constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
    4273       71790 :             padfWeightsXShifted[iMin] =
    4274       71790 :                 dfX == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfX * dfX);
    4275     1636480 :             for (int i = iMin + 1; i <= iMax; ++i)
    4276             :             {
    4277     1564690 :                 dfX += dfXScale;
    4278     1564690 :                 const double dfNewSin =
    4279     1564690 :                     dfSin * dfCosPiXScale + dfCos * dfSinPiXScale;
    4280     1564690 :                 const double dfNewSinOver3 = dfSinOver3 * dfCosPiXScaleOver3 +
    4281     1564690 :                                              dfCosOver3 * dfSinPiXScaleOver3;
    4282     1564690 :                 padfWeightsXShifted[i] =
    4283             :                     dfX == 0
    4284     1564690 :                         ? 1.0
    4285     1564690 :                         : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfX * dfX);
    4286     1564690 :                 const double dfNewCos =
    4287     1564690 :                     dfCos * dfCosPiXScale - dfSin * dfSinPiXScale;
    4288     1564690 :                 const double dfNewCosOver3 = dfCosOver3 * dfCosPiXScaleOver3 -
    4289     1564690 :                                              dfSinOver3 * dfSinPiXScaleOver3;
    4290     1564690 :                 dfSin = dfNewSin;
    4291     1564690 :                 dfCos = dfNewCos;
    4292     1564690 :                 dfSinOver3 = dfNewSinOver3;
    4293     1564690 :                 dfCosOver3 = dfNewCosOver3;
    4294             :             }
    4295             : 
    4296       71790 :             psWrkStruct->iLastSrcX = iSrcX;
    4297       71790 :             psWrkStruct->dfLastDeltaX = dfDeltaX;
    4298             :         }
    4299             :     }
    4300             :     else
    4301             :     {
    4302      789372 :         while (iMin - dfDeltaX < -3.0)
    4303      357660 :             iMin++;
    4304      431712 :         while (iMax - dfDeltaX > 3.0)
    4305           0 :             iMax--;
    4306             : 
    4307      431712 :         if (iSrcX != psWrkStruct->iLastSrcX ||
    4308      225330 :             dfDeltaX != psWrkStruct->dfLastDeltaX)
    4309             :         {
    4310             :             // Optimisation of GWKLanczosSinc(i - dfDeltaX) based on the
    4311             :             // following trigonometric formulas.
    4312             : 
    4313             :             // TODO(schwehr): Move this somewhere where it can be rendered at
    4314             :             // LaTeX.
    4315             :             // clang-format off
    4316             :             // sin(M_PI * (dfBase + k)) = sin(M_PI * dfBase) * cos(M_PI * k) +
    4317             :             //                            cos(M_PI * dfBase) * sin(M_PI * k)
    4318             :             // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k) + dfCosPIBase * sin(M_PI * k)
    4319             :             // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k)
    4320             :             // sin(M_PI * (dfBase + k)) = dfSinPIBase * (((k % 2) == 0) ? 1 : -1)
    4321             : 
    4322             :             // sin(M_PI / dfR * (dfBase + k)) = sin(M_PI / dfR * dfBase) * cos(M_PI / dfR * k) +
    4323             :             //                                  cos(M_PI / dfR * dfBase) * sin(M_PI / dfR * k)
    4324             :             // sin(M_PI / dfR * (dfBase + k)) = dfSinPIBaseOverR * cos(M_PI / dfR * k) + dfCosPIBaseOverR * sin(M_PI / dfR * k)
    4325             :             // clang-format on
    4326             : 
    4327      420092 :             const double dfSinPIDeltaXOver3 = sin((-M_PI / 3.0) * dfDeltaX);
    4328      420092 :             const double dfSin2PIDeltaXOver3 =
    4329             :                 dfSinPIDeltaXOver3 * dfSinPIDeltaXOver3;
    4330             :             // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaX < PI/2.
    4331      420092 :             const double dfCosPIDeltaXOver3 = sqrt(1.0 - dfSin2PIDeltaXOver3);
    4332      420092 :             const double dfSinPIDeltaX =
    4333      420092 :                 (3.0 - 4 * dfSin2PIDeltaXOver3) * dfSinPIDeltaXOver3;
    4334      420092 :             const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
    4335      420092 :             const double dfInvPI2Over3xSinPIDeltaX =
    4336             :                 dfInvPI2Over3 * dfSinPIDeltaX;
    4337      420092 :             const double dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 =
    4338      420092 :                 -0.5 * dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3;
    4339      420092 :             const double dfSinPIOver3 = 0.8660254037844386;
    4340      420092 :             const double dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3 =
    4341      420092 :                 dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaX * dfCosPIDeltaXOver3;
    4342             :             const double padfCst[] = {
    4343      420092 :                 dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3,
    4344      420092 :                 dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 -
    4345             :                     dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3,
    4346      420092 :                 dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 +
    4347      420092 :                     dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3};
    4348             : 
    4349     2974940 :             for (int i = iMin; i <= iMax; ++i)
    4350             :             {
    4351     2554850 :                 const double dfX = i - dfDeltaX;
    4352     2554850 :                 if (dfX == 0.0)
    4353       58282 :                     padfWeightsXShifted[i] = 1.0;
    4354             :                 else
    4355     2496570 :                     padfWeightsXShifted[i] = padfCst[(i + 3) % 3] / (dfX * dfX);
    4356             : #if DEBUG_VERBOSE
    4357             :                 // TODO(schwehr): AlmostEqual.
    4358             :                 // CPLAssert(fabs(padfWeightsX[i-poWK->nFiltInitX] -
    4359             :                 //               GWKLanczosSinc(dfX, 3.0)) < 1e-10);
    4360             : #endif
    4361             :             }
    4362             : 
    4363      420092 :             psWrkStruct->iLastSrcX = iSrcX;
    4364      420092 :             psWrkStruct->dfLastDeltaX = dfDeltaX;
    4365             :         }
    4366             :     }
    4367             : 
    4368      634574 :     if (dfYScale < 1.0)
    4369             :     {
    4370      406666 :         while ((jMin - dfDeltaY) * dfYScale < -3.0)
    4371      203804 :             jMin++;
    4372      206462 :         while ((jMax - dfDeltaY) * dfYScale > 3.0)
    4373        3600 :             jMax--;
    4374             : 
    4375             :         // clang-format off
    4376             :         /*
    4377             :         Naive version:
    4378             :         for (int j = jMin; j <= jMax; ++j)
    4379             :         {
    4380             :             padfWeightsYShifted[j] =
    4381             :                 GWKLanczosSinc((j - dfDeltaY) * dfYScale);
    4382             :         }
    4383             :         */
    4384             :         // clang-format on
    4385             : 
    4386      202862 :         if (iSrcY != psWrkStruct->iLastSrcY ||
    4387      202479 :             dfDeltaY != psWrkStruct->dfLastDeltaY)
    4388             :         {
    4389         383 :             double dfY = (jMin - dfDeltaY) * dfYScale;
    4390             : 
    4391         383 :             double dfPIYover3 = M_PI / 3 * dfY;
    4392         383 :             double dfCosOver3 = cos(dfPIYover3);
    4393         383 :             double dfSinOver3 = sin(dfPIYover3);
    4394             : 
    4395             :             // "Naive":
    4396             :             // double dfSin = sin( M_PI * dfY );
    4397             :             // double dfCos = cos( M_PI * dfY );
    4398             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
    4399         383 :             double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
    4400         383 :             double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
    4401             : 
    4402         383 :             const double dfCosPiYScaleOver3 = psWrkStruct->dfCosPiYScaleOver3;
    4403         383 :             const double dfSinPiYScaleOver3 = psWrkStruct->dfSinPiYScaleOver3;
    4404         383 :             const double dfCosPiYScale = psWrkStruct->dfCosPiYScale;
    4405         383 :             const double dfSinPiYScale = psWrkStruct->dfSinPiYScale;
    4406         383 :             constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
    4407         383 :             padfWeightsYShifted[jMin] =
    4408         383 :                 dfY == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfY * dfY);
    4409        7318 :             for (int j = jMin + 1; j <= jMax; ++j)
    4410             :             {
    4411        6935 :                 dfY += dfYScale;
    4412        6935 :                 const double dfNewSin =
    4413        6935 :                     dfSin * dfCosPiYScale + dfCos * dfSinPiYScale;
    4414        6935 :                 const double dfNewSinOver3 = dfSinOver3 * dfCosPiYScaleOver3 +
    4415        6935 :                                              dfCosOver3 * dfSinPiYScaleOver3;
    4416        6935 :                 padfWeightsYShifted[j] =
    4417             :                     dfY == 0
    4418        6935 :                         ? 1.0
    4419        6935 :                         : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfY * dfY);
    4420        6935 :                 const double dfNewCos =
    4421        6935 :                     dfCos * dfCosPiYScale - dfSin * dfSinPiYScale;
    4422        6935 :                 const double dfNewCosOver3 = dfCosOver3 * dfCosPiYScaleOver3 -
    4423        6935 :                                              dfSinOver3 * dfSinPiYScaleOver3;
    4424        6935 :                 dfSin = dfNewSin;
    4425        6935 :                 dfCos = dfNewCos;
    4426        6935 :                 dfSinOver3 = dfNewSinOver3;
    4427        6935 :                 dfCosOver3 = dfNewCosOver3;
    4428             :             }
    4429             : 
    4430         383 :             psWrkStruct->iLastSrcY = iSrcY;
    4431         383 :             psWrkStruct->dfLastDeltaY = dfDeltaY;
    4432             :         }
    4433             :     }
    4434             :     else
    4435             :     {
    4436      713336 :         while (jMin - dfDeltaY < -3.0)
    4437      281624 :             jMin++;
    4438      431712 :         while (jMax - dfDeltaY > 3.0)
    4439           0 :             jMax--;
    4440             : 
    4441      431712 :         if (iSrcY != psWrkStruct->iLastSrcY ||
    4442      431040 :             dfDeltaY != psWrkStruct->dfLastDeltaY)
    4443             :         {
    4444        6942 :             const double dfSinPIDeltaYOver3 = sin((-M_PI / 3.0) * dfDeltaY);
    4445        6942 :             const double dfSin2PIDeltaYOver3 =
    4446             :                 dfSinPIDeltaYOver3 * dfSinPIDeltaYOver3;
    4447             :             // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaY < PI/2.
    4448        6942 :             const double dfCosPIDeltaYOver3 = sqrt(1.0 - dfSin2PIDeltaYOver3);
    4449        6942 :             const double dfSinPIDeltaY =
    4450        6942 :                 (3.0 - 4.0 * dfSin2PIDeltaYOver3) * dfSinPIDeltaYOver3;
    4451        6942 :             const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
    4452        6942 :             const double dfInvPI2Over3xSinPIDeltaY =
    4453             :                 dfInvPI2Over3 * dfSinPIDeltaY;
    4454        6942 :             const double dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 =
    4455        6942 :                 -0.5 * dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3;
    4456        6942 :             const double dfSinPIOver3 = 0.8660254037844386;
    4457        6942 :             const double dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3 =
    4458        6942 :                 dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaY * dfCosPIDeltaYOver3;
    4459             :             const double padfCst[] = {
    4460        6942 :                 dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3,
    4461        6942 :                 dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 -
    4462             :                     dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3,
    4463        6942 :                 dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 +
    4464        6942 :                     dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3};
    4465             : 
    4466       45985 :             for (int j = jMin; j <= jMax; ++j)
    4467             :             {
    4468       39043 :                 const double dfY = j - dfDeltaY;
    4469       39043 :                 if (dfY == 0.0)
    4470         468 :                     padfWeightsYShifted[j] = 1.0;
    4471             :                 else
    4472       38575 :                     padfWeightsYShifted[j] = padfCst[(j + 3) % 3] / (dfY * dfY);
    4473             : #if DEBUG_VERBOSE
    4474             :                 // TODO(schwehr): AlmostEqual.
    4475             :                 // CPLAssert(fabs(padfWeightsYShifted[j] -
    4476             :                 //               GWKLanczosSinc(dfY, 3.0)) < 1e-10);
    4477             : #endif
    4478             :             }
    4479             : 
    4480        6942 :             psWrkStruct->iLastSrcY = iSrcY;
    4481        6942 :             psWrkStruct->dfLastDeltaY = dfDeltaY;
    4482             :         }
    4483             :     }
    4484             : 
    4485             :     // If we have no density information, we can simply compute the
    4486             :     // accumulated weight.
    4487      634574 :     if (padfRowDensity == nullptr)
    4488             :     {
    4489      634574 :         double dfRowAccWeight = 0.0;
    4490     8017750 :         for (int i = iMin; i <= iMax; ++i)
    4491             :         {
    4492     7383170 :             dfRowAccWeight += padfWeightsXShifted[i];
    4493             :         }
    4494      634574 :         double dfColAccWeight = 0.0;
    4495     8075420 :         for (int j = jMin; j <= jMax; ++j)
    4496             :         {
    4497     7440850 :             dfColAccWeight += padfWeightsYShifted[j];
    4498             :         }
    4499      634574 :         dfAccumulatorWeight = dfRowAccWeight * dfColAccWeight;
    4500             :     }
    4501             : 
    4502             :     // Loop over pixel rows in the kernel.
    4503             : 
    4504      634574 :     if (poWK->eWorkingDataType == GDT_UInt8 && !poWK->panUnifiedSrcValid &&
    4505      633954 :         !poWK->papanBandSrcValid && !poWK->pafUnifiedSrcDensity &&
    4506             :         !padfRowDensity)
    4507             :     {
    4508             :         // Optimization for Byte case without any masking/alpha
    4509             : 
    4510      633954 :         if (dfAccumulatorWeight < 0.000001)
    4511             :         {
    4512           0 :             *pdfDensity = 0.0;
    4513           0 :             return false;
    4514             :         }
    4515             : 
    4516      633954 :         const GByte *pSrc =
    4517      633954 :             reinterpret_cast<const GByte *>(poWK->papabySrcImage[iBand]);
    4518      633954 :         pSrc += iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
    4519             : 
    4520             : #if defined(USE_SSE2)
    4521      633954 :         if (iMax - iMin + 1 == 6)
    4522             :         {
    4523             :             // This is just an optimized version of the general case in
    4524             :             // the else clause.
    4525             : 
    4526      359916 :             pSrc += iMin;
    4527      359916 :             int j = jMin;
    4528             :             const auto fourXWeights =
    4529      359916 :                 XMMReg4Double::Load4Val(padfWeightsXShifted + iMin);
    4530             : 
    4531             :             // Process 2 lines at the same time.
    4532     1424180 :             for (; j < jMax; j += 2)
    4533             :             {
    4534             :                 const XMMReg4Double v_acc =
    4535     1064270 :                     XMMReg4Double::Load4Val(pSrc) * fourXWeights;
    4536             :                 const XMMReg4Double v_acc2 =
    4537     1064270 :                     XMMReg4Double::Load4Val(pSrc + nSrcXSize) * fourXWeights;
    4538     1064270 :                 const double dfRowAcc = v_acc.GetHorizSum();
    4539     1064270 :                 const double dfRowAccEnd =
    4540     1064270 :                     pSrc[4] * padfWeightsXShifted[iMin + 4] +
    4541     1064270 :                     pSrc[5] * padfWeightsXShifted[iMin + 5];
    4542     1064270 :                 dfAccumulatorReal +=
    4543     1064270 :                     (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
    4544     1064270 :                 const double dfRowAcc2 = v_acc2.GetHorizSum();
    4545     1064270 :                 const double dfRowAcc2End =
    4546     1064270 :                     pSrc[nSrcXSize + 4] * padfWeightsXShifted[iMin + 4] +
    4547     1064270 :                     pSrc[nSrcXSize + 5] * padfWeightsXShifted[iMin + 5];
    4548     1064270 :                 dfAccumulatorReal +=
    4549     1064270 :                     (dfRowAcc2 + dfRowAcc2End) * padfWeightsYShifted[j + 1];
    4550     1064270 :                 pSrc += 2 * nSrcXSize;
    4551             :             }
    4552      359916 :             if (j == jMax)
    4553             :             {
    4554             :                 // Process last line if there's an odd number of them.
    4555             : 
    4556             :                 const XMMReg4Double v_acc =
    4557       90039 :                     XMMReg4Double::Load4Val(pSrc) * fourXWeights;
    4558       90039 :                 const double dfRowAcc = v_acc.GetHorizSum();
    4559       90039 :                 const double dfRowAccEnd =
    4560       90039 :                     pSrc[4] * padfWeightsXShifted[iMin + 4] +
    4561       90039 :                     pSrc[5] * padfWeightsXShifted[iMin + 5];
    4562       90039 :                 dfAccumulatorReal +=
    4563       90039 :                     (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
    4564             :             }
    4565             :         }
    4566             :         else
    4567             : #endif
    4568             :         {
    4569     5493380 :             for (int j = jMin; j <= jMax; ++j)
    4570             :             {
    4571     5219340 :                 int i = iMin;
    4572     5219340 :                 double dfRowAcc1 = 0.0;
    4573     5219340 :                 double dfRowAcc2 = 0.0;
    4574             :                 // A bit of loop unrolling
    4575    62823300 :                 for (; i < iMax; i += 2)
    4576             :                 {
    4577    57604000 :                     dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
    4578    57604000 :                     dfRowAcc2 += pSrc[i + 1] * padfWeightsXShifted[i + 1];
    4579             :                 }
    4580     5219340 :                 if (i == iMax)
    4581             :                 {
    4582             :                     // Process last column if there's an odd number of them.
    4583      442077 :                     dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
    4584             :                 }
    4585             : 
    4586     5219340 :                 dfAccumulatorReal +=
    4587     5219340 :                     (dfRowAcc1 + dfRowAcc2) * padfWeightsYShifted[j];
    4588     5219340 :                 pSrc += nSrcXSize;
    4589             :             }
    4590             :         }
    4591             : 
    4592             :         // Calculate the output taking into account weighting.
    4593      633954 :         if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
    4594             :         {
    4595      579748 :             const double dfInvAcc = 1.0 / dfAccumulatorWeight;
    4596      579748 :             *pdfReal = dfAccumulatorReal * dfInvAcc;
    4597      579748 :             *pdfDensity = 1.0;
    4598             :         }
    4599             :         else
    4600             :         {
    4601       54206 :             *pdfReal = dfAccumulatorReal;
    4602       54206 :             *pdfDensity = 1.0;
    4603             :         }
    4604             : 
    4605      633954 :         return true;
    4606             :     }
    4607             : 
    4608         620 :     GPtrDiff_t iRowOffset =
    4609         620 :         iSrcOffset + static_cast<GPtrDiff_t>(jMin - 1) * nSrcXSize + iMin;
    4610             : 
    4611         620 :     int nCountValid = 0;
    4612         620 :     const bool bIsNonComplex = !GDALDataTypeIsComplex(poWK->eWorkingDataType);
    4613             : 
    4614        3560 :     for (int j = jMin; j <= jMax; ++j)
    4615             :     {
    4616        2940 :         iRowOffset += nSrcXSize;
    4617             : 
    4618             :         // Get pixel values.
    4619             :         // We can potentially read extra elements after the "normal" end of the
    4620             :         // source arrays, but the contract of papabySrcImage[iBand],
    4621             :         // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
    4622             :         // is to have WARP_EXTRA_ELTS reserved at their end.
    4623        2940 :         if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
    4624             :                             padfRowDensity, padfRowReal, padfRowImag))
    4625           0 :             continue;
    4626             : 
    4627        2940 :         const double dfWeight1 = padfWeightsYShifted[j];
    4628             : 
    4629             :         // Iterate over pixels in row.
    4630        2940 :         if (padfRowDensity != nullptr)
    4631             :         {
    4632           0 :             for (int i = iMin; i <= iMax; ++i)
    4633             :             {
    4634             :                 // Skip sampling if pixel has zero density.
    4635           0 :                 if (padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD_DOUBLE)
    4636           0 :                     continue;
    4637             : 
    4638           0 :                 nCountValid++;
    4639             : 
    4640             :                 //  Use a cached set of weights for this row.
    4641           0 :                 const double dfWeight2 = dfWeight1 * padfWeightsXShifted[i];
    4642             : 
    4643             :                 // Accumulate!
    4644           0 :                 dfAccumulatorReal += padfRowReal[i - iMin] * dfWeight2;
    4645           0 :                 dfAccumulatorImag += padfRowImag[i - iMin] * dfWeight2;
    4646           0 :                 dfAccumulatorDensity += padfRowDensity[i - iMin] * dfWeight2;
    4647           0 :                 dfAccumulatorWeight += dfWeight2;
    4648             :             }
    4649             :         }
    4650        2940 :         else if (bIsNonComplex)
    4651             :         {
    4652        1764 :             double dfRowAccReal = 0.0;
    4653       10560 :             for (int i = iMin; i <= iMax; ++i)
    4654             :             {
    4655        8796 :                 const double dfWeight2 = padfWeightsXShifted[i];
    4656             : 
    4657             :                 // Accumulate!
    4658        8796 :                 dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
    4659             :             }
    4660             : 
    4661        1764 :             dfAccumulatorReal += dfRowAccReal * dfWeight1;
    4662             :         }
    4663             :         else
    4664             :         {
    4665        1176 :             double dfRowAccReal = 0.0;
    4666        1176 :             double dfRowAccImag = 0.0;
    4667        7040 :             for (int i = iMin; i <= iMax; ++i)
    4668             :             {
    4669        5864 :                 const double dfWeight2 = padfWeightsXShifted[i];
    4670             : 
    4671             :                 // Accumulate!
    4672        5864 :                 dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
    4673        5864 :                 dfRowAccImag += padfRowImag[i - iMin] * dfWeight2;
    4674             :             }
    4675             : 
    4676        1176 :             dfAccumulatorReal += dfRowAccReal * dfWeight1;
    4677        1176 :             dfAccumulatorImag += dfRowAccImag * dfWeight1;
    4678             :         }
    4679             :     }
    4680             : 
    4681         620 :     if (dfAccumulatorWeight < 0.000001 ||
    4682           0 :         (padfRowDensity != nullptr &&
    4683           0 :          (dfAccumulatorDensity < 0.000001 ||
    4684           0 :           nCountValid < (jMax - jMin + 1) * (iMax - iMin + 1) / 2)))
    4685             :     {
    4686           0 :         *pdfDensity = 0.0;
    4687           0 :         return false;
    4688             :     }
    4689             : 
    4690             :     // Calculate the output taking into account weighting.
    4691         620 :     if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
    4692             :     {
    4693           0 :         const double dfInvAcc = 1.0 / dfAccumulatorWeight;
    4694           0 :         *pdfReal = dfAccumulatorReal * dfInvAcc;
    4695           0 :         *pdfImag = dfAccumulatorImag * dfInvAcc;
    4696           0 :         if (padfRowDensity != nullptr)
    4697           0 :             *pdfDensity = dfAccumulatorDensity * dfInvAcc;
    4698             :         else
    4699           0 :             *pdfDensity = 1.0;
    4700             :     }
    4701             :     else
    4702             :     {
    4703         620 :         *pdfReal = dfAccumulatorReal;
    4704         620 :         *pdfImag = dfAccumulatorImag;
    4705         620 :         if (padfRowDensity != nullptr)
    4706           0 :             *pdfDensity = dfAccumulatorDensity;
    4707             :         else
    4708         620 :             *pdfDensity = 1.0;
    4709             :     }
    4710             : 
    4711         620 :     return true;
    4712             : }
    4713             : 
    4714             : /************************************************************************/
    4715             : /*                         GWKComputeWeights()                          */
    4716             : /************************************************************************/
    4717             : // Builds the horizontal and vertical weight tables for one kernel window.
    4718     1222150 : static void GWKComputeWeights(GDALResampleAlg eResample, int iMin, int iMax,  // eResample indexes the filter tables; [iMin, iMax]: inclusive X taps
    4719             :                               double dfDeltaX, double dfXScale, int jMin,  // filter argument for tap i is (i - dfDeltaX) * dfXScale
    4720             :                               int jMax, double dfDeltaY, double dfYScale,  // same scheme for Y over [jMin, jMax]
    4721             :                               double *padfWeightsHorizontal,  // out: written from index 0 upwards
    4722             :                               double *padfWeightsVertical, double &dfInvWeights)  // out: table; out: 1 / (horizontal sum * vertical sum)
    4723             : {
    4724             :     // Both filter callbacks are looked up by resampling algorithm and must exist.
    4725     1222150 :     const FilterFuncType pfnGetWeight = apfGWKFilter[eResample];
    4726     1222150 :     CPLAssert(pfnGetWeight);
    4727     1222150 :     const FilterFunc4ValuesType pfnGetWeight4Values =
    4728     1222150 :         apfGWKFilter4Values[eResample];
    4729     1222150 :     CPLAssert(pfnGetWeight4Values);
    4730             :     // Horizontal pass.
    4731     1222150 :     int i = iMin;  // Tap index; declared here because the tail loop continues from it.
    4732     1222150 :     int iC = 0;    // 0-based slot in padfWeightsHorizontal; also continued by the tail loop.
    4733             :     // Not zero, but as close as possible to it, to avoid potential division by
    4734             :     // zero at end of function
    4735     1222150 :     double dfAccumulatorWeightHorizontal = cpl::NumericLimits<double>::min();
    4736     2990960 :     for (; i + 2 < iMax; i += 4, iC += 4)  // runs while at least 4 taps remain
    4737             :     {
    4738     1768820 :         padfWeightsHorizontal[iC] = (i - dfDeltaX) * dfXScale;  // store the 4 consecutive filter arguments
    4739     1768820 :         padfWeightsHorizontal[iC + 1] = padfWeightsHorizontal[iC] + dfXScale;
    4740     1768820 :         padfWeightsHorizontal[iC + 2] =
    4741     1768820 :             padfWeightsHorizontal[iC + 1] + dfXScale;
    4742     1768820 :         padfWeightsHorizontal[iC + 3] =
    4743     1768820 :             padfWeightsHorizontal[iC + 2] + dfXScale;
    4744     1768820 :         dfAccumulatorWeightHorizontal +=  // NOTE(review): callee presumably replaces the 4 arguments by their weights in place and returns their sum - confirm
    4745     1768820 :             pfnGetWeight4Values(padfWeightsHorizontal + iC);
    4746             :     }
    4747     1280860 :     for (; i <= iMax; ++i, ++iC)  // tail: the remaining 0 to 3 taps, one at a time
    4748             :     {
    4749       58719 :         const double dfWeight = pfnGetWeight((i - dfDeltaX) * dfXScale);
    4750       58719 :         padfWeightsHorizontal[iC] = dfWeight;
    4751       58719 :         dfAccumulatorWeightHorizontal += dfWeight;
    4752             :     }
    4753             :     // Vertical pass: identical scheme over [jMin, jMax] with dfDeltaY / dfYScale.
    4754     1222150 :     int j = jMin;  // Tap index; continued by the tail loop.
    4755     1222150 :     int jC = 0;    // 0-based slot in padfWeightsVertical; continued by the tail loop.
    4756             :     // Not zero, but as close as possible to it, to avoid potential division by
    4757             :     // zero at end of function
    4758     1222150 :     double dfAccumulatorWeightVertical = cpl::NumericLimits<double>::min();
    4759     2984620 :     for (; j + 2 < jMax; j += 4, jC += 4)  // runs while at least 4 taps remain
    4760             :     {
    4761     1762470 :         padfWeightsVertical[jC] = (j - dfDeltaY) * dfYScale;
    4762     1762470 :         padfWeightsVertical[jC + 1] = padfWeightsVertical[jC] + dfYScale;
    4763     1762470 :         padfWeightsVertical[jC + 2] = padfWeightsVertical[jC + 1] + dfYScale;
    4764     1762470 :         padfWeightsVertical[jC + 3] = padfWeightsVertical[jC + 2] + dfYScale;
    4765     1762470 :         dfAccumulatorWeightVertical +=
    4766     1762470 :             pfnGetWeight4Values(padfWeightsVertical + jC);
    4767             :     }
    4768     1288930 :     for (; j <= jMax; ++j, ++jC)  // tail: the remaining 0 to 3 taps
    4769             :     {
    4770       66786 :         const double dfWeight = pfnGetWeight((j - dfDeltaY) * dfYScale);
    4771       66786 :         padfWeightsVertical[jC] = dfWeight;
    4772       66786 :         dfAccumulatorWeightVertical += dfWeight;
    4773             :     }
    4774             :     // Reciprocal of the product of the two 1-D sums; neither sum starts at exactly 0.
    4775     1222150 :     dfInvWeights =
    4776     1222150 :         1. / (dfAccumulatorWeightHorizontal * dfAccumulatorWeightVertical);
    4777     1222150 : }
    4778             : 
    4779             : /************************************************************************/
    4780             : /*                        GWKResampleNoMasksT()                         */
    4781             : /************************************************************************/
    4782             : // Resamples one band at (dfSrcX, dfSrcY) with separable weights; reads no validity or density mask.
    4783             : template <class T>  // T: element type of the source band buffer and of *pValue
    4784             : static bool
    4785             : GWKResampleNoMasksT(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
    4786             :                     double dfSrcY, T *pValue, double *padfWeightsHorizontal,
    4787             :                     double *padfWeightsVertical, double &dfInvWeights)  // the two tables and dfInvWeights are written here only when iBand == 0
    4788             : // Returns true after writing *pValue, or whatever the bilinear fallback returns.
    4789             : {
    4790             :     // Commonly used; save locally.
    4791             :     const int nSrcXSize = poWK->nSrcXSize;
    4792             :     const int nSrcYSize = poWK->nSrcYSize;
    4793             :     // Integer pixel index: floor of the coordinate shifted by half a pixel.
    4794             :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    4795             :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    4796             :     const GPtrDiff_t iSrcOffset =
    4797             :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;  // row-major element offset, widened before the multiply
    4798             :     // Kernel radii, read from the warp kernel object.
    4799             :     const int nXRadius = poWK->nXRadius;
    4800             :     const int nYRadius = poWK->nYRadius;
    4801             :     // Pixel index past the right/bottom edge, or a radius larger than the
    4802             :     // image: delegate to the 4-sample bilinear resampler instead.
    4803             :     if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
    4804             :         nYRadius > nSrcYSize)
    4805             :         return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    4806             :                                                   pValue);
    4807             :     // Typed view of the band buffer, and the sub-pixel offsets (each in [0, 1)).
    4808             :     T *pSrcBand = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
    4809             :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    4810             :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    4811             :     // Scales are capped at 1.0 before being handed to the weight computation.
    4812             :     const double dfXScale = std::min(poWK->dfXScale, 1.0);
    4813             :     const double dfYScale = std::min(poWK->dfYScale, 1.0);
    4814             :     // Column taps [iMin, iMax] relative to iSrcX, clipped to 0 .. nSrcXSize - 1.
    4815             :     int iMin = 1 - nXRadius;
    4816             :     if (iSrcX + iMin < 0)
    4817             :         iMin = -iSrcX;
    4818             :     int iMax = nXRadius;
    4819             :     if (iSrcX + iMax >= nSrcXSize - 1)
    4820             :         iMax = nSrcXSize - 1 - iSrcX;
    4821             :     // Row taps [jMin, jMax] relative to iSrcY, clipped to 0 .. nSrcYSize - 1.
    4822             :     int jMin = 1 - nYRadius;
    4823             :     if (iSrcY + jMin < 0)
    4824             :         jMin = -iSrcY;
    4825             :     int jMax = nYRadius;
    4826             :     if (iSrcY + jMax >= nSrcYSize - 1)
    4827             :         jMax = nSrcYSize - 1 - iSrcY;
    4828             :     // The weight inputs do not involve the band; they are refreshed on band 0 only.
    4829             :     if (iBand == 0)
    4830             :     {
    4831             :         GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
    4832             :                           jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
    4833             :                           padfWeightsVertical, dfInvWeights);
    4834             :     }
    4835             :     // NOTE(review): for iBand != 0 the caller-held tables are presumably band 0's for this pixel - confirm with callers.
    4836             :     // Loop over all rows in the kernel.
    4837             :     double dfAccumulator = 0.0;
    4838             :     for (int jC = 0, j = jMin; j <= jMax; ++j, ++jC)  // jC: 0-based slot in padfWeightsVertical
    4839             :     {
    4840             :         const GPtrDiff_t iSampJ =
    4841             :             iSrcOffset + static_cast<GPtrDiff_t>(j) * nSrcXSize;  // offset of column iSrcX in row iSrcY + j
    4842             :         // Two partial sums are kept for the 4-column chunks.
    4843             :         // Loop over all pixels in the row.
    4844             :         double dfAccumulatorLocal = 0.0;
    4845             :         double dfAccumulatorLocal2 = 0.0;
    4846             :         int iC = 0;
    4847             :         int i = iMin;
    4848             :         // Process by chunk of 4 cols.
    4849             :         for (; i + 2 < iMax; i += 4, iC += 4)
    4850             :         {
    4851             :             // Retrieve the pixel & accumulate.
    4852             :             dfAccumulatorLocal +=
    4853             :                 double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
    4854             :             dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
    4855             :                                   padfWeightsHorizontal[iC + 1];
    4856             :             dfAccumulatorLocal2 += double(pSrcBand[i + 2 + iSampJ]) *
    4857             :                                    padfWeightsHorizontal[iC + 2];
    4858             :             dfAccumulatorLocal2 += double(pSrcBand[i + 3 + iSampJ]) *
    4859             :                                    padfWeightsHorizontal[iC + 3];
    4860             :         }
    4861             :         dfAccumulatorLocal += dfAccumulatorLocal2;  // merge the two partial sums
    4862             :         if (i < iMax)  // at least two columns left: take a pair
    4863             :         {
    4864             :             dfAccumulatorLocal +=
    4865             :                 double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
    4866             :             dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
    4867             :                                   padfWeightsHorizontal[iC + 1];
    4868             :             i += 2;
    4869             :             iC += 2;
    4870             :         }
    4871             :         if (i == iMax)  // exactly one column left
    4872             :         {
    4873             :             dfAccumulatorLocal +=
    4874             :                 double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
    4875             :         }
    4876             :         // Apply this row's vertical weight to the horizontally weighted row sum.
    4877             :         dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
    4878             :     }
    4879             :     // Normalize by dfInvWeights; GWKClampValueT<T> (not shown here) yields the T result, presumably clamped to T's range.
    4880             :     *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
    4881             : 
    4882             :     return true;
    4883             : }
    4884             : 
    4885             : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
    4886             : /* Could possibly be used too on 32bit, but we would need to check at runtime */
    4887             : #if defined(USE_SSE2)
    4888             : 
    4889             : /************************************************************************/
    4890             : /*                     GWKResampleNoMasks_SSE2_T()                      */
    4891             : /************************************************************************/
    4892             : 
    4893             : template <class T>
    4894     1775366 : static bool GWKResampleNoMasks_SSE2_T(const GDALWarpKernel *poWK, int iBand,
    4895             :                                       double dfSrcX, double dfSrcY, T *pValue,
    4896             :                                       double *padfWeightsHorizontal,
    4897             :                                       double *padfWeightsVertical,
    4898             :                                       double &dfInvWeights)
    4899             : {
    4900             :     // Commonly used; save locally.
    4901     1775366 :     const int nSrcXSize = poWK->nSrcXSize;
    4902     1775366 :     const int nSrcYSize = poWK->nSrcYSize;
    4903             : 
    4904     1775366 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    4905     1775366 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    4906     1775366 :     const GPtrDiff_t iSrcOffset =
    4907     1775366 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    4908     1775366 :     const int nXRadius = poWK->nXRadius;
    4909     1775366 :     const int nYRadius = poWK->nYRadius;
    4910             : 
    4911             :     // Politely refuse to process invalid coordinates or obscenely small image.
    4912     1775366 :     if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
    4913             :         nYRadius > nSrcYSize)
    4914           3 :         return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    4915           3 :                                                   pValue);
    4916             : 
    4917     1775364 :     const T *pSrcBand =
    4918     1775364 :         reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
    4919             : 
    4920     1775364 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    4921     1775364 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    4922     1775364 :     const double dfXScale = std::min(poWK->dfXScale, 1.0);
    4923     1775364 :     const double dfYScale = std::min(poWK->dfYScale, 1.0);
    4924             : 
    4925     1775364 :     int iMin = 1 - nXRadius;
    4926     1775364 :     if (iSrcX + iMin < 0)
    4927       22616 :         iMin = -iSrcX;
    4928     1775364 :     int iMax = nXRadius;
    4929     1775364 :     if (iSrcX + iMax >= nSrcXSize - 1)
    4930        9506 :         iMax = nSrcXSize - 1 - iSrcX;
    4931             : 
    4932     1775364 :     int jMin = 1 - nYRadius;
    4933     1775364 :     if (iSrcY + jMin < 0)
    4934       26049 :         jMin = -iSrcY;
    4935     1775364 :     int jMax = nYRadius;
    4936     1775364 :     if (iSrcY + jMax >= nSrcYSize - 1)
    4937       13135 :         jMax = nSrcYSize - 1 - iSrcY;
    4938             : 
    4939     1775364 :     if (iBand == 0)
    4940             :     {
    4941     1222146 :         GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
    4942             :                           jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
    4943             :                           padfWeightsVertical, dfInvWeights);
    4944             :     }
    4945             : 
    4946     1775364 :     GPtrDiff_t iSampJ = iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
    4947             :     // Process by chunk of 4 rows.
    4948     1775364 :     int jC = 0;
    4949     1775364 :     int j = jMin;
    4950     1775364 :     double dfAccumulator = 0.0;
    4951     5023910 :     for (; j + 2 < jMax; j += 4, iSampJ += 4 * nSrcXSize, jC += 4)
    4952             :     {
    4953             :         // Loop over all pixels in the row.
    4954     3248546 :         int iC = 0;
    4955     3248546 :         int i = iMin;
    4956             :         // Process by chunk of 4 cols.
    4957     3248546 :         XMMReg4Double v_acc_1 = XMMReg4Double::Zero();
    4958     3248546 :         XMMReg4Double v_acc_2 = XMMReg4Double::Zero();
    4959     3248546 :         XMMReg4Double v_acc_3 = XMMReg4Double::Zero();
    4960     3248546 :         XMMReg4Double v_acc_4 = XMMReg4Double::Zero();
    4961    11835082 :         for (; i + 2 < iMax; i += 4, iC += 4)
    4962             :         {
    4963             :             // Retrieve the pixel & accumulate.
    4964     8586546 :             XMMReg4Double v_pixels_1 =
    4965     8586546 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
    4966     8586546 :             XMMReg4Double v_pixels_2 =
    4967     8586546 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + nSrcXSize);
    4968     8586546 :             XMMReg4Double v_pixels_3 =
    4969     8586546 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
    4970     8586546 :             XMMReg4Double v_pixels_4 =
    4971     8586546 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
    4972             : 
    4973     8586546 :             XMMReg4Double v_padfWeight =
    4974     8586546 :                 XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
    4975             : 
    4976     8586546 :             v_acc_1 += v_pixels_1 * v_padfWeight;
    4977     8586546 :             v_acc_2 += v_pixels_2 * v_padfWeight;
    4978     8586546 :             v_acc_3 += v_pixels_3 * v_padfWeight;
    4979     8586546 :             v_acc_4 += v_pixels_4 * v_padfWeight;
    4980             :         }
    4981             : 
    4982     3248546 :         if (i < iMax)
    4983             :         {
    4984       49932 :             XMMReg2Double v_pixels_1 =
    4985       49932 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ);
    4986       49932 :             XMMReg2Double v_pixels_2 =
    4987       49932 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + nSrcXSize);
    4988       49932 :             XMMReg2Double v_pixels_3 =
    4989       49932 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
    4990       49932 :             XMMReg2Double v_pixels_4 =
    4991       49932 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
    4992             : 
    4993       49932 :             XMMReg2Double v_padfWeight =
    4994       49932 :                 XMMReg2Double::Load2Val(padfWeightsHorizontal + iC);
    4995             : 
    4996       49932 :             v_acc_1.AddToLow(v_pixels_1 * v_padfWeight);
    4997       49932 :             v_acc_2.AddToLow(v_pixels_2 * v_padfWeight);
    4998       49932 :             v_acc_3.AddToLow(v_pixels_3 * v_padfWeight);
    4999       49932 :             v_acc_4.AddToLow(v_pixels_4 * v_padfWeight);
    5000             : 
    5001       49932 :             i += 2;
    5002       49932 :             iC += 2;
    5003             :         }
    5004             : 
    5005     3248546 :         double dfAccumulatorLocal_1 = v_acc_1.GetHorizSum();
    5006     3248546 :         double dfAccumulatorLocal_2 = v_acc_2.GetHorizSum();
    5007     3248546 :         double dfAccumulatorLocal_3 = v_acc_3.GetHorizSum();
    5008     3248546 :         double dfAccumulatorLocal_4 = v_acc_4.GetHorizSum();
    5009             : 
    5010     3248546 :         if (i == iMax)
    5011             :         {
    5012       27545 :             dfAccumulatorLocal_1 += static_cast<double>(pSrcBand[i + iSampJ]) *
    5013       27545 :                                     padfWeightsHorizontal[iC];
    5014       27545 :             dfAccumulatorLocal_2 +=
    5015       27545 :                 static_cast<double>(pSrcBand[i + iSampJ + nSrcXSize]) *
    5016       27545 :                 padfWeightsHorizontal[iC];
    5017       27545 :             dfAccumulatorLocal_3 +=
    5018       27545 :                 static_cast<double>(pSrcBand[i + iSampJ + 2 * nSrcXSize]) *
    5019       27545 :                 padfWeightsHorizontal[iC];
    5020       27545 :             dfAccumulatorLocal_4 +=
    5021       27545 :                 static_cast<double>(pSrcBand[i + iSampJ + 3 * nSrcXSize]) *
    5022       27545 :                 padfWeightsHorizontal[iC];
    5023             :         }
    5024             : 
    5025     3248546 :         dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal_1;
    5026     3248546 :         dfAccumulator += padfWeightsVertical[jC + 1] * dfAccumulatorLocal_2;
    5027     3248546 :         dfAccumulator += padfWeightsVertical[jC + 2] * dfAccumulatorLocal_3;
    5028     3248546 :         dfAccumulator += padfWeightsVertical[jC + 3] * dfAccumulatorLocal_4;
    5029             :     }
    5030     1866210 :     for (; j <= jMax; ++j, iSampJ += nSrcXSize, ++jC)
    5031             :     {
    5032             :         // Loop over all pixels in the row.
    5033       90850 :         int iC = 0;
    5034       90850 :         int i = iMin;
    5035             :         // Process by chunk of 4 cols.
    5036       90850 :         XMMReg4Double v_acc = XMMReg4Double::Zero();
    5037      243258 :         for (; i + 2 < iMax; i += 4, iC += 4)
    5038             :         {
    5039             :             // Retrieve the pixel & accumulate.
    5040      152408 :             XMMReg4Double v_pixels =
    5041      152408 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
    5042      152408 :             XMMReg4Double v_padfWeight =
    5043      152408 :                 XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
    5044             : 
    5045      152408 :             v_acc += v_pixels * v_padfWeight;
    5046             :         }
    5047             : 
    5048       90850 :         double dfAccumulatorLocal = v_acc.GetHorizSum();
    5049             : 
    5050       90850 :         if (i < iMax)
    5051             :         {
    5052        2090 :             dfAccumulatorLocal +=
    5053        2090 :                 double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
    5054        2090 :             dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
    5055        2090 :                                   padfWeightsHorizontal[iC + 1];
    5056        2090 :             i += 2;
    5057        2090 :             iC += 2;
    5058             :         }
    5059       90850 :         if (i == iMax)
    5060             :         {
    5061        1839 :             dfAccumulatorLocal += static_cast<double>(pSrcBand[i + iSampJ]) *
    5062        1839 :                                   padfWeightsHorizontal[iC];
    5063             :         }
    5064             : 
    5065       90850 :         dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
    5066             :     }
    5067             : 
    5068     1775364 :     *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
    5069             : 
    5070     1775364 :     return true;
    5071             : }
    5072             : 
    5073             : /************************************************************************/
    5074             : /*                     GWKResampleNoMasksT<GByte>()                     */
    5075             : /************************************************************************/
    5076             : 
    5077             : template <>
    5078     1270240 : bool GWKResampleNoMasksT<GByte>(const GDALWarpKernel *poWK, int iBand,
    5079             :                                 double dfSrcX, double dfSrcY, GByte *pValue,
    5080             :                                 double *padfWeightsHorizontal,
    5081             :                                 double *padfWeightsVertical,
    5082             :                                 double &dfInvWeights)
    5083             : {
    5084     1270240 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    5085             :                                      padfWeightsHorizontal, padfWeightsVertical,
    5086     1270240 :                                      dfInvWeights);
    5087             : }
    5088             : 
    5089             : /************************************************************************/
    5090             : /*                    GWKResampleNoMasksT<GInt16>()                     */
    5091             : /************************************************************************/
    5092             : 
    5093             : template <>
    5094      252563 : bool GWKResampleNoMasksT<GInt16>(const GDALWarpKernel *poWK, int iBand,
    5095             :                                  double dfSrcX, double dfSrcY, GInt16 *pValue,
    5096             :                                  double *padfWeightsHorizontal,
    5097             :                                  double *padfWeightsVertical,
    5098             :                                  double &dfInvWeights)
    5099             : {
    5100      252563 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    5101             :                                      padfWeightsHorizontal, padfWeightsVertical,
    5102      252563 :                                      dfInvWeights);
    5103             : }
    5104             : 
    5105             : /************************************************************************/
    5106             : /*                    GWKResampleNoMasksT<GUInt16>()                    */
    5107             : /************************************************************************/
    5108             : 
    5109             : template <>
    5110      250063 : bool GWKResampleNoMasksT<GUInt16>(const GDALWarpKernel *poWK, int iBand,
    5111             :                                   double dfSrcX, double dfSrcY, GUInt16 *pValue,
    5112             :                                   double *padfWeightsHorizontal,
    5113             :                                   double *padfWeightsVertical,
    5114             :                                   double &dfInvWeights)
    5115             : {
    5116      250063 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    5117             :                                      padfWeightsHorizontal, padfWeightsVertical,
    5118      250063 :                                      dfInvWeights);
    5119             : }
    5120             : 
    5121             : /************************************************************************/
    5122             : /*                     GWKResampleNoMasksT<float>()                     */
    5123             : /************************************************************************/
    5124             : 
    5125             : template <>
    5126        2500 : bool GWKResampleNoMasksT<float>(const GDALWarpKernel *poWK, int iBand,
    5127             :                                 double dfSrcX, double dfSrcY, float *pValue,
    5128             :                                 double *padfWeightsHorizontal,
    5129             :                                 double *padfWeightsVertical,
    5130             :                                 double &dfInvWeights)
    5131             : {
    5132        2500 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    5133             :                                      padfWeightsHorizontal, padfWeightsVertical,
    5134        2500 :                                      dfInvWeights);
    5135             : }
    5136             : 
    5137             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    5138             : 
    5139             : /************************************************************************/
    5140             : /*                    GWKResampleNoMasksT<double>()                     */
    5141             : /************************************************************************/
    5142             : 
    5143             : template <>
    5144             : bool GWKResampleNoMasksT<double>(const GDALWarpKernel *poWK, int iBand,
    5145             :                                  double dfSrcX, double dfSrcY, double *pValue,
    5146             :                                  double *padfWeightsHorizontal,
    5147             :                                  double *padfWeightsVertical,
    5148             :                                  double &dfInvWeights)
    5149             : {
    5150             :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    5151             :                                      padfWeightsHorizontal, padfWeightsVertical,
    5152             :                                      dfInvWeights);
    5153             : }
    5154             : 
    5155             : #endif /* INSTANTIATE_FLOAT64_SSE2_IMPL */
    5156             : 
    5157             : #endif /* defined(USE_SSE2) */
    5158             : 
    5159             : /************************************************************************/
    5160             : /*                     GWKRoundSourceCoordinates()                      */
    5161             : /************************************************************************/
    5162             : 
    5163        1000 : static void GWKRoundSourceCoordinates(
    5164             :     int nDstXSize, double *padfX, double *padfY, double *padfZ, int *pabSuccess,
    5165             :     double dfSrcCoordPrecision, double dfErrorThreshold,
    5166             :     GDALTransformerFunc pfnTransformer, void *pTransformerArg, double dfDstXOff,
    5167             :     double dfDstY)
    5168             : {
    5169        1000 :     double dfPct = 0.8;
    5170        1000 :     if (dfErrorThreshold > 0 && dfSrcCoordPrecision / dfErrorThreshold >= 10.0)
    5171             :     {
    5172        1000 :         dfPct = 1.0 - 2 * 1.0 / (dfSrcCoordPrecision / dfErrorThreshold);
    5173             :     }
    5174        1000 :     const double dfExactTransformThreshold = 0.5 * dfPct * dfSrcCoordPrecision;
    5175             : 
    5176      501000 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5177             :     {
    5178      500000 :         const double dfXBefore = padfX[iDstX];
    5179      500000 :         const double dfYBefore = padfY[iDstX];
    5180      500000 :         padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
    5181             :                        dfSrcCoordPrecision;
    5182      500000 :         padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
    5183             :                        dfSrcCoordPrecision;
    5184             : 
    5185             :         // If we are in an uncertainty zone, go to non-approximated
    5186             :         // transformation.
    5187             :         // Due to the 80% of half-precision threshold, dfSrcCoordPrecision must
    5188             :         // be at least 10 times greater than the approximation error.
    5189      500000 :         if (fabs(dfXBefore - padfX[iDstX]) > dfExactTransformThreshold ||
    5190      399914 :             fabs(dfYBefore - padfY[iDstX]) > dfExactTransformThreshold)
    5191             :         {
    5192      180090 :             padfX[iDstX] = iDstX + dfDstXOff;
    5193      180090 :             padfY[iDstX] = dfDstY;
    5194      180090 :             padfZ[iDstX] = 0.0;
    5195      180090 :             pfnTransformer(pTransformerArg, TRUE, 1, padfX + iDstX,
    5196      180090 :                            padfY + iDstX, padfZ + iDstX, pabSuccess + iDstX);
    5197      180090 :             padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
    5198             :                            dfSrcCoordPrecision;
    5199      180090 :             padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
    5200             :                            dfSrcCoordPrecision;
    5201             :         }
    5202             :     }
    5203        1000 : }
    5204             : 
    5205             : /************************************************************************/
    5206             : /*                    GWKCheckAndComputeSrcOffsets()                    */
    5207             : /************************************************************************/
    5208             : static CPL_INLINE bool
    5209   188281000 : GWKCheckAndComputeSrcOffsets(GWKJobStruct *psJob, int *_pabSuccess, int _iDstX,
    5210             :                              int _iDstY, double *_padfX, double *_padfY,
    5211             :                              int _nSrcXSize, int _nSrcYSize,
    5212             :                              GPtrDiff_t &iSrcOffset)
    5213             : {
    5214   188281000 :     const GDALWarpKernel *_poWK = psJob->poWK;
    5215   194887000 :     for (int iTry = 0; iTry < 2; ++iTry)
    5216             :     {
    5217   194887000 :         if (iTry == 1)
    5218             :         {
    5219             :             // If the source coordinate is slightly outside of the source raster
    5220             :             // retry to transform it alone, so that the exact coordinate
    5221             :             // transformer is used.
    5222             : 
    5223     6605570 :             _padfX[_iDstX] = _iDstX + 0.5 + _poWK->nDstXOff;
    5224     6605570 :             _padfY[_iDstX] = _iDstY + 0.5 + _poWK->nDstYOff;
    5225     6605570 :             double dfZ = 0;
    5226     6605570 :             _poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1,
    5227     6605570 :                                   _padfX + _iDstX, _padfY + _iDstX, &dfZ,
    5228     6605570 :                                   _pabSuccess + _iDstX);
    5229             :         }
    5230   194887000 :         if (!_pabSuccess[_iDstX])
    5231     3615020 :             return false;
    5232             : 
    5233             :         // If this happens this is likely the symptom of a bug somewhere.
    5234   191272000 :         if (std::isnan(_padfX[_iDstX]) || std::isnan(_padfY[_iDstX]))
    5235             :         {
    5236             :             static bool bNanCoordFound = false;
    5237           0 :             if (!bNanCoordFound)
    5238             :             {
    5239           0 :                 CPLDebug("WARP",
    5240             :                          "GWKCheckAndComputeSrcOffsets(): "
    5241             :                          "NaN coordinate found on point %d.",
    5242             :                          _iDstX);
    5243           0 :                 bNanCoordFound = true;
    5244             :             }
    5245           0 :             return false;
    5246             :         }
    5247             : 
    5248             :         /* --------------------------------------------------------------------
    5249             :          */
    5250             :         /*      Figure out what pixel we want in our source raster, and skip */
    5251             :         /*      further processing if it is well off the source image. */
    5252             :         /* --------------------------------------------------------------------
    5253             :          */
    5254             :         /* We test against the value before casting to avoid the */
    5255             :         /* problem of asymmetric truncation effects around zero.  That is */
    5256             :         /* -0.5 will be 0 when cast to an int. */
    5257   191272000 :         if (_padfX[_iDstX] < _poWK->nSrcXOff)
    5258             :         {
    5259             :             // If the source coordinate is slightly outside of the source raster
    5260             :             // retry to transform it alone, so that the exact coordinate
    5261             :             // transformer is used.
    5262    16862100 :             if (iTry == 0 && _padfX[_iDstX] > _poWK->nSrcXOff - 1)
    5263     2889880 :                 continue;
    5264    13972200 :             return false;
    5265             :         }
    5266             : 
    5267   174410000 :         if (_padfY[_iDstX] < _poWK->nSrcYOff)
    5268             :         {
    5269             :             // If the source coordinate is slightly outside of the source raster
    5270             :             // retry to transform it alone, so that the exact coordinate
    5271             :             // transformer is used.
    5272     7904380 :             if (iTry == 0 && _padfY[_iDstX] > _poWK->nSrcYOff - 1)
    5273      636226 :                 continue;
    5274     7268150 :             return false;
    5275             :         }
    5276             : 
    5277             :         // Check for potential overflow when casting from float to int, (if
    5278             :         // operating outside natural projection area, padfX/Y can be a very huge
    5279             :         // positive number before doing the actual conversion), as such cast is
    5280             :         // undefined behavior that can trigger exception with some compilers
    5281             :         // (see #6753)
    5282   166505000 :         if (_padfX[_iDstX] + 1e-10 > _nSrcXSize + _poWK->nSrcXOff)
    5283             :         {
    5284             :             // If the source coordinate is slightly outside of the source raster
    5285             :             // retry to transform it alone, so that the exact coordinate
    5286             :             // transformer is used.
    5287    13197100 :             if (iTry == 0 && _padfX[_iDstX] < _nSrcXSize + _poWK->nSrcXOff + 1)
    5288     2712810 :                 continue;
    5289    10484300 :             return false;
    5290             :         }
    5291   153308000 :         if (_padfY[_iDstX] + 1e-10 > _nSrcYSize + _poWK->nSrcYOff)
    5292             :         {
    5293             :             // If the source coordinate is slightly outside of the source raster
    5294             :             // retry to transform it alone, so that the exact coordinate
    5295             :             // transformer is used.
    5296     5693610 :             if (iTry == 0 && _padfY[_iDstX] < _nSrcYSize + _poWK->nSrcYOff + 1)
    5297      366653 :                 continue;
    5298     5326950 :             return false;
    5299             :         }
    5300             : 
    5301   147614000 :         break;
    5302             :     }
    5303             : 
    5304   147614000 :     int iSrcX = static_cast<int>(_padfX[_iDstX] + 1.0e-10) - _poWK->nSrcXOff;
    5305   147614000 :     int iSrcY = static_cast<int>(_padfY[_iDstX] + 1.0e-10) - _poWK->nSrcYOff;
    5306   147614000 :     if (iSrcX == _nSrcXSize)
    5307           0 :         iSrcX--;
    5308   147614000 :     if (iSrcY == _nSrcYSize)
    5309           0 :         iSrcY--;
    5310             : 
    5311             :     // Those checks should normally be OK given the previous ones.
    5312   147614000 :     CPLAssert(iSrcX >= 0);
    5313   147614000 :     CPLAssert(iSrcY >= 0);
    5314   147614000 :     CPLAssert(iSrcX < _nSrcXSize);
    5315   147614000 :     CPLAssert(iSrcY < _nSrcYSize);
    5316             : 
    5317   147614000 :     iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * _nSrcXSize;
    5318             : 
    5319   147614000 :     return true;
    5320             : }
    5321             : 
    5322             : /************************************************************************/
    5323             : /*                 GWKOneSourceCornerFailsToReproject()                 */
    5324             : /************************************************************************/
    5325             : 
    5326         934 : static bool GWKOneSourceCornerFailsToReproject(GWKJobStruct *psJob)
    5327             : {
    5328         934 :     GDALWarpKernel *poWK = psJob->poWK;
    5329        2792 :     for (int iY = 0; iY <= 1; ++iY)
    5330             :     {
    5331        5580 :         for (int iX = 0; iX <= 1; ++iX)
    5332             :         {
    5333        3722 :             double dfXTmp = poWK->nSrcXOff + iX * poWK->nSrcXSize;
    5334        3722 :             double dfYTmp = poWK->nSrcYOff + iY * poWK->nSrcYSize;
    5335        3722 :             double dfZTmp = 0;
    5336        3722 :             int nSuccess = FALSE;
    5337        3722 :             poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp,
    5338             :                                  &dfYTmp, &dfZTmp, &nSuccess);
    5339        3722 :             if (!nSuccess)
    5340           6 :                 return true;
    5341             :         }
    5342             :     }
    5343         928 :     return false;
    5344             : }
    5345             : 
    5346             : /************************************************************************/
    5347             : /*                      GWKAdjustSrcOffsetOnEdge()                      */
    5348             : /************************************************************************/
    5349             : 
    5350        9714 : static bool GWKAdjustSrcOffsetOnEdge(GWKJobStruct *psJob,
    5351             :                                      GPtrDiff_t &iSrcOffset)
    5352             : {
    5353        9714 :     GDALWarpKernel *poWK = psJob->poWK;
    5354        9714 :     const int nSrcXSize = poWK->nSrcXSize;
    5355        9714 :     const int nSrcYSize = poWK->nSrcYSize;
    5356             : 
    5357             :     // Check if the computed source position slightly altered
    5358             :     // fails to reproject. If so, then we are at the edge of
    5359             :     // the validity area, and it is worth checking neighbour
    5360             :     // source pixels for validity.
    5361        9714 :     int nSuccess = FALSE;
    5362             :     {
    5363        9714 :         double dfXTmp =
    5364        9714 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5365        9714 :         double dfYTmp =
    5366        9714 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5367        9714 :         double dfZTmp = 0;
    5368        9714 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5369             :                              &dfZTmp, &nSuccess);
    5370             :     }
    5371        9714 :     if (nSuccess)
    5372             :     {
    5373        6996 :         double dfXTmp =
    5374        6996 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5375        6996 :         double dfYTmp =
    5376        6996 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
    5377        6996 :         double dfZTmp = 0;
    5378        6996 :         nSuccess = FALSE;
    5379        6996 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5380             :                              &dfZTmp, &nSuccess);
    5381             :     }
    5382        9714 :     if (nSuccess)
    5383             :     {
    5384        5624 :         double dfXTmp =
    5385        5624 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
    5386        5624 :         double dfYTmp =
    5387        5624 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5388        5624 :         double dfZTmp = 0;
    5389        5624 :         nSuccess = FALSE;
    5390        5624 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5391             :                              &dfZTmp, &nSuccess);
    5392             :     }
    5393             : 
    5394       14166 :     if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
    5395        4452 :         CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + 1))
    5396             :     {
    5397        1860 :         iSrcOffset++;
    5398        1860 :         return true;
    5399             :     }
    5400       10290 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
    5401        2436 :              CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + nSrcXSize))
    5402             :     {
    5403        1334 :         iSrcOffset += nSrcXSize;
    5404        1334 :         return true;
    5405             :     }
    5406        7838 :     else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
    5407        1318 :              CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - 1))
    5408             :     {
    5409         956 :         iSrcOffset--;
    5410         956 :         return true;
    5411             :     }
    5412        5924 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
    5413         360 :              CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - nSrcXSize))
    5414             :     {
    5415         340 :         iSrcOffset -= nSrcXSize;
    5416         340 :         return true;
    5417             :     }
    5418             : 
    5419        5224 :     return false;
    5420             : }
    5421             : 
    5422             : /************************************************************************/
    5423             : /*             GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity()              */
    5424             : /************************************************************************/
    5425             : 
    5426           0 : static bool GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(GWKJobStruct *psJob,
    5427             :                                                       GPtrDiff_t &iSrcOffset)
    5428             : {
    5429           0 :     GDALWarpKernel *poWK = psJob->poWK;
    5430           0 :     const int nSrcXSize = poWK->nSrcXSize;
    5431           0 :     const int nSrcYSize = poWK->nSrcYSize;
    5432             : 
    5433             :     // Check if the computed source position slightly altered
    5434             :     // fails to reproject. If so, then we are at the edge of
    5435             :     // the validity area, and it is worth checking neighbour
    5436             :     // source pixels for validity.
    5437           0 :     int nSuccess = FALSE;
    5438             :     {
    5439           0 :         double dfXTmp =
    5440           0 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5441           0 :         double dfYTmp =
    5442           0 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5443           0 :         double dfZTmp = 0;
    5444           0 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5445             :                              &dfZTmp, &nSuccess);
    5446             :     }
    5447           0 :     if (nSuccess)
    5448             :     {
    5449           0 :         double dfXTmp =
    5450           0 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5451           0 :         double dfYTmp =
    5452           0 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
    5453           0 :         double dfZTmp = 0;
    5454           0 :         nSuccess = FALSE;
    5455           0 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5456             :                              &dfZTmp, &nSuccess);
    5457             :     }
    5458           0 :     if (nSuccess)
    5459             :     {
    5460           0 :         double dfXTmp =
    5461           0 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
    5462           0 :         double dfYTmp =
    5463           0 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5464           0 :         double dfZTmp = 0;
    5465           0 :         nSuccess = FALSE;
    5466           0 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5467             :                              &dfZTmp, &nSuccess);
    5468             :     }
    5469             : 
    5470           0 :     if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
    5471           0 :         poWK->pafUnifiedSrcDensity[iSrcOffset + 1] >=
    5472             :             SRC_DENSITY_THRESHOLD_FLOAT)
    5473             :     {
    5474           0 :         iSrcOffset++;
    5475           0 :         return true;
    5476             :     }
    5477           0 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
    5478           0 :              poWK->pafUnifiedSrcDensity[iSrcOffset + nSrcXSize] >=
    5479             :                  SRC_DENSITY_THRESHOLD_FLOAT)
    5480             :     {
    5481           0 :         iSrcOffset += nSrcXSize;
    5482           0 :         return true;
    5483             :     }
    5484           0 :     else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
    5485           0 :              poWK->pafUnifiedSrcDensity[iSrcOffset - 1] >=
    5486             :                  SRC_DENSITY_THRESHOLD_FLOAT)
    5487             :     {
    5488           0 :         iSrcOffset--;
    5489           0 :         return true;
    5490             :     }
    5491           0 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
    5492           0 :              poWK->pafUnifiedSrcDensity[iSrcOffset - nSrcXSize] >=
    5493             :                  SRC_DENSITY_THRESHOLD_FLOAT)
    5494             :     {
    5495           0 :         iSrcOffset -= nSrcXSize;
    5496           0 :         return true;
    5497             :     }
    5498             : 
    5499           0 :     return false;
    5500             : }
    5501             : 
    5502             : /************************************************************************/
    5503             : /*                           GWKGeneralCase()                           */
    5504             : /*                                                                      */
    5505             : /*      This is the most general case.  It attempts to handle all       */
    5506             : /*      possible features with relatively little concern for            */
    5507             : /*      efficiency.                                                     */
    5508             : /************************************************************************/
    5509             : 
    5510         239 : static void GWKGeneralCaseThread(void *pData)
    5511             : {
    5512         239 :     GWKJobStruct *psJob = reinterpret_cast<GWKJobStruct *>(pData);
    5513         239 :     GDALWarpKernel *poWK = psJob->poWK;
    5514         239 :     const int iYMin = psJob->iYMin;
    5515         239 :     const int iYMax = psJob->iYMax;
    5516             :     const double dfMultFactorVerticalShiftPipeline =
    5517         239 :         poWK->bApplyVerticalShift
    5518         239 :             ? CPLAtof(CSLFetchNameValueDef(
    5519           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    5520             :                   "1.0"))
    5521         239 :             : 0.0;
    5522             :     const bool bAvoidNoDataSingleBand =
    5523         239 :         poWK->nBands == 1 ||
    5524           0 :         !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
    5525         239 :                                           "UNIFIED_SRC_NODATA", "FALSE"));
    5526             : 
    5527         239 :     int nDstXSize = poWK->nDstXSize;
    5528         239 :     int nSrcXSize = poWK->nSrcXSize;
    5529         239 :     int nSrcYSize = poWK->nSrcYSize;
    5530             : 
    5531             :     /* -------------------------------------------------------------------- */
    5532             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    5533             :     /*      scanlines worth of positions.                                   */
    5534             :     /* -------------------------------------------------------------------- */
    5535             :     // For x, 2 *, because we cache the precomputed values at the end.
    5536             :     double *padfX =
    5537         239 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    5538             :     double *padfY =
    5539         239 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5540             :     double *padfZ =
    5541         239 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5542         239 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    5543             : 
    5544         239 :     const bool bUse4SamplesFormula = CanUse4SamplesFormula(poWK);
    5545             : 
    5546         239 :     GWKResampleWrkStruct *psWrkStruct = nullptr;
    5547         239 :     if (poWK->eResample != GRA_NearestNeighbour)
    5548             :     {
    5549         220 :         psWrkStruct = GWKResampleCreateWrkStruct(poWK);
    5550             :     }
    5551         239 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    5552         239 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    5553         239 :     const double dfErrorThreshold = CPLAtof(
    5554         239 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    5555             : 
    5556             :     const bool bOneSourceCornerFailsToReproject =
    5557         239 :         GWKOneSourceCornerFailsToReproject(psJob);
    5558             : 
    5559             :     // Precompute values.
    5560        6469 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5561        6230 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    5562             : 
    5563             :     /* ==================================================================== */
    5564             :     /*      Loop over output lines.                                         */
    5565             :     /* ==================================================================== */
    5566        6469 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    5567             :     {
    5568             :         /* --------------------------------------------------------------------
    5569             :          */
    5570             :         /*      Setup points to transform to source image space. */
    5571             :         /* --------------------------------------------------------------------
    5572             :          */
    5573        6230 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    5574        6230 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    5575      242390 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5576      236160 :             padfY[iDstX] = dfY;
    5577        6230 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    5578             : 
    5579             :         /* --------------------------------------------------------------------
    5580             :          */
    5581             :         /*      Transform the points from destination pixel/line coordinates */
    5582             :         /*      to source pixel/line coordinates. */
    5583             :         /* --------------------------------------------------------------------
    5584             :          */
    5585        6230 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    5586             :                              padfY, padfZ, pabSuccess);
    5587        6230 :         if (dfSrcCoordPrecision > 0.0)
    5588             :         {
    5589           0 :             GWKRoundSourceCoordinates(
    5590             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    5591             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    5592           0 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    5593             :         }
    5594             : 
    5595             :         /* ====================================================================
    5596             :          */
    5597             :         /*      Loop over pixels in output scanline. */
    5598             :         /* ====================================================================
    5599             :          */
    5600      242390 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5601             :         {
    5602      236160 :             GPtrDiff_t iSrcOffset = 0;
    5603      236160 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    5604             :                                               padfX, padfY, nSrcXSize,
    5605             :                                               nSrcYSize, iSrcOffset))
    5606           0 :                 continue;
    5607             : 
    5608             :             /* --------------------------------------------------------------------
    5609             :              */
    5610             :             /*      Do not try to apply transparent/invalid source pixels to the
    5611             :              */
    5612             :             /*      destination.  This currently ignores the multi-pixel input
    5613             :              */
    5614             :             /*      of bilinear and cubic resamples. */
    5615             :             /* --------------------------------------------------------------------
    5616             :              */
    5617      236160 :             double dfDensity = 1.0;
    5618             : 
    5619      236160 :             if (poWK->pafUnifiedSrcDensity != nullptr)
    5620             :             {
    5621        1200 :                 dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    5622        1200 :                 if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
    5623             :                 {
    5624           0 :                     if (!bOneSourceCornerFailsToReproject)
    5625             :                     {
    5626           0 :                         continue;
    5627             :                     }
    5628           0 :                     else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
    5629             :                                  psJob, iSrcOffset))
    5630             :                     {
    5631           0 :                         dfDensity =
    5632           0 :                             double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    5633             :                     }
    5634             :                     else
    5635             :                     {
    5636           0 :                         continue;
    5637             :                     }
    5638             :                 }
    5639             :             }
    5640             : 
    5641      236160 :             if (poWK->panUnifiedSrcValid != nullptr &&
    5642           0 :                 !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    5643             :             {
    5644           0 :                 if (!bOneSourceCornerFailsToReproject)
    5645             :                 {
    5646           0 :                     continue;
    5647             :                 }
    5648           0 :                 else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
    5649             :                 {
    5650           0 :                     continue;
    5651             :                 }
    5652             :             }
    5653             : 
    5654             :             /* ====================================================================
    5655             :              */
    5656             :             /*      Loop processing each band. */
    5657             :             /* ====================================================================
    5658             :              */
    5659      236160 :             bool bHasFoundDensity = false;
    5660             : 
    5661      236160 :             const GPtrDiff_t iDstOffset =
    5662      236160 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    5663      472320 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    5664             :             {
    5665      236160 :                 double dfBandDensity = 0.0;
    5666      236160 :                 double dfValueReal = 0.0;
    5667      236160 :                 double dfValueImag = 0.0;
    5668             : 
    5669             :                 /* --------------------------------------------------------------------
    5670             :                  */
    5671             :                 /*      Collect the source value. */
    5672             :                 /* --------------------------------------------------------------------
    5673             :                  */
    5674      236160 :                 if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
    5675             :                     nSrcYSize == 1)
    5676             :                 {
    5677             :                     // FALSE is returned if dfBandDensity == 0, which is
    5678             :                     // checked below.
    5679         568 :                     CPL_IGNORE_RET_VAL(GWKGetPixelValue(
    5680             :                         poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal,
    5681             :                         &dfValueImag));
    5682             :                 }
    5683      235592 :                 else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
    5684             :                 {
    5685         248 :                     GWKBilinearResample4Sample(
    5686         248 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5687         248 :                         padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5688             :                         &dfValueReal, &dfValueImag);
    5689             :                 }
    5690      235344 :                 else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
    5691             :                 {
    5692         248 :                     GWKCubicResample4Sample(
    5693         248 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5694         248 :                         padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5695             :                         &dfValueReal, &dfValueImag);
    5696             :                 }
    5697             :                 else
    5698             : #ifdef DEBUG
    5699             :                     // Only useful for clang static analyzer.
    5700      235096 :                     if (psWrkStruct != nullptr)
    5701             : #endif
    5702             :                     {
    5703      235096 :                         psWrkStruct->pfnGWKResample(
    5704      235096 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5705      235096 :                             padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5706             :                             &dfValueReal, &dfValueImag, psWrkStruct);
    5707             :                     }
    5708             : 
    5709             :                 // If we didn't find any valid inputs skip to next band.
    5710      236160 :                 if (dfBandDensity < BAND_DENSITY_THRESHOLD)
    5711           0 :                     continue;
    5712             : 
    5713      236160 :                 if (poWK->bApplyVerticalShift)
    5714             :                 {
    5715           0 :                     if (!std::isfinite(padfZ[iDstX]))
    5716           0 :                         continue;
    5717             :                     // Subtract padfZ[] since the coordinate transformation is
    5718             :                     // from target to source
    5719           0 :                     dfValueReal =
    5720           0 :                         dfValueReal * poWK->dfMultFactorVerticalShift -
    5721           0 :                         padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
    5722             :                 }
    5723             : 
    5724      236160 :                 bHasFoundDensity = true;
    5725             : 
    5726             :                 /* --------------------------------------------------------------------
    5727             :                  */
    5728             :                 /*      We have a computed value from the source.  Now apply it
    5729             :                  * to      */
    5730             :                 /*      the destination pixel. */
    5731             :                 /* --------------------------------------------------------------------
    5732             :                  */
    5733      236160 :                 GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    5734             :                                  dfValueReal, dfValueImag,
    5735             :                                  bAvoidNoDataSingleBand);
    5736             :             }
    5737             : 
    5738      236160 :             if (!bHasFoundDensity)
    5739           0 :                 continue;
    5740             : 
    5741      236160 :             if (!bAvoidNoDataSingleBand)
    5742             :             {
    5743           0 :                 GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    5744             :             }
    5745             : 
    5746             :             /* --------------------------------------------------------------------
    5747             :              */
    5748             :             /*      Update destination density/validity masks. */
    5749             :             /* --------------------------------------------------------------------
    5750             :              */
    5751      236160 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    5752             : 
    5753      236160 :             if (poWK->panDstValid != nullptr)
    5754             :             {
    5755           0 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    5756             :             }
    5757             :         } /* Next iDstX */
    5758             : 
    5759             :         /* --------------------------------------------------------------------
    5760             :          */
    5761             :         /*      Report progress to the user, and optionally cancel out. */
    5762             :         /* --------------------------------------------------------------------
    5763             :          */
    5764        6230 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    5765           0 :             break;
    5766             :     }
    5767             : 
    5768             :     /* -------------------------------------------------------------------- */
    5769             :     /*      Cleanup and return.                                             */
    5770             :     /* -------------------------------------------------------------------- */
    5771         239 :     CPLFree(padfX);
    5772         239 :     CPLFree(padfY);
    5773         239 :     CPLFree(padfZ);
    5774         239 :     CPLFree(pabSuccess);
    5775         239 :     if (psWrkStruct)
    5776         220 :         GWKResampleDeleteWrkStruct(psWrkStruct);
    5777         239 : }
    5778             : 
    5779         239 : static CPLErr GWKGeneralCase(GDALWarpKernel *poWK)
    5780             : {
    5781         239 :     return GWKRun(poWK, "GWKGeneralCase", GWKGeneralCaseThread);
    5782             : }
    5783             : 
    5784             : /************************************************************************/
    5785             : /*                            GWKRealCase()                             */
    5786             : /*                                                                      */
    5787             : /*      General case for non-complex data types.                        */
    5788             : /************************************************************************/
    5789             : 
    5790         223 : static void GWKRealCaseThread(void *pData)
    5791             : 
    5792             : {
    5793         223 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    5794         223 :     GDALWarpKernel *poWK = psJob->poWK;
    5795         223 :     const int iYMin = psJob->iYMin;
    5796         223 :     const int iYMax = psJob->iYMax;
    5797             : 
    5798         223 :     const int nDstXSize = poWK->nDstXSize;
    5799         223 :     const int nSrcXSize = poWK->nSrcXSize;
    5800         223 :     const int nSrcYSize = poWK->nSrcYSize;
    5801             :     const double dfMultFactorVerticalShiftPipeline =
    5802         223 :         poWK->bApplyVerticalShift
    5803         223 :             ? CPLAtof(CSLFetchNameValueDef(
    5804           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    5805             :                   "1.0"))
    5806         223 :             : 0.0;
    5807             :     const bool bAvoidNoDataSingleBand =
    5808         305 :         poWK->nBands == 1 ||
    5809          82 :         !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
    5810         223 :                                           "UNIFIED_SRC_NODATA", "FALSE"));
    5811             : 
    5812             :     /* -------------------------------------------------------------------- */
    5813             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    5814             :     /*      scanlines worth of positions.                                   */
    5815             :     /* -------------------------------------------------------------------- */
    5816             : 
    5817             :     // For x, 2 *, because we cache the precomputed values at the end.
    5818             :     double *padfX =
    5819         223 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    5820             :     double *padfY =
    5821         223 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5822             :     double *padfZ =
    5823         223 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5824         223 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    5825             : 
    5826         223 :     const bool bUse4SamplesFormula = CanUse4SamplesFormula(poWK);
    5827             : 
    5828         223 :     GWKResampleWrkStruct *psWrkStruct = nullptr;
    5829         223 :     if (poWK->eResample != GRA_NearestNeighbour)
    5830             :     {
    5831         181 :         psWrkStruct = GWKResampleCreateWrkStruct(poWK);
    5832             :     }
    5833         223 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    5834         223 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    5835         223 :     const double dfErrorThreshold = CPLAtof(
    5836         223 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    5837             : 
    5838         638 :     const bool bSrcMaskIsDensity = poWK->panUnifiedSrcValid == nullptr &&
    5839         415 :                                    poWK->papanBandSrcValid == nullptr &&
    5840         192 :                                    poWK->pafUnifiedSrcDensity != nullptr;
    5841             : 
    5842             :     const bool bOneSourceCornerFailsToReproject =
    5843         223 :         GWKOneSourceCornerFailsToReproject(psJob);
    5844             : 
    5845             :     // Precompute values.
    5846       24657 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5847       24434 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    5848             : 
    5849             :     /* ==================================================================== */
    5850             :     /*      Loop over output lines.                                         */
    5851             :     /* ==================================================================== */
    5852       25909 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    5853             :     {
    5854             :         /* --------------------------------------------------------------------
    5855             :          */
    5856             :         /*      Setup points to transform to source image space. */
    5857             :         /* --------------------------------------------------------------------
    5858             :          */
    5859       25686 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    5860       25686 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    5861    44594200 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5862    44568500 :             padfY[iDstX] = dfY;
    5863       25686 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    5864             : 
    5865             :         /* --------------------------------------------------------------------
    5866             :          */
    5867             :         /*      Transform the points from destination pixel/line coordinates */
    5868             :         /*      to source pixel/line coordinates. */
    5869             :         /* --------------------------------------------------------------------
    5870             :          */
    5871       25686 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    5872             :                              padfY, padfZ, pabSuccess);
    5873       25686 :         if (dfSrcCoordPrecision > 0.0)
    5874             :         {
    5875           0 :             GWKRoundSourceCoordinates(
    5876             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    5877             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    5878           0 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    5879             :         }
    5880             : 
    5881             :         /* ====================================================================
    5882             :          */
    5883             :         /*      Loop over pixels in output scanline. */
    5884             :         /* ====================================================================
    5885             :          */
    5886    44594200 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5887             :         {
    5888    44568500 :             GPtrDiff_t iSrcOffset = 0;
    5889    44568500 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    5890             :                                               padfX, padfY, nSrcXSize,
    5891             :                                               nSrcYSize, iSrcOffset))
    5892    43823900 :                 continue;
    5893             : 
    5894             :             /* --------------------------------------------------------------------
    5895             :              */
    5896             :             /*      Do not try to apply transparent/invalid source pixels to the
    5897             :              */
    5898             :             /*      destination.  This currently ignores the multi-pixel input
    5899             :              */
    5900             :             /*      of bilinear and cubic resamples. */
    5901             :             /* --------------------------------------------------------------------
    5902             :              */
    5903    31812400 :             double dfDensity = 1.0;
    5904             : 
    5905    31812400 :             if (poWK->pafUnifiedSrcDensity != nullptr)
    5906             :             {
    5907     1669560 :                 dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    5908     1669560 :                 if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
    5909             :                 {
    5910     1538480 :                     if (!bOneSourceCornerFailsToReproject)
    5911             :                     {
    5912     1538480 :                         continue;
    5913             :                     }
    5914           0 :                     else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
    5915             :                                  psJob, iSrcOffset))
    5916             :                     {
    5917           0 :                         dfDensity =
    5918           0 :                             double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    5919             :                     }
    5920             :                     else
    5921             :                     {
    5922           0 :                         continue;
    5923             :                     }
    5924             :                 }
    5925             :             }
    5926             : 
    5927    59903100 :             if (poWK->panUnifiedSrcValid != nullptr &&
    5928    29629200 :                 !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    5929             :             {
    5930    29531600 :                 if (!bOneSourceCornerFailsToReproject)
    5931             :                 {
    5932    29529300 :                     continue;
    5933             :                 }
    5934        2229 :                 else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
    5935             :                 {
    5936           0 :                     continue;
    5937             :                 }
    5938             :             }
    5939             : 
    5940             :             /* ====================================================================
    5941             :              */
    5942             :             /*      Loop processing each band. */
    5943             :             /* ====================================================================
    5944             :              */
    5945      744578 :             bool bHasFoundDensity = false;
    5946             : 
    5947      744578 :             const GPtrDiff_t iDstOffset =
    5948      744578 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    5949     2092550 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    5950             :             {
    5951     1347980 :                 double dfBandDensity = 0.0;
    5952     1347980 :                 double dfValueReal = 0.0;
    5953             : 
    5954             :                 /* --------------------------------------------------------------------
    5955             :                  */
    5956             :                 /*      Collect the source value. */
    5957             :                 /* --------------------------------------------------------------------
    5958             :                  */
    5959     1347980 :                 if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
    5960             :                     nSrcYSize == 1)
    5961             :                 {
    5962             :                     // FALSE is returned if dfBandDensity == 0, which is
    5963             :                     // checked below.
    5964       15516 :                     CPL_IGNORE_RET_VAL(GWKGetPixelValueReal(
    5965             :                         poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal));
    5966             :                 }
    5967     1332460 :                 else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
    5968             :                 {
    5969        2046 :                     double dfValueImagIgnored = 0.0;
    5970        2046 :                     GWKBilinearResample4Sample(
    5971        2046 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5972        2046 :                         padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5973        2046 :                         &dfValueReal, &dfValueImagIgnored);
    5974             :                 }
    5975     1330410 :                 else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
    5976             :                 {
    5977      691552 :                     if (bSrcMaskIsDensity)
    5978             :                     {
    5979      389755 :                         if (poWK->eWorkingDataType == GDT_UInt8)
    5980             :                         {
    5981      389755 :                             GWKCubicResampleSrcMaskIsDensity4SampleRealT<GByte>(
    5982      389755 :                                 poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5983      389755 :                                 padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5984             :                                 &dfValueReal);
    5985             :                         }
    5986           0 :                         else if (poWK->eWorkingDataType == GDT_UInt16)
    5987             :                         {
    5988             :                             GWKCubicResampleSrcMaskIsDensity4SampleRealT<
    5989           0 :                                 GUInt16>(poWK, iBand,
    5990           0 :                                          padfX[iDstX] - poWK->nSrcXOff,
    5991           0 :                                          padfY[iDstX] - poWK->nSrcYOff,
    5992             :                                          &dfBandDensity, &dfValueReal);
    5993             :                         }
    5994             :                         else
    5995             :                         {
    5996           0 :                             GWKCubicResampleSrcMaskIsDensity4SampleReal(
    5997           0 :                                 poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5998           0 :                                 padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5999             :                                 &dfValueReal);
    6000             :                         }
    6001             :                     }
    6002             :                     else
    6003             :                     {
    6004      301797 :                         double dfValueImagIgnored = 0.0;
    6005      301797 :                         GWKCubicResample4Sample(
    6006      301797 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6007      301797 :                             padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    6008             :                             &dfValueReal, &dfValueImagIgnored);
    6009      691552 :                     }
    6010             :                 }
    6011             :                 else
    6012             : #ifdef DEBUG
    6013             :                     // Only useful for clang static analyzer.
    6014      638861 :                     if (psWrkStruct != nullptr)
    6015             : #endif
    6016             :                     {
    6017      638861 :                         double dfValueImagIgnored = 0.0;
    6018      638861 :                         psWrkStruct->pfnGWKResample(
    6019      638861 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6020      638861 :                             padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    6021             :                             &dfValueReal, &dfValueImagIgnored, psWrkStruct);
    6022             :                     }
    6023             : 
    6024             :                 // If we didn't find any valid inputs skip to next band.
    6025     1347980 :                 if (dfBandDensity < BAND_DENSITY_THRESHOLD)
    6026           0 :                     continue;
    6027             : 
    6028     1347980 :                 if (poWK->bApplyVerticalShift)
    6029             :                 {
    6030           0 :                     if (!std::isfinite(padfZ[iDstX]))
    6031           0 :                         continue;
    6032             :                     // Subtract padfZ[] since the coordinate transformation is
    6033             :                     // from target to source
    6034           0 :                     dfValueReal =
    6035           0 :                         dfValueReal * poWK->dfMultFactorVerticalShift -
    6036           0 :                         padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
    6037             :                 }
    6038             : 
    6039     1347980 :                 bHasFoundDensity = true;
    6040             : 
    6041             :                 /* --------------------------------------------------------------------
    6042             :                  */
    6043             :                 /*      We have a computed value from the source.  Now apply it
    6044             :                  * to      */
    6045             :                 /*      the destination pixel. */
    6046             :                 /* --------------------------------------------------------------------
    6047             :                  */
    6048     1347980 :                 GWKSetPixelValueReal(poWK, iBand, iDstOffset, dfBandDensity,
    6049             :                                      dfValueReal, bAvoidNoDataSingleBand);
    6050             :             }
    6051             : 
    6052      744578 :             if (!bHasFoundDensity)
    6053           0 :                 continue;
    6054             : 
    6055      744578 :             if (!bAvoidNoDataSingleBand)
    6056             :             {
    6057      100295 :                 GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    6058             :             }
    6059             : 
    6060             :             /* --------------------------------------------------------------------
    6061             :              */
    6062             :             /*      Update destination density/validity masks. */
    6063             :             /* --------------------------------------------------------------------
    6064             :              */
    6065      744578 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    6066             : 
    6067      744578 :             if (poWK->panDstValid != nullptr)
    6068             :             {
    6069      104586 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    6070             :             }
    6071             :         }  // Next iDstX.
    6072             : 
    6073             :         /* --------------------------------------------------------------------
    6074             :          */
    6075             :         /*      Report progress to the user, and optionally cancel out. */
    6076             :         /* --------------------------------------------------------------------
    6077             :          */
    6078       25686 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    6079           0 :             break;
    6080             :     }
    6081             : 
    6082             :     /* -------------------------------------------------------------------- */
    6083             :     /*      Cleanup and return.                                             */
    6084             :     /* -------------------------------------------------------------------- */
    6085         223 :     CPLFree(padfX);
    6086         223 :     CPLFree(padfY);
    6087         223 :     CPLFree(padfZ);
    6088         223 :     CPLFree(pabSuccess);
    6089         223 :     if (psWrkStruct)
    6090         181 :         GWKResampleDeleteWrkStruct(psWrkStruct);
    6091         223 : }
    6092             : 
    6093         223 : static CPLErr GWKRealCase(GDALWarpKernel *poWK)
    6094             : {
    6095         223 :     return GWKRun(poWK, "GWKRealCase", GWKRealCaseThread);
    6096             : }
    6097             : 
    6098             : /************************************************************************/
    6099             : /*                 GWKCubicResampleNoMasks4MultiBandT()                 */
    6100             : /************************************************************************/
    6101             : 
    6102             : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
    6103             : /* and enough SSE registries */
    6104             : #if defined(USE_SSE2)
    6105             : 
    6106   141836000 : static inline float Convolute4x4(const __m128 row0, const __m128 row1,
    6107             :                                  const __m128 row2, const __m128 row3,
    6108             :                                  const __m128 weightsXY0,
    6109             :                                  const __m128 weightsXY1,
    6110             :                                  const __m128 weightsXY2,
    6111             :                                  const __m128 weightsXY3)
    6112             : {
    6113   992853000 :     return XMMHorizontalAdd(_mm_add_ps(
    6114             :         _mm_add_ps(_mm_mul_ps(row0, weightsXY0), _mm_mul_ps(row1, weightsXY1)),
    6115             :         _mm_add_ps(_mm_mul_ps(row2, weightsXY2),
    6116   141836000 :                    _mm_mul_ps(row3, weightsXY3))));
    6117             : }
    6118             : 
    6119             : template <class T>
    6120    48760542 : static void GWKCubicResampleNoMasks4MultiBandT(const GDALWarpKernel *poWK,
    6121             :                                                double dfSrcX, double dfSrcY,
    6122             :                                                const GPtrDiff_t iDstOffset)
    6123             : {
    6124    48760542 :     const double dfSrcXShifted = dfSrcX - 0.5;
    6125    48760542 :     const int iSrcX = static_cast<int>(dfSrcXShifted);
    6126    48760542 :     const double dfSrcYShifted = dfSrcY - 0.5;
    6127    48760542 :     const int iSrcY = static_cast<int>(dfSrcYShifted);
    6128    48760542 :     const GPtrDiff_t iSrcOffset =
    6129    48760542 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    6130             : 
    6131             :     // Get the bilinear interpolation at the image borders.
    6132    48760542 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    6133    47414062 :         iSrcY + 2 >= poWK->nSrcYSize)
    6134             :     {
    6135     5927540 :         for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6136             :         {
    6137             :             T value;
    6138     4445650 :             GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    6139             :                                                &value);
    6140     4445650 :             reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
    6141             :                 value;
    6142     1481880 :         }
    6143             :     }
    6144             :     else
    6145             :     {
    6146    47278662 :         const float fDeltaX = static_cast<float>(dfSrcXShifted) - iSrcX;
    6147    47278662 :         const float fDeltaY = static_cast<float>(dfSrcYShifted) - iSrcY;
    6148             : 
    6149             :         float afCoeffsX[4];
    6150             :         float afCoeffsY[4];
    6151    47278662 :         GWKCubicComputeWeights(fDeltaX, afCoeffsX);
    6152    47278662 :         GWKCubicComputeWeights(fDeltaY, afCoeffsY);
    6153    47278662 :         const auto weightsX = _mm_loadu_ps(afCoeffsX);
    6154             :         const auto weightsXY0 =
    6155    94557424 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[0]), weightsX);
    6156             :         const auto weightsXY1 =
    6157    94557424 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[1]), weightsX);
    6158             :         const auto weightsXY2 =
    6159    94557424 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[2]), weightsX);
    6160             :         const auto weightsXY3 =
    6161    47278662 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[3]), weightsX);
    6162             : 
    6163    47278662 :         const GPtrDiff_t iOffset = iSrcOffset - poWK->nSrcXSize - 1;
    6164             : 
    6165    47278662 :         int iBand = 0;
    6166             :         // Process 2 bands at a time
    6167    94557424 :         for (; iBand + 1 < poWK->nBands; iBand += 2)
    6168             :         {
    6169    47278662 :             const T *CPL_RESTRICT pBand0 =
    6170    47278662 :                 reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
    6171    47278662 :             const auto row0_0 = XMMLoad4Values(pBand0 + iOffset);
    6172             :             const auto row1_0 =
    6173    47278662 :                 XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
    6174             :             const auto row2_0 =
    6175    47278662 :                 XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
    6176             :             const auto row3_0 =
    6177    47278662 :                 XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
    6178             : 
    6179    47278662 :             const T *CPL_RESTRICT pBand1 =
    6180    47278662 :                 reinterpret_cast<const T *>(poWK->papabySrcImage[iBand + 1]);
    6181    47278662 :             const auto row0_1 = XMMLoad4Values(pBand1 + iOffset);
    6182             :             const auto row1_1 =
    6183    47278662 :                 XMMLoad4Values(pBand1 + iOffset + poWK->nSrcXSize);
    6184             :             const auto row2_1 =
    6185    47278662 :                 XMMLoad4Values(pBand1 + iOffset + 2 * poWK->nSrcXSize);
    6186             :             const auto row3_1 =
    6187    47278662 :                 XMMLoad4Values(pBand1 + iOffset + 3 * poWK->nSrcXSize);
    6188             : 
    6189             :             const float fValue_0 =
    6190    47278662 :                 Convolute4x4(row0_0, row1_0, row2_0, row3_0, weightsXY0,
    6191             :                              weightsXY1, weightsXY2, weightsXY3);
    6192             : 
    6193             :             const float fValue_1 =
    6194    47278662 :                 Convolute4x4(row0_1, row1_1, row2_1, row3_1, weightsXY0,
    6195             :                              weightsXY1, weightsXY2, weightsXY3);
    6196             : 
    6197    47278662 :             T *CPL_RESTRICT pDstBand0 =
    6198    47278662 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
    6199    47278662 :             pDstBand0[iDstOffset] = GWKClampValueT<T>(fValue_0);
    6200             : 
    6201    47278662 :             T *CPL_RESTRICT pDstBand1 =
    6202    47278662 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand + 1]);
    6203    47278662 :             pDstBand1[iDstOffset] = GWKClampValueT<T>(fValue_1);
    6204             :         }
    6205    47278662 :         if (iBand < poWK->nBands)
    6206             :         {
    6207    47278662 :             const T *CPL_RESTRICT pBand0 =
    6208    47278662 :                 reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
    6209    47278662 :             const auto row0 = XMMLoad4Values(pBand0 + iOffset);
    6210             :             const auto row1 =
    6211    47278662 :                 XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
    6212             :             const auto row2 =
    6213    47278662 :                 XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
    6214             :             const auto row3 =
    6215    47278662 :                 XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
    6216             : 
    6217             :             const float fValue =
    6218    47278662 :                 Convolute4x4(row0, row1, row2, row3, weightsXY0, weightsXY1,
    6219             :                              weightsXY2, weightsXY3);
    6220             : 
    6221    47278662 :             T *CPL_RESTRICT pDstBand =
    6222    47278662 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
    6223    47278662 :             pDstBand[iDstOffset] = GWKClampValueT<T>(fValue);
    6224             :         }
    6225             :     }
    6226             : 
    6227    48760542 :     if (poWK->pafDstDensity)
    6228    46606601 :         poWK->pafDstDensity[iDstOffset] = 1.0f;
    6229    48760542 : }
    6230             : 
    6231             : #endif  // defined(USE_SSE2)
    6232             : 
    6233             : /************************************************************************/
    6234             : /*          GWKResampleNoMasksOrDstDensityOnlyThreadInternal()          */
    6235             : /************************************************************************/
    6236             : 
    6237             : template <class T, GDALResampleAlg eResample, int bUse4SamplesFormula>
    6238        2028 : static void GWKResampleNoMasksOrDstDensityOnlyThreadInternal(void *pData)
    6239             : 
    6240             : {
    6241        2028 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    6242        2028 :     GDALWarpKernel *poWK = psJob->poWK;
    6243        2028 :     const int iYMin = psJob->iYMin;
    6244        2028 :     const int iYMax = psJob->iYMax;
    6245        2010 :     const double dfMultFactorVerticalShiftPipeline =
    6246        2028 :         poWK->bApplyVerticalShift
    6247          18 :             ? CPLAtof(CSLFetchNameValueDef(
    6248          18 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    6249             :                   "1.0"))
    6250             :             : 0.0;
    6251             : 
    6252        2028 :     const int nDstXSize = poWK->nDstXSize;
    6253        2028 :     const int nSrcXSize = poWK->nSrcXSize;
    6254        2028 :     const int nSrcYSize = poWK->nSrcYSize;
    6255             : 
    6256             :     /* -------------------------------------------------------------------- */
    6257             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    6258             :     /*      scanlines worth of positions.                                   */
    6259             :     /* -------------------------------------------------------------------- */
    6260             : 
    6261             :     // For x, 2 *, because we cache the precomputed values at the end.
    6262             :     double *padfX =
    6263        2028 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    6264             :     double *padfY =
    6265        2028 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6266             :     double *padfZ =
    6267        2028 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6268        2028 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    6269             : 
    6270        2028 :     const int nXRadius = poWK->nXRadius;
    6271             :     double *padfWeightsX =
    6272        2028 :         static_cast<double *>(CPLCalloc(1 + nXRadius * 2, sizeof(double)));
    6273             :     double *padfWeightsY = static_cast<double *>(
    6274        2028 :         CPLCalloc(1 + poWK->nYRadius * 2, sizeof(double)));
    6275        2028 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    6276        2028 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    6277        2028 :     const double dfErrorThreshold = CPLAtof(
    6278        2028 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    6279             : 
    6280             :     // Precompute values.
    6281      509839 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6282      507811 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    6283             : 
    6284             :     /* ==================================================================== */
    6285             :     /*      Loop over output lines.                                         */
    6286             :     /* ==================================================================== */
    6287      316415 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    6288             :     {
    6289             :         /* --------------------------------------------------------------------
    6290             :          */
    6291             :         /*      Setup points to transform to source image space. */
    6292             :         /* --------------------------------------------------------------------
    6293             :          */
    6294      314388 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    6295      314388 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    6296   110215489 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6297   109901005 :             padfY[iDstX] = dfY;
    6298      314388 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    6299             : 
    6300             :         /* --------------------------------------------------------------------
    6301             :          */
    6302             :         /*      Transform the points from destination pixel/line coordinates */
    6303             :         /*      to source pixel/line coordinates. */
    6304             :         /* --------------------------------------------------------------------
    6305             :          */
    6306      314388 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    6307             :                              padfY, padfZ, pabSuccess);
    6308      314388 :         if (dfSrcCoordPrecision > 0.0)
    6309             :         {
    6310        1000 :             GWKRoundSourceCoordinates(
    6311             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    6312             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    6313        1000 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    6314             :         }
    6315             : 
    6316             :         /* ====================================================================
    6317             :          */
    6318             :         /*      Loop over pixels in output scanline. */
    6319             :         /* ====================================================================
    6320             :          */
    6321   110215489 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6322             :         {
    6323   109901005 :             GPtrDiff_t iSrcOffset = 0;
    6324   109901005 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    6325             :                                               padfX, padfY, nSrcXSize,
    6326             :                                               nSrcYSize, iSrcOffset))
    6327    61411278 :                 continue;
    6328             : 
    6329             :             /* ====================================================================
    6330             :              */
    6331             :             /*      Loop processing each band. */
    6332             :             /* ====================================================================
    6333             :              */
    6334    97250319 :             const GPtrDiff_t iDstOffset =
    6335    97250319 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    6336             : 
    6337             : #if defined(USE_SSE2)
    6338             :             if constexpr (bUse4SamplesFormula && eResample == GRA_Cubic &&
    6339             :                           (std::is_same<T, GByte>::value ||
    6340             :                            std::is_same<T, GUInt16>::value))
    6341             :             {
    6342    49826241 :                 if (poWK->nBands > 1 && !poWK->bApplyVerticalShift)
    6343             :                 {
    6344    48760542 :                     GWKCubicResampleNoMasks4MultiBandT<T>(
    6345    48760542 :                         poWK, padfX[iDstX] - poWK->nSrcXOff,
    6346    48760542 :                         padfY[iDstX] - poWK->nSrcYOff, iDstOffset);
    6347             : 
    6348    48760542 :                     continue;
    6349             :                 }
    6350             :             }
    6351             : #endif  // defined(USE_SSE2)
    6352             : 
    6353    48489690 :             [[maybe_unused]] double dfInvWeights = 0;
    6354   134905636 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6355             :             {
    6356    86415430 :                 T value = 0;
    6357             :                 if constexpr (eResample == GRA_NearestNeighbour)
    6358             :                 {
    6359    78494030 :                     value = reinterpret_cast<T *>(
    6360    78494030 :                         poWK->papabySrcImage[iBand])[iSrcOffset];
    6361             :                 }
    6362             :                 else if constexpr (bUse4SamplesFormula)
    6363             :                 {
    6364             :                     if constexpr (eResample == GRA_Bilinear)
    6365     3845071 :                         GWKBilinearResampleNoMasks4SampleT(
    6366     3845071 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6367     3845071 :                             padfY[iDstX] - poWK->nSrcYOff, &value);
    6368             :                     else
    6369     2300964 :                         GWKCubicResampleNoMasks4SampleT(
    6370     2300964 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6371     2300964 :                             padfY[iDstX] - poWK->nSrcYOff, &value);
    6372             :                 }
    6373             :                 else
    6374             :                 {
    6375     1775365 :                     GWKResampleNoMasksT(
    6376     1775365 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6377     1775365 :                         padfY[iDstX] - poWK->nSrcYOff, &value, padfWeightsX,
    6378             :                         padfWeightsY, dfInvWeights);
    6379             :                 }
    6380             : 
    6381    86415430 :                 if (poWK->bApplyVerticalShift)
    6382             :                 {
    6383         818 :                     if (!std::isfinite(padfZ[iDstX]))
    6384           0 :                         continue;
    6385             :                     // Subtract padfZ[] since the coordinate transformation is
    6386             :                     // from target to source
    6387         818 :                     value = GWKClampValueT<T>(
    6388         818 :                         double(value) * poWK->dfMultFactorVerticalShift -
    6389         818 :                         padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
    6390             :                 }
    6391             : 
    6392    86415430 :                 if (poWK->pafDstDensity)
    6393    13020199 :                     poWK->pafDstDensity[iDstOffset] = 1.0f;
    6394             : 
    6395    86415430 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
    6396             :                     value;
    6397             :             }
    6398             :         }
    6399             : 
    6400             :         /* --------------------------------------------------------------------
    6401             :          */
    6402             :         /*      Report progress to the user, and optionally cancel out. */
    6403             :         /* --------------------------------------------------------------------
    6404             :          */
    6405      314388 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    6406           1 :             break;
    6407             :     }
    6408             : 
    6409             :     /* -------------------------------------------------------------------- */
    6410             :     /*      Cleanup and return.                                             */
    6411             :     /* -------------------------------------------------------------------- */
    6412        2028 :     CPLFree(padfX);
    6413        2028 :     CPLFree(padfY);
    6414        2028 :     CPLFree(padfZ);
    6415        2028 :     CPLFree(pabSuccess);
    6416        2028 :     CPLFree(padfWeightsX);
    6417        2028 :     CPLFree(padfWeightsY);
    6418        2028 : }
    6419             : 
    6420             : template <class T, GDALResampleAlg eResample>
    6421        1004 : static void GWKResampleNoMasksOrDstDensityOnlyThread(void *pData)
    6422             : {
                           // Worker entry point: forwards pData unchanged to the
                           // ...ThreadInternal template with its third template argument
                           // fixed to FALSE. Judging by the Has4Sample dispatcher that
                           // follows, that argument is the "use 4-sample formula" switch.
    6423        1004 :     GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
    6424             :         pData);
    6425        1004 : }
    6426             : 
    6427             : template <class T, GDALResampleAlg eResample>
    6428        1024 : static void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread(void *pData)
    6429             : 
    6430             : {
                           // Worker entry point for bilinear/cubic only (enforced by the
                           // static_assert). Asks CanUse4SamplesFormula() once per job and
                           // then runs the matching compile-time specialisation of the
                           // internal worker, so the choice costs nothing per pixel.
    6431        1024 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    6432        1024 :     GDALWarpKernel *poWK = psJob->poWK;
    6433             :     static_assert(eResample == GRA_Bilinear || eResample == GRA_Cubic);
    6434        1024 :     const bool bUse4SamplesFormula = CanUse4SamplesFormula(poWK);
    6435        1024 :     if (bUse4SamplesFormula)
    6436         967 :         GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, TRUE>(
    6437             :             pData);
    6438             :     else
    6439          57 :         GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
    6440             :             pData);
    6441        1024 : }
    6442             : 
    6443         953 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6444             : {
                           // Hands the GByte / nearest-neighbour instantiation of the
                           // no-mask worker to GWKRun() and returns its CPLErr. The string
                           // is the label passed to GWKRun() (defined outside this view).
    6445         953 :     return GWKRun(
    6446             :         poWK, "GWKNearestNoMasksOrDstDensityOnlyByte",
    6447         953 :         GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_NearestNeighbour>);
    6448             : }
    6449             : 
    6450         128 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6451             : {
                           // Hands the GByte / bilinear instantiation of the Has4Sample
                           // dispatcher to GWKRun() and returns its CPLErr.
    6452         128 :     return GWKRun(
    6453             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyByte",
    6454             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte,
    6455         128 :                                                            GRA_Bilinear>);
    6456             : }
    6457             : 
    6458         850 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6459             : {
                           // Hands the GByte / cubic instantiation of the Has4Sample
                           // dispatcher to GWKRun() and returns its CPLErr.
    6460         850 :     return GWKRun(
    6461             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyByte",
    6462         850 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte, GRA_Cubic>);
    6463             : }
    6464             : 
    6465           9 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
    6466             : {
                           // Hands the float / cubic instantiation of the Has4Sample
                           // dispatcher to GWKRun() and returns its CPLErr.
    6467           9 :     return GWKRun(
    6468             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyFloat",
    6469           9 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, GRA_Cubic>);
    6470             : }
    6471             : 
    6472             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    6473             : 
    6474             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
    6475             : {
                           // Only compiled when INSTANTIATE_FLOAT64_SSE2_IMPL is defined.
                           // Hands the double / cubic instantiation of the Has4Sample
                           // dispatcher to GWKRun() and returns its CPLErr.
    6476             :     return GWKRun(
    6477             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyDouble",
    6478             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double, GRA_Cubic>);
    6479             : }
    6480             : #endif
    6481             : 
    6482          12 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6483             : {
                           // Hands the GByte / cubic-spline instantiation of the plain
                           // no-mask worker (no 4-sample dispatch) to GWKRun() and returns
                           // its CPLErr.
    6484          12 :     return GWKRun(
    6485             :         poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyByte",
    6486          12 :         GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_CubicSpline>);
    6487             : }
    6488             : 
    6489             : /************************************************************************/
    6490             : /*                          GWKNearestThread<T>()                       */
    6491             : /*                                                                      */
    6492             : /*      Worker for nearest neighbour resampling of pixel type T that    */
    6493             : /*      honours the unified source validity and density masks.          */
    6494             : /*      Instantiated for GByte, GInt16, GUInt16 and float below.        */
    6495             : /************************************************************************/
    6496             : 
    6497         472 : template <class T> static void GWKNearestThread(void *pData)
    6498             : 
    6499             : {
                           // pData is the GWKJobStruct of this job; destination rows
                           // [iYMin, iYMax) of the kernel psJob->poWK are processed.
    6500         472 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    6501         472 :     GDALWarpKernel *poWK = psJob->poWK;
    6502         472 :     const int iYMin = psJob->iYMin;
    6503         472 :     const int iYMax = psJob->iYMax;
                           // The pipeline factor is only read from the warp options when a
                           // vertical shift is requested; otherwise it is unused and 0.0.
    6504         472 :     const double dfMultFactorVerticalShiftPipeline =
    6505         472 :         poWK->bApplyVerticalShift
    6506           0 :             ? CPLAtof(CSLFetchNameValueDef(
    6507           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    6508             :                   "1.0"))
    6509             :             : 0.0;
                           // True for single-band kernels, or when UNIFIED_SRC_NODATA is
                           // not enabled. When false, nodata avoidance is done once per
                           // pixel across all bands after the band loop.
    6510         472 :     const bool bAvoidNoDataSingleBand =
    6511         538 :         poWK->nBands == 1 ||
    6512          66 :         !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
    6513             :                                           "UNIFIED_SRC_NODATA", "FALSE"));
    6514             : 
    6515         472 :     const int nDstXSize = poWK->nDstXSize;
    6516         472 :     const int nSrcXSize = poWK->nSrcXSize;
    6517         472 :     const int nSrcYSize = poWK->nSrcYSize;
    6518             : 
    6519             :     /* -------------------------------------------------------------------- */
    6520             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    6521             :     /*      scanline's worth of positions.                                  */
    6522             :     /* -------------------------------------------------------------------- */
    6523             : 
    6524             :     // For x, 2 *, because we cache the precomputed values at the end.
    6525             :     double *padfX =
    6526         472 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    6527             :     double *padfY =
    6528         472 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6529             :     double *padfZ =
    6530         472 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6531         472 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    6532             : 
    6533         472 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    6534         472 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    6535         472 :     const double dfErrorThreshold = CPLAtof(
    6536         472 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    6537             : 
    6538             :     const bool bOneSourceCornerFailsToReproject =
    6539         472 :         GWKOneSourceCornerFailsToReproject(psJob);
    6540             : 
    6541             :     // Precompute values.
                           // The second half of padfX holds the destination pixel-centre X
                           // of every column; it is identical for all rows.
    6542       79763 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6543       79291 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    6544             : 
    6545             :     /* ==================================================================== */
    6546             :     /*      Loop over output lines.                                         */
    6547             :     /* ==================================================================== */
    6548       63919 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    6549             :     {
    6550             : 
    6551             :         /* --------------------------------------------------------------------
    6552             :          */
    6553             :         /*      Setup points to transform to source image space. */
    6554             :         /* --------------------------------------------------------------------
    6555             :          */
                               // Reload the first half of padfX from the cached values: the
                               // arrays passed to the transformer are reused as source
                               // coordinates afterwards, so they must be reset per row.
    6556       63447 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    6557       63447 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    6558    33638877 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6559    33575441 :             padfY[iDstX] = dfY;
    6560       63447 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    6561             : 
    6562             :         /* --------------------------------------------------------------------
    6563             :          */
    6564             :         /*      Transform the points from destination pixel/line coordinates */
    6565             :         /*      to source pixel/line coordinates. */
    6566             :         /* --------------------------------------------------------------------
    6567             :          */
    6568       63447 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    6569             :                              padfY, padfZ, pabSuccess);
                               // Optional post-processing of the transformed coordinates,
                               // enabled by a positive SRC_COORD_PRECISION warp option.
    6570       63447 :         if (dfSrcCoordPrecision > 0.0)
    6571             :         {
    6572           0 :             GWKRoundSourceCoordinates(
    6573             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    6574             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    6575           0 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    6576             :         }
    6577             :         /* ====================================================================
    6578             :          */
    6579             :         /*      Loop over pixels in output scanline. */
    6580             :         /* ====================================================================
    6581             :          */
    6582    33638877 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6583             :         {
                                   // Skip this destination pixel when the helper reports
                                   // failure; on success iSrcOffset is filled in through the
                                   // reference argument.
    6584    33575441 :             GPtrDiff_t iSrcOffset = 0;
    6585    33575441 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    6586             :                                               padfX, padfY, nSrcXSize,
    6587             :                                               nSrcYSize, iSrcOffset))
    6588    21187043 :                 continue;
    6589             : 
    6590             :             /* --------------------------------------------------------------------
    6591             :              */
    6592             :             /*      Do not try to apply invalid source pixels to the dest. */
    6593             :             /* --------------------------------------------------------------------
    6594             :              */
                                   // An invalid source pixel is normally skipped. Only when
                                   // bOneSourceCornerFailsToReproject is set does the code
                                   // try GWKAdjustSrcOffsetOnEdge(), which may update
                                   // iSrcOffset, before giving up.
    6595    24833405 :             if (poWK->panUnifiedSrcValid != nullptr &&
    6596     6517835 :                 !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    6597             :             {
    6598     4924382 :                 if (!bOneSourceCornerFailsToReproject)
    6599             :                 {
    6600     4916896 :                     continue;
    6601             :                 }
    6602        7485 :                 else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
    6603             :                 {
    6604        5224 :                     continue;
    6605             :                 }
    6606             :             }
    6607             : 
    6608             :             /* --------------------------------------------------------------------
    6609             :              */
    6610             :             /*      Do not try to apply transparent source pixels to the
    6611             :              * destination.*/
    6612             :             /* --------------------------------------------------------------------
    6613             :              */
    6614    13393380 :             double dfDensity = 1.0;
    6615             : 
    6616    13393380 :             if (poWK->pafUnifiedSrcDensity != nullptr)
    6617             :             {
    6618     1557335 :                 dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    6619     1557335 :                 if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
    6620     1005075 :                     continue;
    6621             :             }
    6622             : 
    6623             :             /* ====================================================================
    6624             :              */
    6625             :             /*      Loop processing each band. */
    6626             :             /* ====================================================================
    6627             :              */
    6628             : 
                                   // Widen iDstY before multiplying so the row offset is
                                   // computed in GPtrDiff_t rather than int.
    6629    12388398 :             const GPtrDiff_t iDstOffset =
    6630    12388398 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    6631             : 
    6632    27338858 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6633             :             {
    6634    14950460 :                 T value = 0;
    6635    14950460 :                 double dfBandDensity = 0.0;
    6636             : 
    6637             :                 /* --------------------------------------------------------------------
    6638             :                  */
    6639             :                 /*      Collect the source value. */
    6640             :                 /* --------------------------------------------------------------------
    6641             :                  */
    6642    14950460 :                 if (GWKGetPixelT(poWK, iBand, iSrcOffset, &dfBandDensity,
    6643             :                                  &value))
    6644             :                 {
    6645             : 
    6646    14950460 :                     if (poWK->bApplyVerticalShift)
    6647             :                     {
                                               // NOTE(review): this `continue` only skips the
                                               // write for the current band; the per-pixel
                                               // density/validity updates after the band loop
                                               // still run for this pixel.
    6648           0 :                         if (!std::isfinite(padfZ[iDstX]))
    6649           0 :                             continue;
    6650             :                         // Subtract padfZ[] since the coordinate transformation
    6651             :                         // is from target to source
    6652           0 :                         value = GWKClampValueT<T>(
    6653           0 :                             double(value) * poWK->dfMultFactorVerticalShift -
    6654           0 :                             padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
    6655             :                     }
    6656             : 
    6657    14950460 :                     GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
    6658             :                                           dfBandDensity, value,
    6659             :                                           bAvoidNoDataSingleBand);
    6660             :                 }
    6661             :             }
    6662             : 
    6663             :             /* --------------------------------------------------------------------
    6664             :              */
    6665             :             /*      Mark this pixel valid/opaque in the output. */
    6666             :             /* --------------------------------------------------------------------
    6667             :              */
    6668             : 
    6669    12388398 :             if (!bAvoidNoDataSingleBand)
    6670             :             {
    6671      424278 :                 GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    6672             :             }
    6673             : 
    6674    12388398 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    6675             : 
    6676    12388398 :             if (poWK->panDstValid != nullptr)
    6677             :             {
    6678    11118345 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    6679             :             }
    6680             :         } /* Next iDstX */
    6681             : 
    6682             :         /* --------------------------------------------------------------------
    6683             :          */
    6684             :         /*      Report progress to the user, and optionally cancel out. */
    6685             :         /* --------------------------------------------------------------------
    6686             :          */
                               // A non-zero return from the progress callback stops the row
                               // loop early; the buffers below are still released.
    6687       63447 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    6688           0 :             break;
    6689             :     }
    6690             : 
    6691             :     /* -------------------------------------------------------------------- */
    6692             :     /*      Cleanup and return.                                             */
    6693             :     /* -------------------------------------------------------------------- */
    6694         472 :     CPLFree(padfX);
    6695         472 :     CPLFree(padfY);
    6696         472 :     CPLFree(padfZ);
    6697         472 :     CPLFree(pabSuccess);
    6698         472 : }
    6699             : 
    6700         360 : static CPLErr GWKNearestByte(GDALWarpKernel *poWK)
    6701             : {
                           // Hands the GByte instantiation of GWKNearestThread to GWKRun()
                           // and returns its CPLErr.
    6702         360 :     return GWKRun(poWK, "GWKNearestByte", GWKNearestThread<GByte>);
    6703             : }
    6704             : 
    6705          14 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6706             : {
                           // Hands the GInt16 / nearest-neighbour instantiation of the
                           // no-mask worker to GWKRun() and returns its CPLErr.
    6707          14 :     return GWKRun(
    6708             :         poWK, "GWKNearestNoMasksOrDstDensityOnlyShort",
    6709          14 :         GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_NearestNeighbour>);
    6710             : }
    6711             : 
    6712           5 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6713             : {
                           // Hands the GInt16 / bilinear instantiation of the Has4Sample
                           // dispatcher to GWKRun() and returns its CPLErr.
    6714           5 :     return GWKRun(
    6715             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyShort",
    6716             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16,
    6717           5 :                                                            GRA_Bilinear>);
    6718             : }
    6719             : 
    6720           6 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
    6721             : {
                           // Hands the GUInt16 / bilinear instantiation of the Has4Sample
                           // dispatcher to GWKRun() and returns its CPLErr.
    6722           6 :     return GWKRun(
    6723             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyUShort",
    6724             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16,
    6725           6 :                                                            GRA_Bilinear>);
    6726             : }
    6727             : 
    6728           4 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
    6729             : {
                           // Hands the float / bilinear instantiation of the Has4Sample
                           // dispatcher to GWKRun() and returns its CPLErr.
    6730           4 :     return GWKRun(
    6731             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyFloat",
    6732             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float,
    6733           4 :                                                            GRA_Bilinear>);
    6734             : }
    6735             : 
    6736             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    6737             : 
    6738             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
    6739             : {
                           // Only compiled when INSTANTIATE_FLOAT64_SSE2_IMPL is defined.
                           // Hands the double / bilinear instantiation of the Has4Sample
                           // dispatcher to GWKRun() and returns its CPLErr.
    6740             :     return GWKRun(
    6741             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyDouble",
    6742             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double,
    6743             :                                                            GRA_Bilinear>);
    6744             : }
    6745             : #endif
    6746             : 
    6747           5 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6748             : {
                           // Hands the GInt16 / cubic instantiation of the Has4Sample
                           // dispatcher to GWKRun() and returns its CPLErr.
    6749           5 :     return GWKRun(
    6750             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyShort",
    6751           5 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16, GRA_Cubic>);
    6752             : }
    6753             : 
    6754          14 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
    6755             : {
                           // Hands the GUInt16 / cubic instantiation of the Has4Sample
                           // dispatcher to GWKRun() and returns its CPLErr.
    6756          14 :     return GWKRun(
    6757             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyUShort",
    6758          14 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16, GRA_Cubic>);
    6759             : }
    6760             : 
    6761           6 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6762             : {
                           // Hands the GInt16 / cubic-spline instantiation of the plain
                           // no-mask worker to GWKRun() and returns its CPLErr.
    6763           6 :     return GWKRun(
    6764             :         poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyShort",
    6765           6 :         GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_CubicSpline>);
    6766             : }
    6767             : 
    6768           5 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
    6769             : {
                           // Hands the GUInt16 / cubic-spline instantiation of the plain
                           // no-mask worker to GWKRun() and returns its CPLErr.
    6770           5 :     return GWKRun(
    6771             :         poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyUShort",
    6772           5 :         GWKResampleNoMasksOrDstDensityOnlyThread<GUInt16, GRA_CubicSpline>);
    6773             : }
    6774             : 
    6775          48 : static CPLErr GWKNearestShort(GDALWarpKernel *poWK)
    6776             : {
                           // Hands the GInt16 instantiation of GWKNearestThread to GWKRun()
                           // and returns its CPLErr.
    6777          48 :     return GWKRun(poWK, "GWKNearestShort", GWKNearestThread<GInt16>);
    6778             : }
    6779             : 
    6780          10 : static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK)
    6781             : {
                           // Hands the GUInt16 instantiation of GWKNearestThread to GWKRun()
                           // and returns its CPLErr.
    6782          10 :     return GWKRun(poWK, "GWKNearestUnsignedShort", GWKNearestThread<GUInt16>);
    6783             : }
    6784             : 
    6785          11 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
    6786             : {
                           // Hands the float / nearest-neighbour instantiation of the
                           // no-mask worker to GWKRun() and returns its CPLErr.
    6787          11 :     return GWKRun(
    6788             :         poWK, "GWKNearestNoMasksOrDstDensityOnlyFloat",
    6789          11 :         GWKResampleNoMasksOrDstDensityOnlyThread<float, GRA_NearestNeighbour>);
    6790             : }
    6791             : 
    6792          50 : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK)
    6793             : {
                           // Hands the float instantiation of GWKNearestThread to GWKRun()
                           // and returns its CPLErr.
    6794          50 :     return GWKRun(poWK, "GWKNearestFloat", GWKNearestThread<float>);
    6795             : }
    6796             : 
    6797             : /************************************************************************/
    6798             : /*                           GWKAverageOrMode()                         */
    6799             : /*                                                                      */
    6800             : /************************************************************************/
    6801             : 
                       // COMPUTE_WEIGHT_Y(iSrcY): weight of source row iSrcY.
                       //  - first row (iSrcY == iSrcYMin): 1.0 if the window is a single row
                       //    (iSrcYMin + 1 == iSrcYMax), else 1 - (dfYMin - iSrcYMin);
                       //  - last row (iSrcY + 1 == iSrcYMax): 1 - (iSrcYMax - dfYMax);
                       //  - any other row: 1.0.
                       // Expands to names that must exist at the point of use: iSrcYMin,
                       // iSrcYMax, dfYMin and dfYMax. The argument is not parenthesised and
                       // is evaluated more than once, so pass a plain identifier.
    6802             : #define COMPUTE_WEIGHT_Y(iSrcY)                                                \
    6803             :     ((iSrcY == iSrcYMin)                                                       \
    6804             :          ? ((iSrcYMin + 1 == iSrcYMax) ? 1.0 : 1 - (dfYMin - iSrcYMin))        \
    6805             :      : (iSrcY + 1 == iSrcYMax) ? 1 - (iSrcYMax - dfYMax)                       \
    6806             :                                : 1.0)
    6807             : 
                       // COMPUTE_WEIGHT(iSrcX, dfWeightY): dfWeightY scaled by the weight of
                       // source column iSrcX, using the same first/last/other rule as
                       // COMPUTE_WEIGHT_Y but on iSrcXMin, iSrcXMax, dfXMin and dfXMax (which
                       // must exist at the point of use). Arguments are not parenthesised and
                       // may be evaluated more than once, so pass plain identifiers.
    6808             : #define COMPUTE_WEIGHT(iSrcX, dfWeightY)                                       \
    6809             :     ((iSrcX == iSrcXMin)       ? ((iSrcXMin + 1 == iSrcXMax)                   \
    6810             :                                       ? dfWeightY                              \
    6811             :                                       : dfWeightY * (1 - (dfXMin - iSrcXMin))) \
    6812             :      : (iSrcX + 1 == iSrcXMax) ? dfWeightY * (1 - (iSrcXMax - dfXMax))         \
    6813             :                                : dfWeightY)
    6814             : 
    6815             : static void GWKAverageOrModeThread(void *pData);
    6816             : 
    6817         163 : static CPLErr GWKAverageOrMode(GDALWarpKernel *poWK)
    6818             : {
                           // Hands GWKAverageOrModeThread (only declared above; its
                           // definition is outside this view) to GWKRun() and returns
                           // its CPLErr.
    6819         163 :     return GWKRun(poWK, "GWKAverageOrMode", GWKAverageOrModeThread);
    6820             : }
    6821             : 
    6822             : /************************************************************************/
    6823             : /*                 GWKAverageOrModeComputeLineCoords()                  */
    6824             : /************************************************************************/
    6825             : 
                       // Computes, for destination row iDstY, two transformed coordinate sets
                       // per destination column:
                       //  - padfX/padfY/padfZ   start from (iDstX + nDstXOff, iDstY + nDstYOff)
                       //  - padfX2/padfY2/padfZ2 start from the opposite pixel corner, i.e. the
                       //    same position plus 1.0 in both X and Y.
                       // Z inputs are zero. All arrays must hold at least poWK->nDstXSize
                       // entries; pabSuccess/pabSuccess2 receive the transformer's per-point
                       // status. When dfSrcCoordPrecision > 0, both sets are additionally
                       // passed through GWKRoundSourceCoordinates() with dfErrorThreshold.
    6826        8183 : static void GWKAverageOrModeComputeLineCoords(
    6827             :     const GWKJobStruct *psJob, double *padfX, double *padfX2, double *padfY,
    6828             :     double *padfY2, double *padfZ, double *padfZ2, int *pabSuccess,
    6829             :     int *pabSuccess2, int iDstY, double dfSrcCoordPrecision,
    6830             :     double dfErrorThreshold)
    6831             : {
    6832        8183 :     const GDALWarpKernel *poWK = psJob->poWK;
    6833        8183 :     const int nDstXSize = poWK->nDstXSize;
    6834             : 
    6835             :     // Setup points to transform to source image space.
    6836     2097530 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6837             :     {
    6838     2089340 :         padfX[iDstX] = iDstX + poWK->nDstXOff;
    6839     2089340 :         padfY[iDstX] = iDstY + poWK->nDstYOff;
    6840     2089340 :         padfZ[iDstX] = 0.0;
    6841     2089340 :         padfX2[iDstX] = iDstX + 1.0 + poWK->nDstXOff;
    6842     2089340 :         padfY2[iDstX] = iDstY + 1.0 + poWK->nDstYOff;
    6843     2089340 :         padfZ2[iDstX] = 0.0;
    6844             :     }
    6845             : 
    6846             :     /* ----------------------------------------------------------------- */
    6847             :     /*      Transform the points from destination pixel/line coordinates */
    6848             :     /*      to source pixel/line coordinates.                            */
    6849             :     /* ----------------------------------------------------------------- */
    6850        8183 :     poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX, padfY,
    6851             :                          padfZ, pabSuccess);
    6852        8183 :     poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX2,
    6853             :                          padfY2, padfZ2, pabSuccess2);
    6854             : 
                           // The last two arguments of each call are the destination X of
                           // column 0 and the destination Y of this row for the matching
                           // coordinate set (offsets 0 and 1.0 respectively).
    6855        8183 :     if (dfSrcCoordPrecision > 0.0)
    6856             :     {
    6857           0 :         GWKRoundSourceCoordinates(nDstXSize, padfX, padfY, padfZ, pabSuccess,
    6858             :                                   dfSrcCoordPrecision, dfErrorThreshold,
    6859           0 :                                   poWK->pfnTransformer, psJob->pTransformerArg,
    6860           0 :                                   poWK->nDstXOff, iDstY + poWK->nDstYOff);
    6861           0 :         GWKRoundSourceCoordinates(
    6862             :             nDstXSize, padfX2, padfY2, padfZ2, pabSuccess2, dfSrcCoordPrecision,
    6863           0 :             dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    6864           0 :             1.0 + poWK->nDstXOff, iDstY + 1.0 + poWK->nDstYOff);
    6865             :     }
    6866        8183 : }
    6867             : 
    6868             : /************************************************************************/
    6869             : /*                GWKAverageOrModeComputeSourceCoords()                 */
    6870             : /************************************************************************/
    6871             : 
    6872     2089340 : static bool GWKAverageOrModeComputeSourceCoords(
    6873             :     const GWKJobStruct *psJob, double *padfX, double *padfX2, double *padfY,
    6874             :     double *padfY2, int iDstX, int iDstY, int nXMargin, int nYMargin,
    6875             :     // Output:
    6876             :     bool &bWrapOverX, double &dfXMin, double &dfYMin, double &dfXMax,
    6877             :     double &dfYMax, int &iSrcXMin, int &iSrcYMin, int &iSrcXMax, int &iSrcYMax)
    6878             : {
                           // Computes the window of source pixels covered by destination pixel
                           // iDstX of line iDstY, from the two transformed corners stored in
                           // padfX/padfY and padfX2/padfY2.
                           //
                           //  nXMargin, nYMargin  how far (in source pixels) a corner may lie
                           //                      outside the source window before the pixel is
                           //                      rejected.
                           //  bWrapOverX          set to true when the wrap-over heuristic below
                           //                      triggers.
                           //  dfXMin..dfYMax      fractional extent, relative to the source
                           //                      window origin (nSrcXOff / nSrcYOff removed).
                           //  iSrcXMin..iSrcYMax  integer pixel range; the callers in this file
                           //                      iterate with "< iSrc?Max", i.e. max exclusive.
                           //
                           // Side effect: entry [iDstX] of the coordinate arrays may be swapped so
                           // that padfX <= padfX2 and padfY <= padfY2, and padfX2[iDstX] is
                           // increased by nSrcXSize when bWrapOverX is set.
                           //
                           // Returns false when the pixel must be skipped; in that case the
                           // output arguments are not all assigned.
    6879     2089340 :     const GDALWarpKernel *poWK = psJob->poWK;
    6880     2089340 :     const int nSrcXSize = poWK->nSrcXSize;
    6881     2089340 :     const int nSrcYSize = poWK->nSrcYSize;
    6882             : 
    6883             :     // Add some checks so that padfX[iDstX] - poWK->nSrcXOff is in
    6884             :     // reasonable range (https://github.com/OSGeo/gdal/issues/2365)
                           // The test is a negated conjunction, so a NaN coordinate (for which
                           // every comparison is false) also leads to rejection.
    6885     2089340 :     if (!(padfX[iDstX] - poWK->nSrcXOff >= -nXMargin &&
    6886     1992640 :           padfX2[iDstX] - poWK->nSrcXOff >= -nXMargin &&
    6887     1992640 :           padfY[iDstX] - poWK->nSrcYOff >= -nYMargin &&
    6888     1965720 :           padfY2[iDstX] - poWK->nSrcYOff >= -nYMargin &&
    6889     1965720 :           padfX[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
    6890     1912820 :           padfX2[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
    6891     1912310 :           padfY[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin &&
    6892     1910810 :           padfY2[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin))
    6893             :     {
    6894      178602 :         return false;
    6895             :     }
    6896             : 
    6897             :     // Compute corners in source crs.
    6898             : 
    6899             :     // The transformation might not have preserved ordering of
    6900             :     // coordinates so do the necessary swapping (#5433).
    6901             :     // NOTE: this is really an approximative fix. To do something
    6902             :     // more precise we would for example need to compute the
    6903             :     // transformation of coordinates in the
    6904             :     // [iDstX,iDstY]x[iDstX+1,iDstY+1] square back to source
    6905             :     // coordinates, and take the bounding box of the resulting source
    6906             :     // coordinates.
    6907             : 
    6908     1910740 :     if (padfX[iDstX] > padfX2[iDstX])
    6909      268744 :         std::swap(padfX[iDstX], padfX2[iDstX]);
    6910             : 
    6911             :     // Detect situations where the target pixel is close to the
    6912             :     // antimeridian and when padfX[iDstX] and padfX2[iDstX] are very
    6913             :     // close to the left-most and right-most columns of the source
    6914             :     // raster. The 2 value below was experimentally determined to
    6915             :     // avoid false-positives and false-negatives.
    6916             :     // Addresses https://github.com/OSGeo/gdal/issues/6478
    6917     1910740 :     bWrapOverX = false;
                           // Integer division: sources narrower than 20 pixels get a smaller
                           // threshold (1, or 0 below 10 pixels).
    6918     1910740 :     const int nThresholdWrapOverX = std::min(2, nSrcXSize / 10);
                           // 2 * padfX[iDstX] - padfX[iDstX + 1] is padfX[iDstX] minus the step
                           // to the next destination pixel's first-corner X; the test requires
                           // that value to be near column 0 and padfX2[iDstX] to be near the
                           // last column. iDstX + 1 is checked against nDstXSize before the
                           // neighbour entry is read.
    6919     1910740 :     if (poWK->nSrcXOff == 0 && iDstX + 1 < poWK->nDstXSize &&
    6920     1903470 :         2 * padfX[iDstX] - padfX[iDstX + 1] < nThresholdWrapOverX &&
    6921       17795 :         nSrcXSize - padfX2[iDstX] < nThresholdWrapOverX)
    6922             :     {
    6923             :         // Check there is a discontinuity by checking at mid-pixel.
    6924             :         // NOTE: all this remains fragile. To confidently
    6925             :         // detect antimeridian warping we should probably try to access
    6926             :         // georeferenced coordinates, and not rely only on tests on
    6927             :         // image space coordinates. But accessing georeferenced
    6928             :         // coordinates from here is not trivial, and we would for example
    6929             :         // have to handle both geographic, Mercator, etc.
    6930             :         // Let's hope this heuristic is good enough for now.
    6931        1200 :         double x = iDstX + 0.5 + poWK->nDstXOff;
    6932        1200 :         double y = iDstY + poWK->nDstYOff;
    6933        1200 :         double z = 0;
    6934        1200 :         int bSuccess = FALSE;
    6935        1200 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1, &x, &y, &z,
    6936             :                              &bSuccess);
                               // The mid-pixel point landing left of the smaller corner means the
                               // pixel does not span [padfX, padfX2] directly: swap the corners
                               // and push the new upper bound past the right edge by one raster
                               // width. Callers fold the column index back with % nSrcXSize.
    6937        1200 :         if (bSuccess && x < padfX[iDstX])
    6938             :         {
    6939        1192 :             bWrapOverX = true;
    6940        1192 :             std::swap(padfX[iDstX], padfX2[iDstX]);
    6941        1192 :             padfX2[iDstX] += nSrcXSize;
    6942             :         }
    6943             :     }
    6944             : 
    6945     1910740 :     dfXMin = padfX[iDstX] - poWK->nSrcXOff;
    6946     1910740 :     dfXMax = padfX2[iDstX] - poWK->nSrcXOff;
    6947     1910740 :     constexpr double EPSILON = 1e-10;
    6948             :     // Check that [dfXMin, dfXMax] intersect with [0,nSrcXSize] with a tolerance
    6949     1910740 :     if (!(dfXMax > -EPSILON && dfXMin < nSrcXSize + EPSILON))
    6950         372 :         return false;
                           // Lower bound: floor, clamped to 0. Upper bound: ceil, clamped to
                           // INT_MAX while still a double so that the cast to int is in range.
    6951     1910370 :     iSrcXMin = static_cast<int>(std::max(floor(dfXMin + EPSILON), 0.0));
    6952     1910370 :     iSrcXMax = static_cast<int>(
    6953     1910370 :         std::min(ceil(dfXMax - EPSILON), static_cast<double>(INT_MAX)));
                           // Without wrap-over the range must stay inside the source width.
    6954     1910370 :     if (!bWrapOverX)
    6955     1909180 :         iSrcXMax = std::min(iSrcXMax, nSrcXSize);
                           // Widen an empty integer range to one column when there is room.
    6956     1910370 :     if (iSrcXMin == iSrcXMax && iSrcXMax < nSrcXSize)
    6957         472 :         iSrcXMax++;
    6958             : 
                           // Same treatment along Y, without any wrap-over handling.
    6959     1910370 :     if (padfY[iDstX] > padfY2[iDstX])
    6960      270117 :         std::swap(padfY[iDstX], padfY2[iDstX]);
    6961     1910370 :     dfYMin = padfY[iDstX] - poWK->nSrcYOff;
    6962     1910370 :     dfYMax = padfY2[iDstX] - poWK->nSrcYOff;
    6963             :     // Check that [dfYMin, dfYMax] intersect with [0,nSrcYSize] with a tolerance
    6964     1910370 :     if (!(dfYMax > -EPSILON && dfYMin < nSrcYSize + EPSILON))
    6965         238 :         return false;
    6966     1910130 :     iSrcYMin = static_cast<int>(std::max(floor(dfYMin + EPSILON), 0.0));
    6967     1910130 :     iSrcYMax = std::min(static_cast<int>(ceil(dfYMax - EPSILON)), nSrcYSize);
    6968     1910130 :     if (iSrcYMin == iSrcYMax && iSrcYMax < nSrcYSize)
    6969           0 :         iSrcYMax++;
    6970             : 
    6971     1910130 :     return true;
    6972             : }
    6973             : 
    6974             : /************************************************************************/
    6975             : /*                          GWKModeRealType()                           */
    6976             : /************************************************************************/
    6977             : 
    6978       17780 : template <class T> static inline bool IsSame(T a, T b)
    6979             : {
                           // Generic case: two samples are the same value when operator==
                           // says so. Specializations exist in this file for GFloat16, float
                           // and double.
    6980       17780 :     return a == b;
    6981             : }
    6982             : 
    6983           0 : template <> bool IsSame<GFloat16>(GFloat16 a, GFloat16 b)
    6984             : {
                           // Half-float case: equal values, or both NaN as reported by
                           // CPLIsNan(), count as the same value.
    6985           0 :     return a == b || (CPLIsNan(a) && CPLIsNan(b));
    6986             : }
    6987             : 
    6988          18 : template <> bool IsSame<float>(float a, float b)
    6989             : {
                           // NaN never compares equal with ==, so two NaN samples are
                           // explicitly treated as the same value.
    6990          18 :     return a == b || (std::isnan(a) && std::isnan(b));
    6991             : }
    6992             : 
    6993          56 : template <> bool IsSame<double>(double a, double b)
    6994             : {
                           // NaN never compares equal with ==, so two NaN samples are
                           // explicitly treated as the same value.
    6995          56 :     return a == b || (std::isnan(a) && std::isnan(b));
    6996             : }
    6997             : 
    6998          19 : template <class T> static void GWKModeRealType(GWKJobStruct *psJob)
    6999             : {
                           // "Mode" resampling for samples of type T, over destination lines
                           // [psJob->iYMin, psJob->iYMax).
                           //
                           // For every destination pixel and band, the source pixels in the
                           // window returned by GWKAverageOrModeComputeSourceCoords() are grouped
                           // into bins of identical value (IsSame). Each bin accumulates the
                           // weight given by COMPUTE_WEIGHT(); the value of the bin with the
                           // largest accumulated weight is written to the destination. Equal
                           // weights are resolved according to poWK->eTieStrategy.
                           //
                           // If the scratch arrays cannot be allocated the function returns
                           // without processing any line.
    7000          19 :     const GDALWarpKernel *poWK = psJob->poWK;
    7001          19 :     const int iYMin = psJob->iYMin;
    7002          19 :     const int iYMax = psJob->iYMax;
    7003          19 :     const int nDstXSize = poWK->nDstXSize;
    7004          19 :     const int nSrcXSize = poWK->nSrcXSize;
    7005          19 :     const int nSrcYSize = poWK->nSrcYSize;
    7006          19 :     const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
    7007             : 
                           // Bin storage: pVals[i] is the value of bin i, pafCounts[i] its
                           // accumulated weight. Requested with (nSrcXSize, nSrcYSize,
                           // element size) -- presumably one slot per source pixel; confirm
                           // against VSI_MALLOC3_VERBOSE. Both stay null for an empty source.
    7008          19 :     T *pVals = nullptr;
    7009          19 :     float *pafCounts = nullptr;
    7010             : 
    7011          19 :     if (nSrcXSize > 0 && nSrcYSize > 0)
    7012             :     {
    7013             :         pVals = static_cast<T *>(
    7014          19 :             VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(T)));
    7015             :         pafCounts = static_cast<float *>(
    7016          19 :             VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
    7017          19 :         if (pVals == nullptr || pafCounts == nullptr)
    7018             :         {
    7019           0 :             VSIFree(pVals);
    7020           0 :             VSIFree(pafCounts);
    7021           0 :             return;
    7022             :         }
    7023             :     }
    7024             : 
    7025             :     /* -------------------------------------------------------------------- */
    7026             :     /*      Allocate x,y,z coordinate arrays for transformation ... two     */
    7027             :     /*      scanlines worth of positions.                                   */
    7028             :     /* -------------------------------------------------------------------- */
    7029             : 
    7030             :     double *padfX =
    7031          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7032             :     double *padfY =
    7033          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7034             :     double *padfZ =
    7035          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7036             :     double *padfX2 =
    7037          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7038             :     double *padfY2 =
    7039          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7040             :     double *padfZ2 =
    7041          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7042          19 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    7043          19 :     int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    7044             : 
                           // Warp options read once per job; "0" is the default for both
                           // numeric options when they are absent.
    7045          19 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    7046          19 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    7047          19 :     const double dfErrorThreshold = CPLAtof(
    7048          19 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
                           // True for single-band kernels, or when UNIFIED_SRC_NODATA is not
                           // enabled (default "FALSE"). When false, GWKAvoidNoDataMultiBand()
                           // is called once per pixel after all bands were written.
    7049          19 :     const bool bAvoidNoDataSingleBand =
    7050          19 :         poWK->nBands == 1 ||
    7051           0 :         !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
    7052             :                                           "UNIFIED_SRC_NODATA", "FALSE"));
    7053             : 
                           // Margins passed to GWKAverageOrModeComputeSourceCoords():
                           // 2 * max(1, ceil(1 / scale)) along each axis.
    7054          19 :     const int nXMargin =
    7055          19 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
    7056          19 :     const int nYMargin =
    7057          19 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
    7058             : 
    7059             :     /* ==================================================================== */
    7060             :     /*      Loop over output lines.                                         */
    7061             :     /* ==================================================================== */
    7062         116 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    7063             :     {
    7064          97 :         GWKAverageOrModeComputeLineCoords(
    7065             :             psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
    7066             :             pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
    7067             : 
    7068             :         // Loop over pixels in output scanline.
    7069        3514 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    7070             :         {
    7071        3417 :             GPtrDiff_t iSrcOffset = 0;
                                   // dfDensity is never modified in this function; it reaches
                                   // GWKOverlayDensity() below with its initial value 1.0.
    7072        3417 :             double dfDensity = 1.0;
                                   // NOTE(review): declared per pixel, not per band. Once one
                                   // band found a mode, later bands of the same pixel also take
                                   // the "apply" branch even if their own iModeIndex stayed -1
                                   // -- confirm this is intended.
    7073        3417 :             bool bHasFoundDensity = false;
    7074             : 
    7075        3417 :             bool bWrapOverX = false;
    7076        3417 :             double dfXMin = 0;
    7077        3417 :             double dfYMin = 0;
    7078        3417 :             double dfXMax = 0;
    7079        3417 :             double dfYMax = 0;
    7080        3417 :             int iSrcXMin = 0;
    7081        3417 :             int iSrcYMin = 0;
    7082        3417 :             int iSrcXMax = 0;
    7083        3417 :             int iSrcYMax = 0;
                                   // Skip destination pixels with no usable source window.
    7084        3417 :             if (!GWKAverageOrModeComputeSourceCoords(
    7085             :                     psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
    7086             :                     nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
    7087             :                     iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
    7088             :             {
    7089           0 :                 continue;
    7090             :             }
    7091             : 
    7092        3417 :             const GPtrDiff_t iDstOffset =
    7093        3417 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    7094             : 
    7095             :             // Loop processing each band.
    7096        6834 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7097             :             {
    7098        3417 :                 double dfBandDensity = 0.0;
    7099             : 
                                       // Bins are rebuilt from scratch for every band:
                                       // nBins entries of pVals/pafCounts are in use, and
                                       // iModeIndex is the current winner (-1 = none yet).
    7100        3417 :                 int nBins = 0;
    7101        3417 :                 int iModeIndex = -1;
    7102        3417 :                 T nVal{};
    7103             : 
    7104       10248 :                 for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7105             :                 {
    7106        6831 :                     const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7107        6831 :                     iSrcOffset =
    7108        6831 :                         iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7109       20530 :                     for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7110             :                          iSrcX++, iSrcOffset++)
    7111             :                     {
                                               // With wrap-over iSrcX may run past the last
                                               // column, so the offset is recomputed from the
                                               // column folded back with % nSrcXSize.
    7112       13699 :                         if (bWrapOverX)
    7113           0 :                             iSrcOffset =
    7114           0 :                                 (iSrcX % nSrcXSize) +
    7115           0 :                                 static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7116             : 
                                               // Ignore source pixels flagged invalid in the
                                               // unified validity mask, when there is one.
    7117       13699 :                         if (poWK->panUnifiedSrcValid != nullptr &&
    7118           0 :                             !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    7119           0 :                             continue;
    7120             : 
    7121       13699 :                         if (GWKGetPixelT(poWK, iBand, iSrcOffset,
    7122       27398 :                                          &dfBandDensity, &nVal) &&
    7123       13699 :                             dfBandDensity > BAND_DENSITY_THRESHOLD)
    7124             :                         {
    7125       13699 :                             const double dfWeight =
    7126       13699 :                                 COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7127             : 
    7128             :                             // Check array for existing entry.
    7129       13699 :                             int i = 0;
    7130       29194 :                             for (i = 0; i < nBins; ++i)
    7131             :                             {
    7132       17807 :                                 if (IsSame(pVals[i], nVal))
    7133             :                                 {
    7134             : 
                                                       // iModeIndex is >= 0 here: it is set
                                                       // when the first bin is created and
                                                       // this loop only runs when nBins > 0.
    7135        2312 :                                     pafCounts[i] +=
    7136        2312 :                                         static_cast<float>(dfWeight);
    7137        2312 :                                     bool bValIsMaxCount =
    7138        2312 :                                         (pafCounts[i] > pafCounts[iModeIndex]);
    7139             : 
                                                       // Exact tie with the current mode:
                                                       // First keeps the current mode, Min/Max
                                                       // switch to this bin when its value is
                                                       // smaller/greater than the mode's value.
    7140        2312 :                                     if (!bValIsMaxCount &&
    7141        1498 :                                         pafCounts[i] == pafCounts[iModeIndex])
    7142             :                                     {
    7143        1490 :                                         switch (eTieStrategy)
    7144             :                                         {
    7145        1477 :                                             case GWKTS_First:
    7146        1477 :                                                 break;
    7147           6 :                                             case GWKTS_Min:
    7148           6 :                                                 bValIsMaxCount =
    7149           6 :                                                     nVal < pVals[iModeIndex];
    7150           6 :                                                 break;
    7151           7 :                                             case GWKTS_Max:
    7152           7 :                                                 bValIsMaxCount =
    7153           7 :                                                     nVal > pVals[iModeIndex];
    7154           7 :                                                 break;
    7155             :                                         }
    7156             :                                     }
    7157             : 
    7158        2312 :                                     if (bValIsMaxCount)
    7159             :                                     {
    7160         817 :                                         iModeIndex = i;
    7161             :                                     }
    7162             : 
    7163        2312 :                                     break;
    7164             :                                 }
    7165             :                             }
    7166             : 
    7167             :                             // Add to arr if entry not already there.
                                                   // (i == nBins means the search loop above ran
                                                   // to completion without a match.) A new bin
                                                   // becomes the mode only if there is none yet.
    7168       13699 :                             if (i == nBins)
    7169             :                             {
    7170       11387 :                                 pVals[i] = nVal;
    7171       11387 :                                 pafCounts[i] = static_cast<float>(dfWeight);
    7172             : 
    7173       11387 :                                 if (iModeIndex < 0)
    7174        3417 :                                     iModeIndex = i;
    7175             : 
    7176       11387 :                                 ++nBins;
    7177             :                             }
    7178             :                         }
    7179             :                     }
    7180             :                 }
    7181             : 
                                       // A mode was found for this band: output its value
                                       // with a band density of 1.
    7182        3417 :                 if (iModeIndex != -1)
    7183             :                 {
    7184        3417 :                     nVal = pVals[iModeIndex];
    7185        3417 :                     dfBandDensity = 1;
    7186        3417 :                     bHasFoundDensity = true;
    7187             :                 }
    7188             : 
    7189             :                 // We have a computed value from the source.  Now apply it
    7190             :                 // to the destination pixel
    7191        3417 :                 if (bHasFoundDensity)
    7192             :                 {
    7193        3417 :                     GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
    7194             :                                           dfBandDensity, nVal,
    7195             :                                           bAvoidNoDataSingleBand);
    7196             :                 }
    7197             :             }
    7198             : 
                                   // No band produced a value: leave the destination masks
                                   // untouched for this pixel.
    7199        3417 :             if (!bHasFoundDensity)
    7200           0 :                 continue;
    7201             : 
    7202        3417 :             if (!bAvoidNoDataSingleBand)
    7203             :             {
    7204           0 :                 GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    7205             :             }
    7206             : 
    7207             :             /* --------------------------------------------------------------------
    7208             :              */
    7209             :             /*      Update destination density/validity masks. */
    7210             :             /* --------------------------------------------------------------------
    7211             :              */
    7212        3417 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    7213             : 
    7214        3417 :             if (poWK->panDstValid != nullptr)
    7215             :             {
    7216           0 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    7217             :             }
    7218             :         } /* Next iDstX */
    7219             : 
    7220             :         /* --------------------------------------------------------------------
    7221             :          */
    7222             :         /*      Report progress to the user, and optionally cancel out. */
    7223             :         /* --------------------------------------------------------------------
    7224             :          */
                               // A non-zero return from the progress callback stops the line
                               // loop; the cleanup below still runs.
    7225          97 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    7226           0 :             break;
    7227             :     }
    7228             : 
    7229             :     /* -------------------------------------------------------------------- */
    7230             :     /*      Cleanup and return.                                             */
    7231             :     /* -------------------------------------------------------------------- */
    7232          19 :     CPLFree(padfX);
    7233          19 :     CPLFree(padfY);
    7234          19 :     CPLFree(padfZ);
    7235          19 :     CPLFree(padfX2);
    7236          19 :     CPLFree(padfY2);
    7237          19 :     CPLFree(padfZ2);
    7238          19 :     CPLFree(pabSuccess);
    7239          19 :     CPLFree(pabSuccess2);
    7240          19 :     VSIFree(pVals);
    7241          19 :     VSIFree(pafCounts);
    7242             : }
    7243             : 
    7244             : /************************************************************************/
    7245             : /*                         GWKModeComplexType()                         */
    7246             : /************************************************************************/
    7247             : 
    7248           8 : static void GWKModeComplexType(GWKJobStruct *psJob)
    7249             : {
    7250           8 :     const GDALWarpKernel *poWK = psJob->poWK;
    7251           8 :     const int iYMin = psJob->iYMin;
    7252           8 :     const int iYMax = psJob->iYMax;
    7253           8 :     const int nDstXSize = poWK->nDstXSize;
    7254           8 :     const int nSrcXSize = poWK->nSrcXSize;
    7255           8 :     const int nSrcYSize = poWK->nSrcYSize;
    7256           8 :     const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
    7257             :     const double dfMultFactorVerticalShiftPipeline =
    7258           8 :         poWK->bApplyVerticalShift
    7259           8 :             ? CPLAtof(CSLFetchNameValueDef(
    7260           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    7261             :                   "1.0"))
    7262           8 :             : 0.0;
    7263             :     const bool bAvoidNoDataSingleBand =
    7264           8 :         poWK->nBands == 1 ||
    7265           0 :         !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
    7266           8 :                                           "UNIFIED_SRC_NODATA", "FALSE"));
    7267             : 
    7268           8 :     double *padfRealVals = nullptr;
    7269           8 :     double *padfImagVals = nullptr;
    7270           8 :     float *pafCounts = nullptr;
    7271             : 
    7272           8 :     if (nSrcXSize > 0 && nSrcYSize > 0)
    7273             :     {
    7274             :         padfRealVals = static_cast<double *>(
    7275           8 :             VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(double)));
    7276             :         padfImagVals = static_cast<double *>(
    7277           8 :             VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(double)));
    7278             :         pafCounts = static_cast<float *>(
    7279           8 :             VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
    7280           8 :         if (padfRealVals == nullptr || padfImagVals == nullptr ||
    7281             :             pafCounts == nullptr)
    7282             :         {
    7283           0 :             VSIFree(padfRealVals);
    7284           0 :             VSIFree(padfImagVals);
    7285           0 :             VSIFree(pafCounts);
    7286           0 :             return;
    7287             :         }
    7288             :     }
    7289             : 
    7290             :     /* -------------------------------------------------------------------- */
    7291             :     /*      Allocate x,y,z coordinate arrays for transformation ... two     */
    7292             :     /*      scanlines worth of positions.                                   */
    7293             :     /* -------------------------------------------------------------------- */
    7294             : 
    7295             :     double *padfX =
    7296           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7297             :     double *padfY =
    7298           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7299             :     double *padfZ =
    7300           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7301             :     double *padfX2 =
    7302           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7303             :     double *padfY2 =
    7304           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7305             :     double *padfZ2 =
    7306           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7307           8 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    7308           8 :     int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    7309             : 
    7310           8 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    7311           8 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    7312           8 :     const double dfErrorThreshold = CPLAtof(
    7313           8 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    7314             : 
    7315             :     const int nXMargin =
    7316           8 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
    7317             :     const int nYMargin =
    7318           8 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
    7319             : 
    7320             :     /* ==================================================================== */
    7321             :     /*      Loop over output lines.                                         */
    7322             :     /* ==================================================================== */
    7323          16 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    7324             :     {
    7325           8 :         GWKAverageOrModeComputeLineCoords(
    7326             :             psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
    7327             :             pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
    7328             : 
    7329             :         // Loop over pixels in output scanline.
    7330          16 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    7331             :         {
    7332           8 :             GPtrDiff_t iSrcOffset = 0;
    7333           8 :             double dfDensity = 1.0;
    7334           8 :             bool bHasFoundDensity = false;
    7335             : 
    7336           8 :             bool bWrapOverX = false;
    7337           8 :             double dfXMin = 0;
    7338           8 :             double dfYMin = 0;
    7339           8 :             double dfXMax = 0;
    7340           8 :             double dfYMax = 0;
    7341           8 :             int iSrcXMin = 0;
    7342           8 :             int iSrcYMin = 0;
    7343           8 :             int iSrcXMax = 0;
    7344           8 :             int iSrcYMax = 0;
    7345           8 :             if (!GWKAverageOrModeComputeSourceCoords(
    7346             :                     psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
    7347             :                     nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
    7348             :                     iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
    7349             :             {
    7350           0 :                 continue;
    7351             :             }
    7352             : 
    7353           8 :             const GPtrDiff_t iDstOffset =
    7354           8 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    7355             : 
    7356             :             // Loop processing each band.
    7357          16 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7358             :             {
    7359           8 :                 double dfBandDensity = 0.0;
    7360             : 
    7361           8 :                 int nBins = 0;
    7362           8 :                 int iModeIndex = -1;
    7363           8 :                 double dfValueReal = 0;
    7364           8 :                 double dfValueImag = 0;
    7365             : 
    7366          16 :                 for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7367             :                 {
    7368           8 :                     const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7369           8 :                     iSrcOffset =
    7370           8 :                         iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7371          38 :                     for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7372             :                          iSrcX++, iSrcOffset++)
    7373             :                     {
    7374          30 :                         if (bWrapOverX)
    7375           0 :                             iSrcOffset =
    7376           0 :                                 (iSrcX % nSrcXSize) +
    7377           0 :                                 static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7378             : 
    7379          30 :                         if (poWK->panUnifiedSrcValid != nullptr &&
    7380           0 :                             !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    7381           0 :                             continue;
    7382             : 
    7383          30 :                         if (GWKGetPixelValue(poWK, iBand, iSrcOffset,
    7384             :                                              &dfBandDensity, &dfValueReal,
    7385          60 :                                              &dfValueImag) &&
    7386          30 :                             dfBandDensity > BAND_DENSITY_THRESHOLD)
    7387             :                         {
    7388          30 :                             const double dfWeight =
    7389          30 :                                 COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7390             : 
    7391             :                             // Check array for existing entry.
    7392          30 :                             int i = 0;
    7393          49 :                             for (i = 0; i < nBins; ++i)
    7394             :                             {
    7395          47 :                                 if (IsSame(padfRealVals[i], dfValueReal) &&
    7396          14 :                                     IsSame(padfImagVals[i], dfValueImag))
    7397             :                                 {
    7398             : 
    7399          14 :                                     pafCounts[i] +=
    7400          14 :                                         static_cast<float>(dfWeight);
    7401          14 :                                     bool bValIsMaxCount =
    7402          14 :                                         (pafCounts[i] > pafCounts[iModeIndex]);
    7403             : 
    7404          14 :                                     if (!bValIsMaxCount &&
    7405           6 :                                         pafCounts[i] == pafCounts[iModeIndex])
    7406             :                                     {
    7407           3 :                                         switch (eTieStrategy)
    7408             :                                         {
    7409           3 :                                             case GWKTS_First:
    7410           3 :                                                 break;
    7411           0 :                                             case GWKTS_Min:
    7412           0 :                                                 bValIsMaxCount =
    7413           0 :                                                     dfValueReal <
    7414           0 :                                                     padfRealVals[iModeIndex];
    7415           0 :                                                 break;
    7416           0 :                                             case GWKTS_Max:
    7417           0 :                                                 bValIsMaxCount =
    7418           0 :                                                     dfValueReal >
    7419           0 :                                                     padfRealVals[iModeIndex];
    7420           0 :                                                 break;
    7421             :                                         }
    7422             :                                     }
    7423             : 
    7424          14 :                                     if (bValIsMaxCount)
    7425             :                                     {
    7426           8 :                                         iModeIndex = i;
    7427             :                                     }
    7428             : 
    7429          14 :                                     break;
    7430             :                                 }
    7431             :                             }
    7432             : 
    7433             :                             // Add to arr if entry not already there.
    7434          30 :                             if (i == nBins)
    7435             :                             {
    7436          16 :                                 padfRealVals[i] = dfValueReal;
    7437          16 :                                 padfImagVals[i] = dfValueImag;
    7438          16 :                                 pafCounts[i] = static_cast<float>(dfWeight);
    7439             : 
    7440          16 :                                 if (iModeIndex < 0)
    7441           8 :                                     iModeIndex = i;
    7442             : 
    7443          16 :                                 ++nBins;
    7444             :                             }
    7445             :                         }
    7446             :                     }
    7447             :                 }
    7448             : 
    7449           8 :                 if (iModeIndex != -1)
    7450             :                 {
    7451           8 :                     dfValueReal = padfRealVals[iModeIndex];
    7452           8 :                     dfValueImag = padfImagVals[iModeIndex];
    7453           8 :                     dfBandDensity = 1;
    7454             : 
    7455           8 :                     if (poWK->bApplyVerticalShift)
    7456             :                     {
    7457           0 :                         if (!std::isfinite(padfZ[iDstX]))
    7458           0 :                             continue;
    7459             :                         // Subtract padfZ[] since the coordinate
    7460             :                         // transformation is from target to source
    7461           0 :                         dfValueReal =
    7462           0 :                             dfValueReal * poWK->dfMultFactorVerticalShift -
    7463           0 :                             padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
    7464             :                     }
    7465             : 
    7466           8 :                     bHasFoundDensity = true;
    7467             :                 }
    7468             : 
    7469             :                 // We have a computed value from the source.  Now apply it
    7470             :                 // to the destination pixel
    7471           8 :                 if (bHasFoundDensity)
    7472             :                 {
    7473           8 :                     GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    7474             :                                      dfValueReal, dfValueImag,
    7475             :                                      bAvoidNoDataSingleBand);
    7476             :                 }
    7477             :             }
    7478             : 
    7479           8 :             if (!bHasFoundDensity)
    7480           0 :                 continue;
    7481             : 
    7482           8 :             if (!bAvoidNoDataSingleBand)
    7483             :             {
    7484           0 :                 GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    7485             :             }
    7486             : 
    7487             :             /* --------------------------------------------------------------------
    7488             :              */
    7489             :             /*      Update destination density/validity masks. */
    7490             :             /* --------------------------------------------------------------------
    7491             :              */
    7492           8 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    7493             : 
    7494           8 :             if (poWK->panDstValid != nullptr)
    7495             :             {
    7496           0 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    7497             :             }
    7498             :         } /* Next iDstX */
    7499             : 
    7500             :         /* --------------------------------------------------------------------
    7501             :          */
    7502             :         /*      Report progress to the user, and optionally cancel out. */
    7503             :         /* --------------------------------------------------------------------
    7504             :          */
    7505           8 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    7506           0 :             break;
    7507             :     }
    7508             : 
    7509             :     /* -------------------------------------------------------------------- */
    7510             :     /*      Cleanup and return.                                             */
    7511             :     /* -------------------------------------------------------------------- */
    7512           8 :     CPLFree(padfX);
    7513           8 :     CPLFree(padfY);
    7514           8 :     CPLFree(padfZ);
    7515           8 :     CPLFree(padfX2);
    7516           8 :     CPLFree(padfY2);
    7517           8 :     CPLFree(padfZ2);
    7518           8 :     CPLFree(pabSuccess);
    7519           8 :     CPLFree(pabSuccess2);
    7520           8 :     VSIFree(padfRealVals);
    7521           8 :     VSIFree(padfImagVals);
    7522           8 :     VSIFree(pafCounts);
    7523             : }
    7524             : 
    7525             : /************************************************************************/
    7526             : /*                       GWKAverageOrModeThread()                       */
    7527             : /************************************************************************/
    7528             : 
    7529             : // Overall logic based on GWKGeneralCaseThread().
    7530         163 : static void GWKAverageOrModeThread(void *pData)
    7531             : {
    7532         163 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    7533         163 :     const GDALWarpKernel *poWK = psJob->poWK;
    7534         163 :     const int iYMin = psJob->iYMin;
    7535         163 :     const int iYMax = psJob->iYMax;
    7536             :     const double dfMultFactorVerticalShiftPipeline =
    7537         163 :         poWK->bApplyVerticalShift
    7538         163 :             ? CPLAtof(CSLFetchNameValueDef(
    7539           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    7540             :                   "1.0"))
    7541         163 :             : 0.0;
    7542             :     const bool bAvoidNoDataSingleBand =
    7543         194 :         poWK->nBands == 1 ||
    7544          31 :         !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
    7545         163 :                                           "UNIFIED_SRC_NODATA", "FALSE"));
    7546             : 
    7547         163 :     const int nDstXSize = poWK->nDstXSize;
    7548         163 :     const int nSrcXSize = poWK->nSrcXSize;
    7549             : 
    7550             :     /* -------------------------------------------------------------------- */
    7551             :     /*      Find out which algorithm to use (small optim.)                  */
    7552             :     /* -------------------------------------------------------------------- */
    7553             : 
    7554             :     // Only used for GRA_Mode
    7555         163 :     float *pafCounts = nullptr;
    7556         163 :     int nBins = 0;
    7557         163 :     int nBinsOffset = 0;
    7558         163 :     const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
    7559             : 
    7560             :     // Only used with Q1, Med and Q3
    7561         163 :     float quant = 0.0f;
    7562             : 
    7563             :     // To control array allocation only when data type is complex
    7564         163 :     const bool bIsComplex = GDALDataTypeIsComplex(poWK->eWorkingDataType) != 0;
    7565             : 
    7566         163 :     if (poWK->eResample == GRA_Mode)
    7567             :     {
    7568          45 :         if (poWK->bApplyVerticalShift)
    7569             :         {
    7570           0 :             return GWKModeComplexType(psJob);
    7571             :         }
    7572             : 
    7573          45 :         switch (poWK->eWorkingDataType)
    7574             :         {
    7575           7 :             case GDT_UInt8:
    7576           7 :                 nBins = 256;
    7577           7 :                 break;
    7578             : 
    7579           0 :             case GDT_Int8:
    7580           0 :                 nBins = 256;
    7581           0 :                 nBinsOffset = nBins / 2;
    7582           0 :                 break;
    7583             : 
    7584           1 :             case GDT_UInt16:
    7585           1 :                 nBins = 65536;
    7586           1 :                 break;
    7587             : 
    7588          10 :             case GDT_Int16:
    7589          10 :                 nBins = 65536;
    7590          10 :                 nBinsOffset = nBins / 2;
    7591          10 :                 break;
    7592             : 
    7593          10 :             case GDT_Int32:
    7594          10 :                 return GWKModeRealType<int32_t>(psJob);
    7595             : 
    7596           1 :             case GDT_UInt32:
    7597           1 :                 return GWKModeRealType<uint32_t>(psJob);
    7598             : 
    7599           1 :             case GDT_Int64:
    7600           1 :                 return GWKModeRealType<int64_t>(psJob);
    7601             : 
    7602           1 :             case GDT_UInt64:
    7603           1 :                 return GWKModeRealType<uint64_t>(psJob);
    7604             : 
    7605           0 :             case GDT_Float16:
    7606           0 :                 return GWKModeRealType<GFloat16>(psJob);
    7607             : 
    7608           4 :             case GDT_Float32:
    7609           4 :                 return GWKModeRealType<float>(psJob);
    7610             : 
    7611           2 :             case GDT_Float64:
    7612           2 :                 return GWKModeRealType<double>(psJob);
    7613             : 
    7614           8 :             case GDT_CInt16:
    7615             :             case GDT_CInt32:
    7616             :             case GDT_CFloat16:
    7617             :             case GDT_CFloat32:
    7618             :             case GDT_CFloat64:
    7619           8 :                 return GWKModeComplexType(psJob);
    7620             : 
    7621           0 :             case GDT_Unknown:
    7622             :             case GDT_TypeCount:
    7623           0 :                 CPLAssert(false);
    7624             :                 return;
    7625             :         }
    7626             : 
    7627          18 :         if (nBins)
    7628             :         {
    7629             :             pafCounts =
    7630          18 :                 static_cast<float *>(VSI_MALLOC_VERBOSE(nBins * sizeof(float)));
    7631          18 :             if (pafCounts == nullptr)
    7632           0 :                 return;
    7633             :         }
    7634             :     }
    7635         118 :     else if (poWK->eResample == GRA_Med)
    7636             :     {
    7637           6 :         quant = 0.5f;
    7638             :     }
    7639         112 :     else if (poWK->eResample == GRA_Q1)
    7640             :     {
    7641          10 :         quant = 0.25f;
    7642             :     }
    7643         102 :     else if (poWK->eResample == GRA_Q3)
    7644             :     {
    7645           5 :         quant = 0.75f;
    7646             :     }
    7647          97 :     else if (poWK->eResample != GRA_Average && poWK->eResample != GRA_RMS &&
    7648          11 :              poWK->eResample != GRA_Min && poWK->eResample != GRA_Max)
    7649             :     {
    7650             :         // Other resample algorithms not permitted here.
    7651           0 :         CPLError(CE_Fatal, CPLE_AppDefined,
    7652             :                  "GDALWarpKernel():GWKAverageOrModeThread() ERROR, "
    7653             :                  "illegal resample");
    7654             :     }
    7655             : 
    7656         136 :     CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread()");
    7657             : 
    7658             :     /* -------------------------------------------------------------------- */
    7659             :     /*      Allocate x,y,z coordinate arrays for transformation ... two     */
    7660             :     /*      scanlines worth of positions.                                   */
    7661             :     /* -------------------------------------------------------------------- */
    7662             : 
    7663             :     double *padfX =
    7664         136 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7665             :     double *padfY =
    7666         136 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7667             :     double *padfZ =
    7668         136 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7669             :     double *padfX2 =
    7670         136 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7671             :     double *padfY2 =
    7672         136 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7673             :     double *padfZ2 =
    7674         136 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7675         136 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    7676         136 :     int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    7677             : 
    7678         136 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    7679         136 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    7680         136 :     const double dfErrorThreshold = CPLAtof(
    7681         136 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    7682             : 
    7683             :     const double dfExcludedValuesThreshold =
    7684         136 :         CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
    7685             :                                      "EXCLUDED_VALUES_PCT_THRESHOLD", "50")) /
    7686         136 :         100.0;
    7687             :     const double dfNodataValuesThreshold =
    7688         136 :         CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
    7689             :                                      "NODATA_VALUES_PCT_THRESHOLD", "100")) /
    7690         136 :         100.0;
    7691             : 
    7692             :     const int nXMargin =
    7693         136 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
    7694             :     const int nYMargin =
    7695         136 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
    7696             : 
    7697             :     /* ==================================================================== */
    7698             :     /*      Loop over output lines.                                         */
    7699             :     /* ==================================================================== */
    7700        8214 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    7701             :     {
    7702        8078 :         GWKAverageOrModeComputeLineCoords(
    7703             :             psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
    7704             :             pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
    7705             : 
    7706             :         /* ====================================================================
    7707             :          */
    7708             :         /*      Loop over pixels in output scanline. */
    7709             :         /* ====================================================================
    7710             :          */
    7711     2094000 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    7712             :         {
    7713     2085920 :             GPtrDiff_t iSrcOffset = 0;
    7714     2085920 :             double dfDensity = 1.0;
    7715     2085920 :             bool bHasFoundDensity = false;
    7716             : 
    7717     2085920 :             bool bWrapOverX = false;
    7718     2085920 :             double dfXMin = 0;
    7719     2085920 :             double dfYMin = 0;
    7720     2085920 :             double dfXMax = 0;
    7721     2085920 :             double dfYMax = 0;
    7722     2085920 :             int iSrcXMin = 0;
    7723     2085920 :             int iSrcYMin = 0;
    7724     2085920 :             int iSrcXMax = 0;
    7725     2085920 :             int iSrcYMax = 0;
    7726     2085920 :             if (!GWKAverageOrModeComputeSourceCoords(
    7727             :                     psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
    7728             :                     nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
    7729             :                     iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
    7730             :             {
    7731      687183 :                 continue;
    7732             :             }
    7733             : 
    7734     1906710 :             const GPtrDiff_t iDstOffset =
    7735     1906710 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    7736             : 
    7737     1906710 :             bool bDone = false;
    7738             : 
    7739             :             // Special Average mode where we process all bands together,
    7740             :             // to avoid averaging tuples that match an entry of m_aadfExcludedValues
    7741     1906710 :             constexpr double EPSILON = 1e-10;
    7742     4614100 :             if (poWK->eResample == GRA_Average &&
    7743      800681 :                 (!poWK->m_aadfExcludedValues.empty() ||
    7744      589832 :                  dfNodataValuesThreshold < 1 - EPSILON) &&
    7745     2707390 :                 !poWK->bApplyVerticalShift && !bIsComplex)
    7746             :             {
    7747      589832 :                 double dfTotalWeightInvalid = 0.0;
    7748      589832 :                 double dfTotalWeightExcluded = 0.0;
    7749      589832 :                 double dfTotalWeightRegular = 0.0;
    7750     1179660 :                 std::vector<double> adfValueReal(poWK->nBands, 0);
    7751     1179660 :                 std::vector<double> adfValueAveraged(poWK->nBands, 0);
    7752             :                 std::vector<int> anCountExcludedValues(
    7753      589832 :                     poWK->m_aadfExcludedValues.size(), 0);
    7754             : 
    7755     2162710 :                 for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7756             :                 {
    7757     1572880 :                     const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7758     1572880 :                     iSrcOffset =
    7759     1572880 :                         iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7760     6291500 :                     for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7761             :                          iSrcX++, iSrcOffset++)
    7762             :                     {
    7763     4718620 :                         if (bWrapOverX)
    7764           0 :                             iSrcOffset =
    7765           0 :                                 (iSrcX % nSrcXSize) +
    7766           0 :                                 static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7767             : 
    7768     4718620 :                         const double dfWeight =
    7769     4718620 :                             COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7770     4718620 :                         if (dfWeight <= 0)
    7771           0 :                             continue;
    7772             : 
    7773     4718640 :                         if (poWK->panUnifiedSrcValid != nullptr &&
    7774          12 :                             !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    7775             :                         {
    7776           3 :                             dfTotalWeightInvalid += dfWeight;
    7777           3 :                             continue;
    7778             :                         }
    7779             : 
    7780     4718620 :                         bool bAllValid = true;
    7781     8651150 :                         for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7782             :                         {
    7783     7340300 :                             double dfBandDensity = 0;
    7784     7340300 :                             double dfValueImagTmp = 0;
    7785    11272800 :                             if (!(GWKGetPixelValue(
    7786             :                                       poWK, iBand, iSrcOffset, &dfBandDensity,
    7787     7340300 :                                       &adfValueReal[iBand], &dfValueImagTmp) &&
    7788     3932530 :                                   dfBandDensity > BAND_DENSITY_THRESHOLD))
    7789             :                             {
    7790     3407770 :                                 bAllValid = false;
    7791     3407770 :                                 break;
    7792             :                             }
    7793             :                         }
    7794             : 
    7795     4718620 :                         if (!bAllValid)
    7796             :                         {
    7797     3407770 :                             dfTotalWeightInvalid += dfWeight;
    7798     3407770 :                             continue;
    7799             :                         }
    7800             : 
    7801     1310850 :                         bool bExcludedValueFound = false;
    7802     2490500 :                         for (size_t i = 0;
    7803     2490500 :                              i < poWK->m_aadfExcludedValues.size(); ++i)
    7804             :                         {
    7805     1179670 :                             if (poWK->m_aadfExcludedValues[i] == adfValueReal)
    7806             :                             {
    7807          22 :                                 bExcludedValueFound = true;
    7808          22 :                                 ++anCountExcludedValues[i];
    7809          22 :                                 dfTotalWeightExcluded += dfWeight;
    7810          22 :                                 break;
    7811             :                             }
    7812             :                         }
    7813     1310850 :                         if (!bExcludedValueFound)
    7814             :                         {
    7815             :                             // Weighted incremental algorithm mean
    7816             :                             // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
    7817     1310830 :                             dfTotalWeightRegular += dfWeight;
    7818     5243290 :                             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7819             :                             {
    7820     3932460 :                                 adfValueAveraged[iBand] +=
    7821     7864930 :                                     (dfWeight / dfTotalWeightRegular) *
    7822     7864930 :                                     (adfValueReal[iBand] -
    7823     3932460 :                                      adfValueAveraged[iBand]);
    7824             :                             }
    7825             :                         }
    7826             :                     }
    7827             :                 }
    7828             : 
    7829      589832 :                 const double dfTotalWeight = dfTotalWeightInvalid +
    7830             :                                              dfTotalWeightExcluded +
    7831             :                                              dfTotalWeightRegular;
    7832      589832 :                 if (dfTotalWeightInvalid > 0 &&
    7833             :                     dfTotalWeightInvalid >=
    7834      458751 :                         dfNodataValuesThreshold * dfTotalWeight)
    7835             :                 {
    7836             :                     // Do nothing. Leave bHasFoundDensity set to false.
    7837             :                 }
    7838      131085 :                 else if (dfTotalWeightExcluded > 0 &&
    7839             :                          dfTotalWeightExcluded >=
    7840           7 :                              dfExcludedValuesThreshold * dfTotalWeight)
    7841             :                 {
    7842             :                     // Find the most represented excluded value tuple
    7843           3 :                     size_t iExcludedValue = 0;
    7844           3 :                     int nExcludedValueCount = 0;
    7845           6 :                     for (size_t i = 0; i < poWK->m_aadfExcludedValues.size();
    7846             :                          ++i)
    7847             :                     {
    7848           3 :                         if (anCountExcludedValues[i] > nExcludedValueCount)
    7849             :                         {
    7850           3 :                             iExcludedValue = i;
    7851           3 :                             nExcludedValueCount = anCountExcludedValues[i];
    7852             :                         }
    7853             :                     }
    7854             : 
    7855           3 :                     bHasFoundDensity = true;
    7856             : 
    7857          12 :                     for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7858             :                     {
    7859           9 :                         GWKSetPixelValue(
    7860             :                             poWK, iBand, iDstOffset, /* dfBandDensity = */ 1.0,
    7861           9 :                             poWK->m_aadfExcludedValues[iExcludedValue][iBand],
    7862             :                             0, bAvoidNoDataSingleBand);
    7863             :                     }
    7864             : 
    7865           3 :                     if (!bAvoidNoDataSingleBand)
    7866             :                     {
    7867           0 :                         GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    7868           3 :                     }
    7869             :                 }
    7870      131082 :                 else if (dfTotalWeightRegular > 0)
    7871             :                 {
    7872      131082 :                     bHasFoundDensity = true;
    7873             : 
    7874      524324 :                     for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7875             :                     {
    7876      393242 :                         GWKSetPixelValue(poWK, iBand, iDstOffset,
    7877             :                                          /* dfBandDensity = */ 1.0,
    7878      393242 :                                          adfValueAveraged[iBand], 0,
    7879             :                                          bAvoidNoDataSingleBand);
    7880             :                     }
    7881             : 
    7882      131082 :                     if (!bAvoidNoDataSingleBand)
    7883             :                     {
    7884           0 :                         GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    7885             :                     }
    7886             :                 }
    7887             : 
    7888             :                 // Skip below loop on bands
    7889      589832 :                 bDone = true;
    7890             :             }
    7891             : 
    7892             :             /* ====================================================================
    7893             :              */
    7894             :             /*      Loop processing each band. */
    7895             :             /* ====================================================================
    7896             :              */
    7897             : 
    7898     4730010 :             for (int iBand = 0; !bDone && iBand < poWK->nBands; iBand++)
    7899             :             {
    7900     2823310 :                 double dfBandDensity = 0.0;
    7901     2823310 :                 double dfValueReal = 0.0;
    7902     2823310 :                 double dfValueImag = 0.0;
    7903     2823310 :                 double dfValueRealTmp = 0.0;
    7904     2823310 :                 double dfValueImagTmp = 0.0;
    7905             : 
    7906             :                 /* --------------------------------------------------------------------
    7907             :                  */
    7908             :                 /*      Collect the source value. */
    7909             :                 /* --------------------------------------------------------------------
    7910             :                  */
    7911             : 
    7912             :                 // Loop over source lines and pixels; one branch per algorithm.
    7913             : 
    7914     2823310 :                 if (poWK->eResample == GRA_Average)
    7915             :                 {
    7916      300849 :                     double dfTotalWeight = 0.0;
    7917             : 
    7918             :                     // This code adapted from GDALDownsampleChunk32R_AverageT()
    7919             :                     // in gcore/overview.cpp.
    7920      631308 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7921             :                     {
    7922      330459 :                         const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7923      330459 :                         iSrcOffset = iSrcXMin +
    7924      330459 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7925      773407 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7926             :                              iSrcX++, iSrcOffset++)
    7927             :                         {
    7928      442948 :                             if (bWrapOverX)
    7929        1371 :                                 iSrcOffset =
    7930        1371 :                                     (iSrcX % nSrcXSize) +
    7931        1371 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7932             : 
    7933      442952 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7934           4 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7935             :                                             iSrcOffset))
    7936             :                             {
    7937           1 :                                 continue;
    7938             :                             }
    7939             : 
    7940      442947 :                             if (GWKGetPixelValue(
    7941             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7942      885894 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7943      442947 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7944             :                             {
    7945      442947 :                                 const double dfWeight =
    7946      442947 :                                     COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7947      442947 :                                 if (dfWeight > 0)
    7948             :                                 {
    7949             :                                     // Weighted incremental algorithm mean
    7950             :                                     // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
    7951      442947 :                                     dfTotalWeight += dfWeight;
    7952      442947 :                                     dfValueReal +=
    7953      442947 :                                         (dfWeight / dfTotalWeight) *
    7954      442947 :                                         (dfValueRealTmp - dfValueReal);
    7955      442947 :                                     if (bIsComplex)
    7956             :                                     {
    7957         252 :                                         dfValueImag +=
    7958         252 :                                             (dfWeight / dfTotalWeight) *
    7959         252 :                                             (dfValueImagTmp - dfValueImag);
    7960             :                                     }
    7961             :                                 }
    7962             :                             }
    7963             :                         }
    7964             :                     }
    7965             : 
    7966      300849 :                     if (dfTotalWeight > 0)
    7967             :                     {
    7968      300849 :                         if (poWK->bApplyVerticalShift)
    7969             :                         {
    7970           0 :                             if (!std::isfinite(padfZ[iDstX]))
    7971           0 :                                 continue;
    7972             :                             // Subtract padfZ[] since the coordinate
    7973             :                             // transformation is from target to source
    7974           0 :                             dfValueReal =
    7975           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7976           0 :                                 padfZ[iDstX] *
    7977             :                                     dfMultFactorVerticalShiftPipeline;
    7978             :                         }
    7979             : 
    7980      300849 :                         dfBandDensity = 1;
    7981      300849 :                         bHasFoundDensity = true;
    7982             :                     }
    7983             :                 }  // GRA_Average.
    7984             : 
    7985     2522460 :                 else if (poWK->eResample == GRA_RMS)
    7986             :                 {
    7987      300416 :                     double dfTotalReal = 0.0;
    7988      300416 :                     double dfTotalImag = 0.0;
    7989      300416 :                     double dfTotalWeight = 0.0;
    7990             :                     // This code adapted from GDALDownsampleChunk32R_AverageT()
    7991             :                     // in gcore/overview.cpp.
    7992      630578 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7993             :                     {
    7994      330162 :                         const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7995      330162 :                         iSrcOffset = iSrcXMin +
    7996      330162 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7997      772930 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7998             :                              iSrcX++, iSrcOffset++)
    7999             :                         {
    8000      442768 :                             if (bWrapOverX)
    8001        1371 :                                 iSrcOffset =
    8002        1371 :                                     (iSrcX % nSrcXSize) +
    8003        1371 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8004             : 
    8005      442768 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    8006           0 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    8007             :                                             iSrcOffset))
    8008             :                             {
    8009           0 :                                 continue;
    8010             :                             }
    8011             : 
    8012      442768 :                             if (GWKGetPixelValue(
    8013             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    8014      885536 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    8015      442768 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    8016             :                             {
    8017      442768 :                                 const double dfWeight =
    8018      442768 :                                     COMPUTE_WEIGHT(iSrcX, dfWeightY);
    8019      442768 :                                 dfTotalWeight += dfWeight;
    8020      442768 :                                 dfTotalReal +=
    8021      442768 :                                     dfValueRealTmp * dfValueRealTmp * dfWeight;
    8022      442768 :                                 if (bIsComplex)
    8023          48 :                                     dfTotalImag += dfValueImagTmp *
    8024          48 :                                                    dfValueImagTmp * dfWeight;
    8025             :                             }
    8026             :                         }
    8027             :                     }
    8028             : 
    8029      300416 :                     if (dfTotalWeight > 0)
    8030             :                     {
    8031      300416 :                         dfValueReal = sqrt(dfTotalReal / dfTotalWeight);
    8032             : 
    8033      300416 :                         if (poWK->bApplyVerticalShift)
    8034             :                         {
    8035           0 :                             if (!std::isfinite(padfZ[iDstX]))
    8036           0 :                                 continue;
    8037             :                             // Subtract padfZ[] since the coordinate
    8038             :                             // transformation is from target to source
    8039           0 :                             dfValueReal =
    8040           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    8041           0 :                                 padfZ[iDstX] *
    8042             :                                     dfMultFactorVerticalShiftPipeline;
    8043             :                         }
    8044             : 
    8045      300416 :                         if (bIsComplex)
    8046          12 :                             dfValueImag = sqrt(dfTotalImag / dfTotalWeight);
    8047             : 
    8048      300416 :                         dfBandDensity = 1;
    8049      300416 :                         bHasFoundDensity = true;
    8050             :                     }
    8051             :                 }  // GRA_RMS.
    8052             : 
    8053     2222040 :                 else if (poWK->eResample == GRA_Mode)
    8054             :                 {
    8055      496623 :                     float fMaxCount = 0.0f;
    8056      496623 :                     int nMode = -1;
    8057      496623 :                     bool bHasSourceValues = false;
    8058             : 
    8059      496623 :                     memset(pafCounts, 0, nBins * sizeof(float));
    8060             : 
    8061     1612560 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    8062             :                     {
    8063     1115940 :                         const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    8064     1115940 :                         iSrcOffset = iSrcXMin +
    8065     1115940 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8066     4703370 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    8067             :                              iSrcX++, iSrcOffset++)
    8068             :                         {
    8069     3587430 :                             if (bWrapOverX)
    8070        1371 :                                 iSrcOffset =
    8071        1371 :                                     (iSrcX % nSrcXSize) +
    8072        1371 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8073             : 
    8074     3587430 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    8075           0 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    8076             :                                             iSrcOffset))
    8077           0 :                                 continue;
    8078             : 
    8079     3587430 :                             if (GWKGetPixelValue(
    8080             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    8081     7174870 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    8082     3587430 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    8083             :                             {
    8084     3587430 :                                 bHasSourceValues = true;
    8085     3587430 :                                 const int nVal =
    8086     3587430 :                                     static_cast<int>(dfValueRealTmp);
    8087     3587430 :                                 const int iBin = nVal + nBinsOffset;
    8088     3587430 :                                 const double dfWeight =
    8089     3587430 :                                     COMPUTE_WEIGHT(iSrcX, dfWeightY);
    8090             : 
    8091             :                                 // Sum the density.
    8092     3587430 :                                 pafCounts[iBin] += static_cast<float>(dfWeight);
    8093             :                                 // Is it the most common value so far?
    8094     3587430 :                                 bool bUpdateMode = pafCounts[iBin] > fMaxCount;
    8095     3587430 :                                 if (!bUpdateMode &&
    8096      750293 :                                     pafCounts[iBin] == fMaxCount)
    8097             :                                 {
    8098      217592 :                                     switch (eTieStrategy)
    8099             :                                     {
    8100      217584 :                                         case GWKTS_First:
    8101      217584 :                                             break;
    8102           4 :                                         case GWKTS_Min:
    8103           4 :                                             bUpdateMode = nVal < nMode;
    8104           4 :                                             break;
    8105           4 :                                         case GWKTS_Max:
    8106           4 :                                             bUpdateMode = nVal > nMode;
    8107           4 :                                             break;
    8108             :                                     }
    8109             :                                 }
    8110     3587430 :                                 if (bUpdateMode)
    8111             :                                 {
    8112     2837140 :                                     nMode = nVal;
    8113     2837140 :                                     fMaxCount = pafCounts[iBin];
    8114             :                                 }
    8115             :                             }
    8116             :                         }
    8117             :                     }
    8118             : 
    8119      496623 :                     if (bHasSourceValues)
    8120             :                     {
    8121      496623 :                         dfValueReal = nMode;
    8122      496623 :                         dfBandDensity = 1;
    8123      496623 :                         bHasFoundDensity = true;
    8124             :                     }
    8125             :                 }  // GRA_Mode.
    8126             : 
    8127     1725420 :                 else if (poWK->eResample == GRA_Max)
    8128             :                 {
    8129      335037 :                     bool bFoundValid = false;
    8130      335037 :                     double dfTotalReal = cpl::NumericLimits<double>::lowest();
    8131             :                     // This code adapted from nAlgo 1 method, GRA_Average.
    8132     1288010 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    8133             :                     {
    8134      952975 :                         iSrcOffset = iSrcXMin +
    8135      952975 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8136     4376740 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    8137             :                              iSrcX++, iSrcOffset++)
    8138             :                         {
    8139     3423770 :                             if (bWrapOverX)
    8140        1371 :                                 iSrcOffset =
    8141        1371 :                                     (iSrcX % nSrcXSize) +
    8142        1371 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8143             : 
    8144     3426580 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    8145        2809 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    8146             :                                             iSrcOffset))
    8147             :                             {
    8148        2446 :                                 continue;
    8149             :                             }
    8150             : 
    8151             :                             // Returns pixel value if it is not no data.
    8152     3421320 :                             if (GWKGetPixelValue(
    8153             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    8154     6842640 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    8155     3421320 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    8156             :                             {
    8157     3421320 :                                 bFoundValid = true;
    8158     3421320 :                                 if (dfTotalReal < dfValueRealTmp)
    8159             :                                 {
    8160      442234 :                                     dfTotalReal = dfValueRealTmp;
    8161             :                                 }
    8162             :                             }
    8163             :                         }
    8164             :                     }
    8165             : 
    8166      335037 :                     if (bFoundValid)
    8167             :                     {
    8168      335037 :                         dfValueReal = dfTotalReal;
    8169             : 
    8170      335037 :                         if (poWK->bApplyVerticalShift)
    8171             :                         {
    8172           0 :                             if (!std::isfinite(padfZ[iDstX]))
    8173           0 :                                 continue;
    8174             :                             // Subtract padfZ[] since the coordinate
    8175             :                             // transformation is from target to source
    8176           0 :                             dfValueReal =
    8177           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    8178           0 :                                 padfZ[iDstX] *
    8179             :                                     dfMultFactorVerticalShiftPipeline;
    8180             :                         }
    8181             : 
    8182      335037 :                         dfBandDensity = 1;
    8183      335037 :                         bHasFoundDensity = true;
    8184             :                     }
    8185             :                 }
    8186             : 
    8187     1390380 :                 else if (poWK->eResample == GRA_Min)
    8188             :                 {
    8189      335012 :                     bool bFoundValid = false;
    8190      335012 :                     double dfTotalReal = cpl::NumericLimits<double>::max();
    8191             :                     // This code adapted from nAlgo 1 method, GRA_Average.
    8192     1287720 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    8193             :                     {
    8194      952710 :                         iSrcOffset = iSrcXMin +
    8195      952710 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8196     4373670 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    8197             :                              iSrcX++, iSrcOffset++)
    8198             :                         {
    8199     3420960 :                             if (bWrapOverX)
    8200        1371 :                                 iSrcOffset =
    8201        1371 :                                     (iSrcX % nSrcXSize) +
    8202        1371 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8203             : 
    8204     3420960 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    8205           0 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    8206             :                                             iSrcOffset))
    8207             :                             {
    8208           0 :                                 continue;
    8209             :                             }
    8210             : 
    8211             :                             // Returns pixel value if it is not no data.
    8212     3420960 :                             if (GWKGetPixelValue(
    8213             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    8214     6841920 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    8215     3420960 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    8216             :                             {
    8217     3420960 :                                 bFoundValid = true;
    8218     3420960 :                                 if (dfTotalReal > dfValueRealTmp)
    8219             :                                 {
    8220      442628 :                                     dfTotalReal = dfValueRealTmp;
    8221             :                                 }
    8222             :                             }
    8223             :                         }
    8224             :                     }
    8225             : 
    8226      335012 :                     if (bFoundValid)
    8227             :                     {
    8228      335012 :                         dfValueReal = dfTotalReal;
    8229             : 
    8230      335012 :                         if (poWK->bApplyVerticalShift)
    8231             :                         {
    8232           0 :                             if (!std::isfinite(padfZ[iDstX]))
    8233           0 :                                 continue;
    8234             :                             // Subtract padfZ[] since the coordinate
    8235             :                             // transformation is from target to source
    8236           0 :                             dfValueReal =
    8237           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    8238           0 :                                 padfZ[iDstX] *
    8239             :                                     dfMultFactorVerticalShiftPipeline;
    8240             :                         }
    8241             : 
    8242      335012 :                         dfBandDensity = 1;
    8243      335012 :                         bHasFoundDensity = true;
    8244             :                     }
    8245             :                 }  // GRA_Min.
    8246             : 
    8247             :                 else
    8248             :                 // poWK->eResample == GRA_Med | GRA_Q1 | GRA_Q3.
    8249             :                 {
    8250     1055370 :                     CPLAssert(quant > 0.0f);
    8251             : 
    8252     1055370 :                     bool bFoundValid = false;
    8253     1055370 :                     std::vector<double> dfRealValuesTmp;
    8254             : 
    8255             :                     // This code adapted from nAlgo 1 method, GRA_Average.
    8256     4014130 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    8257             :                     {
    8258     2958760 :                         iSrcOffset = iSrcXMin +
    8259     2958760 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8260    13421300 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    8261             :                              iSrcX++, iSrcOffset++)
    8262             :                         {
    8263    10462500 :                             if (bWrapOverX)
    8264        4113 :                                 iSrcOffset =
    8265        4113 :                                     (iSrcX % nSrcXSize) +
    8266        4113 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8267             : 
    8268    10659100 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    8269      196608 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    8270             :                                             iSrcOffset))
    8271             :                             {
    8272      195449 :                                 continue;
    8273             :                             }
    8274             : 
    8275             :                             // Returns pixel value if it is not no data.
    8276    10267100 :                             if (GWKGetPixelValue(
    8277             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    8278    20534100 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    8279    10267100 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    8280             :                             {
    8281    10267100 :                                 bFoundValid = true;
    8282    10267100 :                                 dfRealValuesTmp.push_back(dfValueRealTmp);
    8283             :                             }
    8284             :                         }
    8285             :                     }
    8286             : 
    8287     1055370 :                     if (bFoundValid)
    8288             :                     {
    8289     1006150 :                         std::sort(dfRealValuesTmp.begin(),
    8290             :                                   dfRealValuesTmp.end());
    8291             :                         int quantIdx = static_cast<int>(
    8292     1006150 :                             std::ceil(quant * dfRealValuesTmp.size() - 1));
    8293     1006150 :                         dfValueReal = dfRealValuesTmp[quantIdx];
    8294             : 
    8295     1006150 :                         if (poWK->bApplyVerticalShift)
    8296             :                         {
    8297           0 :                             if (!std::isfinite(padfZ[iDstX]))
    8298           0 :                                 continue;
    8299             :                             // Subtract padfZ[] since the coordinate
    8300             :                             // transformation is from target to source
    8301           0 :                             dfValueReal =
    8302           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    8303           0 :                                 padfZ[iDstX] *
    8304             :                                     dfMultFactorVerticalShiftPipeline;
    8305             :                         }
    8306             : 
    8307     1006150 :                         dfBandDensity = 1;
    8308     1006150 :                         bHasFoundDensity = true;
    8309     1006150 :                         dfRealValuesTmp.clear();
    8310             :                     }
    8311             :                 }  // Quantile.
    8312             : 
    8313             :                 /* --------------------------------------------------------------------
    8314             :                  */
    8315             :                 /*      We have a computed value from the source. */
    8316             :                 /*      Now apply it to */
    8317             :                 /*      the destination pixel. */
    8318             :                 /* --------------------------------------------------------------------
    8319             :                  */
    8320     2823310 :                 if (bHasFoundDensity)
    8321             :                 {
    8322             :                     // TODO: Should we compute dfBandDensity in fct of
    8323             :                     // nCount/nCount2, or use as a threshold to set the dest
    8324             :                     // value?
    8325             :                     // dfBandDensity = (float) nCount / nCount2;
    8326             :                     // if( (float) nCount / nCount2 > 0.1 )
    8327             :                     // or fix gdalwarp crop_to_cutline to crop partially
    8328             :                     // overlapping pixels.
    8329     2774080 :                     GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    8330             :                                      dfValueReal, dfValueImag,
    8331             :                                      bAvoidNoDataSingleBand);
    8332             :                 }
    8333             :             }
    8334             : 
    8335     1906710 :             if (!bHasFoundDensity)
    8336      507971 :                 continue;
    8337             : 
    8338     1398740 :             if (!bAvoidNoDataSingleBand)
    8339             :             {
    8340           0 :                 GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    8341             :             }
    8342             : 
    8343             :             /* --------------------------------------------------------------------
    8344             :              */
    8345             :             /*      Update destination density/validity masks. */
    8346             :             /* --------------------------------------------------------------------
    8347             :              */
    8348     1398740 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    8349             : 
    8350     1398740 :             if (poWK->panDstValid != nullptr)
    8351             :             {
    8352        1184 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    8353             :             }
    8354             :         } /* Next iDstX */
    8355             : 
    8356             :         /* --------------------------------------------------------------------
    8357             :          */
    8358             :         /*      Report progress to the user, and optionally cancel out. */
    8359             :         /* --------------------------------------------------------------------
    8360             :          */
    8361        8078 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    8362           0 :             break;
    8363             :     }
    8364             : 
    8365             :     /* -------------------------------------------------------------------- */
    8366             :     /*      Cleanup and return.                                             */
    8367             :     /* -------------------------------------------------------------------- */
    8368         136 :     CPLFree(padfX);
    8369         136 :     CPLFree(padfY);
    8370         136 :     CPLFree(padfZ);
    8371         136 :     CPLFree(padfX2);
    8372         136 :     CPLFree(padfY2);
    8373         136 :     CPLFree(padfZ2);
    8374         136 :     CPLFree(pabSuccess);
    8375         136 :     CPLFree(pabSuccess2);
    8376         136 :     VSIFree(pafCounts);
    8377             : }
    8378             : 
    8379             : /************************************************************************/
    8380             : /*                           getOrientation()                           */
    8381             : /************************************************************************/
    8382             : 
    8383             : // Returns 1 if (p1,p2,p3) is clockwise oriented,
    8384             : // -1 if it is counter-clockwise oriented,
    8385             : // or 0 if it is colinear (|val| below the absolute tolerance 1e-20).
    8386     2355910 : static int getOrientation(const XYPair &p1, const XYPair &p2, const XYPair &p3)
    8387             : {
    8388     2355910 :     const double p1x = p1.first;  // .first is used as x, .second as y
    8389     2355910 :     const double p1y = p1.second;
    8390     2355910 :     const double p2x = p2.first;
    8391     2355910 :     const double p2y = p2.second;
    8392     2355910 :     const double p3x = p3.first;
    8393     2355910 :     const double p3y = p3.second;
    8394     2355910 :     const double val = (p2y - p1y) * (p3x - p2x) - (p2x - p1x) * (p3y - p2y);  // negated 2D cross product of (p2 - p1) and (p3 - p2)
    8395     2355910 :     if (std::abs(val) < 1e-20)  // the tolerance is absolute, not scaled by the coordinate magnitude
    8396        2690 :         return 0;
    8397     2353220 :     else if (val > 0)
    8398           0 :         return 1;
    8399             :     else
    8400     2353220 :         return -1;
    8401             : }
    8402             : 
    8403             : /************************************************************************/
    8404             : /*                              isConvex()                              */
    8405             : /************************************************************************/
    8406             : // Returns true when all non-colinear triplets of consecutive points examined share the same orientation.
    8407             : // poly must be closed; poly[0], poly[1] and poly[2] are read unconditionally, so at least 3 points are needed.
    8408      785302 : static bool isConvex(const XYPoly &poly)
    8409             : {
    8410      785302 :     const size_t n = poly.size();
    8411      785302 :     size_t i = 0;
    8412      785302 :     int last_orientation = getOrientation(poly[i], poly[i + 1], poly[i + 2]);  // reference orientation; may be 0 (colinear)
    8413      785302 :     ++i;
    8414     2355910 :     for (; i < n - 2; ++i)  // n - 2 is unsigned; NOTE(review): it wraps around for n < 2 - confirm callers never pass that
    8415             :     {
    8416             :         const int orientation =
    8417     1570600 :             getOrientation(poly[i], poly[i + 1], poly[i + 2]);
    8418     1570600 :         if (orientation != 0)  // colinear triplets are ignored
    8419             :         {
    8420     1567910 :             if (last_orientation == 0)
    8421           0 :                 last_orientation = orientation;  // first non-colinear triplet becomes the reference
    8422     1567910 :             else if (orientation != last_orientation)
    8423           0 :                 return false;  // turn direction changed
    8424             :         }
    8425             :     }
    8426      785302 :     return true;  // NOTE(review): the triplet (poly[n-2], poly[0], poly[1]), i.e. the turn at poly[0], is never tested - confirm this is intended
    8427             : }
    8428             : 
    8429             : /************************************************************************/
    8430             : /*                     pointIntersectsConvexPoly()                      */
    8431             : /************************************************************************/
    8432             : 
    8433             : // Returns whether xy intersects poly, which must be closed and convex. poly[0] and poly[1] are read unconditionally.
    8434     6049100 : static bool pointIntersectsConvexPoly(const XYPair &xy, const XYPoly &poly)
    8435             : {
    8436     6049100 :     const size_t n = poly.size();
    8437     6049100 :     double dx1 = xy.first - poly[0].first;  // vector from poly[0] to xy
    8438     6049100 :     double dy1 = xy.second - poly[0].second;
    8439     6049100 :     double dx2 = poly[1].first - poly[0].first;  // vector of the first edge poly[0] -> poly[1]
    8440     6049100 :     double dy2 = poly[1].second - poly[0].second;
    8441     6049100 :     double prevCrossProduct = dx1 * dy2 - dx2 * dy1;  // sign tells on which side of the first edge xy lies
    8442             : 
    8443             :     // Check if the point remains on the same side (left/right) of all edges
    8444    14556400 :     for (size_t i = 2; i < n; i++)
    8445             :     {
    8446    12793100 :         dx1 = xy.first - poly[i - 1].first;
    8447    12793100 :         dy1 = xy.second - poly[i - 1].second;
    8448             : 
    8449    12793100 :         dx2 = poly[i].first - poly[i - 1].first;
    8450    12793100 :         dy2 = poly[i].second - poly[i - 1].second;
    8451             : 
    8452    12793100 :         double crossProduct = dx1 * dy2 - dx2 * dy1;
    8453    12793100 :         if (std::abs(prevCrossProduct) < 1e-20)  // no usable reference sign yet: adopt the current edge's value
    8454      725558 :             prevCrossProduct = crossProduct;
    8455    12067500 :         else if (prevCrossProduct * crossProduct < 0)  // strictly opposite signs; a zero crossProduct never rejects
    8456     4285760 :             return false;
    8457             :     }
    8458             : 
    8459     1763340 :     return true;
    8460             : }
    8461             : 
    8462             : /************************************************************************/
    8463             : /*                          getIntersection()                           */
    8464             : /************************************************************************/
    8465             : 
    8466             : /* Returns intersection of [p1,p2] with [p3,p4], if
    8467             :  * it is a single point, and the 2 segments are not colinear. On success the point is written to xy and true is returned; otherwise false is returned and xy is left unmodified.
    8468             :  */
    8469    11811000 : static bool getIntersection(const XYPair &p1, const XYPair &p2,
    8470             :                             const XYPair &p3, const XYPair &p4, XYPair &xy)
    8471             : {
    8472    11811000 :     const double x1 = p1.first;
    8473    11811000 :     const double y1 = p1.second;
    8474    11811000 :     const double x2 = p2.first;
    8475    11811000 :     const double y2 = p2.second;
    8476    11811000 :     const double x3 = p3.first;
    8477    11811000 :     const double y3 = p3.second;
    8478    11811000 :     const double x4 = p4.first;
    8479    11811000 :     const double y4 = p4.second;
    8480    11811000 :     const double t_num = (x1 - x3) * (y3 - y4) - (y1 - y3) * (x3 - x4);  // numerator of t, the position along [p1,p2]
    8481    11811000 :     const double denom = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4);  // cross product of the two segment directions; 0 when they are parallel
    8482    11811000 :     if (t_num * denom < 0 || std::abs(t_num) > std::abs(denom) || denom == 0)  // reject t < 0, t > 1, or a zero denominator, without dividing
    8483     9260780 :         return false;
    8484             : 
    8485     2550260 :     const double u_num = (x1 - x3) * (y1 - y2) - (y1 - y3) * (x1 - x2);  // numerator of u, the position along [p3,p4]
    8486     2550260 :     if (u_num * denom < 0 || std::abs(u_num) > std::abs(denom))  // reject u < 0 or u > 1
    8487      973924 :         return false;
    8488             : 
    8489     1576340 :     const double t = t_num / denom;  // denom is known to be non-zero here
    8490     1576340 :     xy.first = x1 + t * (x2 - x1);
    8491     1576340 :     xy.second = y1 + t * (y2 - y1);
    8492     1576340 :     return true;
    8493             : }
    8494             : 
    8495             : /************************************************************************/
    8496             : /*                     getConvexPolyIntersection()                      */
    8497             : /************************************************************************/
    8498             : 
    8499             : // poly1 and poly2 must be closed and convex. The result is written to intersection, which is cleared first and may end up empty.
    8500             : // The returned intersection will not necessarily be closed.
    8501      785302 : static void getConvexPolyIntersection(const XYPoly &poly1, const XYPoly &poly2,
    8502             :                                       XYPoly &intersection)
    8503             : {
    8504      785302 :     intersection.clear();
    8505             : 
    8506             :     // Add all points of poly1 inside poly2
    8507     3926510 :     for (size_t i = 0; i < poly1.size() - 1; ++i)  // last point skipped (presumably the closing repeat of poly1[0]); NOTE(review): size() - 1 is unsigned and wraps if poly1 is empty
    8508             :     {
    8509     3141210 :         if (pointIntersectsConvexPoly(poly1[i], poly2))
    8510     1187430 :             intersection.push_back(poly1[i]);
    8511             :     }
    8512      785302 :     if (intersection.size() == poly1.size() - 1)
    8513             :     {
    8514             :         // poly1 is inside poly2
    8515      119100 :         return;  // early exit: the points are returned in poly1 order, without sorting or de-duplication
    8516             :     }
    8517             : 
    8518             :     // Add all points of poly2 inside poly1
    8519     3634860 :     for (size_t i = 0; i < poly2.size() - 1; ++i)
    8520             :     {
    8521     2907890 :         if (pointIntersectsConvexPoly(poly2[i], poly1))
    8522      575904 :             intersection.push_back(poly2[i]);
    8523             :     }
    8524             : 
    8525             :     // Compute the intersection of all edges of both polygons
    8526      726972 :     XYPair xy;
    8527     3634860 :     for (size_t i1 = 0; i1 < poly1.size() - 1; ++i1)
    8528             :     {
    8529    14539400 :         for (size_t i2 = 0; i2 < poly2.size() - 1; ++i2)
    8530             :         {
    8531    11631600 :             if (getIntersection(poly1[i1], poly1[i1 + 1], poly2[i2],
    8532    11631600 :                                 poly2[i2 + 1], xy))
    8533             :             {
    8534     1576230 :                 intersection.push_back(xy);
    8535             :             }
    8536             :         }
    8537             :     }
    8538             : 
    8539      726972 :     if (intersection.empty())
    8540       60770 :         return;
    8541             : 
    8542             :     // Find lowest-left point in intersection set
    8543      666202 :     double lowest_x = cpl::NumericLimits<double>::max();
    8544      666202 :     double lowest_y = cpl::NumericLimits<double>::max();
    8545     3772450 :     for (const auto &pair : intersection)
    8546             :     {
    8547     3106240 :         const double x = pair.first;
    8548     3106240 :         const double y = pair.second;
    8549     3106240 :         if (y < lowest_y || (y == lowest_y && x < lowest_x))  // smallest y first, ties broken by smallest x
    8550             :         {
    8551     1096040 :             lowest_x = x;
    8552     1096040 :             lowest_y = y;
    8553             :         }
    8554             :     }
    8555             : // Comparator for std::sort below: orders points by the slope of the vector from (lowest_x, lowest_y), captured by reference.
    8556     5737980 :     const auto sortFunc = [&](const XYPair &p1, const XYPair &p2)
    8557             :     {
    8558     5737980 :         const double p1x_diff = p1.first - lowest_x;
    8559     5737980 :         const double p1y_diff = p1.second - lowest_y;
    8560     5737980 :         const double p2x_diff = p2.first - lowest_x;
    8561     5737980 :         const double p2y_diff = p2.second - lowest_y;
    8562     5737980 :         if (p2y_diff == 0.0 && p1y_diff == 0.0)  // both points at the same y as the lowest point
    8563             :         {
    8564     2655420 :             if (p1x_diff >= 0)
    8565             :             {
    8566     2655420 :                 if (p2x_diff >= 0)
    8567     2655420 :                     return p1.first < p2.first;
    8568           0 :                 return true;
    8569             :             }
    8570             :             else
    8571             :             {
    8572           0 :                 if (p2x_diff >= 0)
    8573           0 :                     return false;
    8574           0 :                 return p1.first < p2.first;
    8575             :             }
    8576             :         }
    8577             : 
    8578     3082560 :         if (p2x_diff == 0.0 && p1x_diff == 0.0)  // both points at the same x as the lowest point: order by y
    8579     1046960 :             return p1.second < p2.second;
    8580             : 
    8581             :         double tan_p1;
    8582     2035600 :         if (p1x_diff == 0.0)  // avoid dividing by zero: a vertical direction gets the largest finite slope, the lowest point itself gets 0
    8583      464622 :             tan_p1 = p1y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
    8584             :         else
    8585     1570980 :             tan_p1 = p1y_diff / p1x_diff;
    8586             : 
    8587             :         double tan_p2;
    8588     2035600 :         if (p2x_diff == 0.0)
    8589      839515 :             tan_p2 = p2y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
    8590             :         else
    8591     1196080 :             tan_p2 = p2y_diff / p2x_diff;
    8592             : // Non-negative slopes sort before negative ones; within the same sign class, the smaller slope sorts first.
    8593     2035600 :         if (tan_p1 >= 0)
    8594             :         {
    8595     1904790 :             if (tan_p2 >= 0)
    8596     1881590 :                 return tan_p1 < tan_p2;
    8597             :             else
    8598       23199 :                 return true;
    8599             :         }
    8600             :         else
    8601             :         {
    8602      130806 :             if (tan_p2 >= 0)
    8603      103900 :                 return false;
    8604             :             else
    8605       26906 :                 return tan_p1 < tan_p2;
    8606             :         }
    8607      666202 :     };
    8608             : 
    8609             :     // Sort points by increasing atan2(y-lowest_y, x-lowest_x) to form a convex
    8610             :     // hull
    8611      666202 :     std::sort(intersection.begin(), intersection.end(), sortFunc);
    8612             : 
    8613             :     // Remove duplicated points (only adjacent equal points are collapsed, in place)
    8614      666202 :     size_t j = 1;  // write index: number of points kept so far
    8615     3106240 :     for (size_t i = 1; i < intersection.size(); ++i)
    8616             :     {
    8617     2440040 :         if (intersection[i] != intersection[i - 1])
    8618             :         {
    8619     1452560 :             if (j < i)  // skip the self-assignment while nothing has been dropped yet
    8620      545275 :                 intersection[j] = intersection[i];
    8621     1452560 :             ++j;
    8622             :         }
    8623             :     }
    8624      666202 :     intersection.resize(j);
    8625             : }
    8626             : 
    8627             : /************************************************************************/
    8628             : /*                          GWKSumPreserving()                          */
    8629             : /************************************************************************/
    8630             : 
    8631             : static void GWKSumPreservingThread(void *pData);  // forward declaration; defined right after GWKSumPreserving()
    8632             : // Hands poWK, the label "GWKSumPreserving" and GWKSumPreservingThread to GWKRun() and returns GWKRun()'s CPLErr result.
    8633          19 : static CPLErr GWKSumPreserving(GDALWarpKernel *poWK)
    8634             : {
    8635          19 :     return GWKRun(poWK, "GWKSumPreserving", GWKSumPreservingThread);
    8636             : }
    8637             : 
    8638          19 : static void GWKSumPreservingThread(void *pData)
    8639             : {
    8640          19 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    8641          19 :     GDALWarpKernel *poWK = psJob->poWK;
    8642          19 :     const int iYMin = psJob->iYMin;
    8643          19 :     const int iYMax = psJob->iYMax;
    8644             :     const bool bIsAffineNoRotation =
    8645          19 :         GDALTransformIsAffineNoRotation(poWK->pfnTransformer,
    8646          28 :                                         poWK->pTransformerArg) &&
    8647             :         // for debug/testing purposes
    8648           9 :         CPLTestBool(
    8649          19 :             CPLGetConfigOption("GDAL_WARP_USE_AFFINE_OPTIMIZATION", "YES"));
    8650             :     const bool bAvoidNoDataSingleBand =
    8651          21 :         poWK->nBands == 1 ||
    8652           2 :         !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
    8653          19 :                                           "UNIFIED_SRC_NODATA", "FALSE"));
    8654             : 
    8655          19 :     const int nDstXSize = poWK->nDstXSize;
    8656          19 :     const int nSrcXSize = poWK->nSrcXSize;
    8657          19 :     const int nSrcYSize = poWK->nSrcYSize;
    8658             : 
    8659          38 :     std::vector<double> adfX0(nSrcXSize + 1);
    8660          38 :     std::vector<double> adfY0(nSrcXSize + 1);
    8661          38 :     std::vector<double> adfZ0(nSrcXSize + 1);
    8662          38 :     std::vector<double> adfX1(nSrcXSize + 1);
    8663          38 :     std::vector<double> adfY1(nSrcXSize + 1);
    8664          38 :     std::vector<double> adfZ1(nSrcXSize + 1);
    8665          38 :     std::vector<int> abSuccess0(nSrcXSize + 1);
    8666          38 :     std::vector<int> abSuccess1(nSrcXSize + 1);
    8667             : 
    8668             :     CPLRectObj sGlobalBounds;
    8669          19 :     sGlobalBounds.minx = -2 * poWK->dfXScale;
    8670          19 :     sGlobalBounds.miny = iYMin - 2 * poWK->dfYScale;
    8671          19 :     sGlobalBounds.maxx = nDstXSize + 2 * poWK->dfXScale;
    8672          19 :     sGlobalBounds.maxy = iYMax + 2 * poWK->dfYScale;
    8673          19 :     CPLQuadTree *hQuadTree = CPLQuadTreeCreate(&sGlobalBounds, nullptr);
    8674             : 
    8675             :     struct SourcePixel
    8676             :     {
    8677             :         int iSrcX;
    8678             :         int iSrcY;
    8679             : 
    8680             :         // Coordinates of source pixel in target pixel coordinates
    8681             :         double dfDstX0;
    8682             :         double dfDstY0;
    8683             :         double dfDstX1;
    8684             :         double dfDstY1;
    8685             :         double dfDstX2;
    8686             :         double dfDstY2;
    8687             :         double dfDstX3;
    8688             :         double dfDstY3;
    8689             : 
    8690             :         // Source pixel total area (might be larger than the one described
    8691             :         // by above coordinates, if the pixel was crossing the antimeridian
    8692             :         // and split)
    8693             :         double dfArea;
    8694             :     };
    8695             : 
    8696          38 :     std::vector<SourcePixel> sourcePixels;
    8697             : 
    8698          38 :     XYPoly discontinuityLeft(5);
    8699          38 :     XYPoly discontinuityRight(5);
    8700             : 
    8701             :     /* ==================================================================== */
    8702             :     /*      First pass: transform the 4 corners of each potential           */
    8703             :     /*      contributing source pixel to target pixel coordinates.          */
    8704             :     /* ==================================================================== */
    8705             : 
    8706             :     // Special case for top line
    8707             :     {
    8708          19 :         int iY = 0;
    8709        3364 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8710             :         {
    8711        3345 :             adfX1[iX] = iX + poWK->nSrcXOff;
    8712        3345 :             adfY1[iX] = iY + poWK->nSrcYOff;
    8713        3345 :             adfZ1[iX] = 0;
    8714             :         }
    8715             : 
    8716          19 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
    8717             :                              adfX1.data(), adfY1.data(), adfZ1.data(),
    8718             :                              abSuccess1.data());
    8719             : 
    8720        3364 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8721             :         {
    8722        3345 :             if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
    8723           0 :                 abSuccess1[iX] = FALSE;
    8724             :             else
    8725             :             {
    8726        3345 :                 adfX1[iX] -= poWK->nDstXOff;
    8727        3345 :                 adfY1[iX] -= poWK->nDstYOff;
    8728             :             }
    8729             :         }
    8730             :     }
    8731             : 
    8732        2032 :     const auto getInsideXSign = [poWK, nDstXSize](double dfX)
    8733             :     {
    8734        2032 :         return dfX - poWK->nDstXOff >= -2 * poWK->dfXScale &&
    8735         872 :                        dfX - poWK->nDstXOff <= nDstXSize + 2 * poWK->dfXScale
    8736        2032 :                    ? 1
    8737        1160 :                    : -1;
    8738          19 :     };
    8739             : 
    8740             :     const auto FindDiscontinuity =
    8741          80 :         [poWK, psJob, getInsideXSign](
    8742             :             double dfXLeft, double dfXRight, double dfY,
    8743             :             int XLeftReprojectedInsideSign, double &dfXMidReprojectedLeft,
    8744         800 :             double &dfXMidReprojectedRight, double &dfYMidReprojected)
    8745             :     {
    8746         880 :         for (int i = 0; i < 10 && dfXRight - dfXLeft > 1e-8; ++i)
    8747             :         {
    8748         800 :             double dfXMid = (dfXLeft + dfXRight) / 2;
    8749         800 :             double dfXMidReprojected = dfXMid;
    8750         800 :             dfYMidReprojected = dfY;
    8751         800 :             double dfZ = 0;
    8752         800 :             int nSuccess = 0;
    8753         800 :             poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1,
    8754             :                                  &dfXMidReprojected, &dfYMidReprojected, &dfZ,
    8755             :                                  &nSuccess);
    8756         800 :             if (XLeftReprojectedInsideSign != getInsideXSign(dfXMidReprojected))
    8757             :             {
    8758         456 :                 dfXRight = dfXMid;
    8759         456 :                 dfXMidReprojectedRight = dfXMidReprojected;
    8760             :             }
    8761             :             else
    8762             :             {
    8763         344 :                 dfXLeft = dfXMid;
    8764         344 :                 dfXMidReprojectedLeft = dfXMidReprojected;
    8765             :             }
    8766             :         }
    8767          80 :     };
    8768             : 
    8769        2685 :     for (int iY = 0; iY < nSrcYSize; ++iY)
    8770             :     {
    8771        2666 :         std::swap(adfX0, adfX1);
    8772        2666 :         std::swap(adfY0, adfY1);
    8773        2666 :         std::swap(adfZ0, adfZ1);
    8774        2666 :         std::swap(abSuccess0, abSuccess1);
    8775             : 
    8776     4836120 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8777             :         {
    8778     4833460 :             adfX1[iX] = iX + poWK->nSrcXOff;
    8779     4833460 :             adfY1[iX] = iY + 1 + poWK->nSrcYOff;
    8780     4833460 :             adfZ1[iX] = 0;
    8781             :         }
    8782             : 
    8783        2666 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
    8784             :                              adfX1.data(), adfY1.data(), adfZ1.data(),
    8785             :                              abSuccess1.data());
    8786             : 
    8787     4836120 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8788             :         {
    8789     4833460 :             if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
    8790           0 :                 abSuccess1[iX] = FALSE;
    8791             :             else
    8792             :             {
    8793     4833460 :                 adfX1[iX] -= poWK->nDstXOff;
    8794     4833460 :                 adfY1[iX] -= poWK->nDstYOff;
    8795             :             }
    8796             :         }
    8797             : 
    8798     4833460 :         for (int iX = 0; iX < nSrcXSize; ++iX)
    8799             :         {
    8800     9661580 :             if (abSuccess0[iX] && abSuccess0[iX + 1] && abSuccess1[iX] &&
    8801     4830790 :                 abSuccess1[iX + 1])
    8802             :             {
    8803             :                 /* --------------------------------------------------------------------
    8804             :                  */
    8805             :                 /*      Do not try to apply transparent source pixels to the
    8806             :                  * destination.*/
    8807             :                 /* --------------------------------------------------------------------
    8808             :                  */
    8809     4830790 :                 const auto iSrcOffset =
    8810     4830790 :                     iX + static_cast<GPtrDiff_t>(iY) * nSrcXSize;
    8811     9560570 :                 if (poWK->panUnifiedSrcValid != nullptr &&
    8812     4729780 :                     !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    8813             :                 {
    8814     4738340 :                     continue;
    8815             :                 }
    8816             : 
    8817      103415 :                 if (poWK->pafUnifiedSrcDensity != nullptr)
    8818             :                 {
    8819           0 :                     if (poWK->pafUnifiedSrcDensity[iSrcOffset] <
    8820             :                         SRC_DENSITY_THRESHOLD_FLOAT)
    8821           0 :                         continue;
    8822             :                 }
    8823             : 
    8824             :                 SourcePixel sp;
    8825      103415 :                 sp.dfArea = 0;
    8826      103415 :                 sp.dfDstX0 = adfX0[iX];
    8827      103415 :                 sp.dfDstY0 = adfY0[iX];
    8828      103415 :                 sp.dfDstX1 = adfX0[iX + 1];
    8829      103415 :                 sp.dfDstY1 = adfY0[iX + 1];
    8830      103415 :                 sp.dfDstX2 = adfX1[iX + 1];
    8831      103415 :                 sp.dfDstY2 = adfY1[iX + 1];
    8832      103415 :                 sp.dfDstX3 = adfX1[iX];
    8833      103415 :                 sp.dfDstY3 = adfY1[iX];
    8834             : 
    8835             :                 // Detect pixel that likely cross the anti-meridian and
    8836             :                 // introduce a discontinuity when reprojected.
    8837             : 
    8838      103415 :                 if (std::fabs(adfX0[iX] - adfX0[iX + 1]) > 2 * poWK->dfXScale &&
    8839          80 :                     std::fabs(adfX1[iX] - adfX1[iX + 1]) > 2 * poWK->dfXScale &&
    8840          40 :                     getInsideXSign(adfX0[iX]) !=
    8841          80 :                         getInsideXSign(adfX0[iX + 1]) &&
    8842          80 :                     getInsideXSign(adfX0[iX]) == getInsideXSign(adfX1[iX]) &&
    8843          40 :                     getInsideXSign(adfX0[iX + 1]) ==
    8844      103495 :                         getInsideXSign(adfX1[iX + 1]) &&
    8845          40 :                     (adfY1[iX] - adfY0[iX]) * (adfY1[iX + 1] - adfY0[iX + 1]) >
    8846             :                         0)
    8847             :                 {
    8848             : #ifdef DEBUG_VERBOSE
    8849             :                     CPLDebug(
    8850             :                         "WARP",
    8851             :                         "Discontinuity for iSrcX=%d, iSrcY=%d, dest corners:"
    8852             :                         "X0[iX]=%f X0[iX+1]=%f X1[iX]=%f X1[iX+1]=%f,"
    8853             :                         "Y0[iX]=%f Y0[iX+1]=%f Y1[iX]=%f Y1[iX+1]=%f",
    8854             :                         iX + poWK->nSrcXOff, iY + poWK->nSrcYOff, adfX0[iX],
    8855             :                         adfX0[iX + 1], adfX1[iX], adfX1[iX + 1], adfY0[iX],
    8856             :                         adfY0[iX + 1], adfY1[iX], adfY1[iX + 1]);
    8857             : #endif
    8858          40 :                     double dfXMidReprojectedLeftTop = 0;
    8859          40 :                     double dfXMidReprojectedRightTop = 0;
    8860          40 :                     double dfYMidReprojectedTop = 0;
    8861          40 :                     FindDiscontinuity(
    8862          40 :                         iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
    8863          80 :                         iY + poWK->nSrcYOff, getInsideXSign(adfX0[iX]),
    8864             :                         dfXMidReprojectedLeftTop, dfXMidReprojectedRightTop,
    8865             :                         dfYMidReprojectedTop);
    8866          40 :                     double dfXMidReprojectedLeftBottom = 0;
    8867          40 :                     double dfXMidReprojectedRightBottom = 0;
    8868          40 :                     double dfYMidReprojectedBottom = 0;
    8869          40 :                     FindDiscontinuity(
    8870          40 :                         iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
    8871          80 :                         iY + poWK->nSrcYOff + 1, getInsideXSign(adfX1[iX]),
    8872             :                         dfXMidReprojectedLeftBottom,
    8873             :                         dfXMidReprojectedRightBottom, dfYMidReprojectedBottom);
    8874             : 
    8875          40 :                     discontinuityLeft[0] = XYPair(adfX0[iX], adfY0[iX]);
    8876          40 :                     discontinuityLeft[1] =
    8877          80 :                         XYPair(dfXMidReprojectedLeftTop, dfYMidReprojectedTop);
    8878          40 :                     discontinuityLeft[2] = XYPair(dfXMidReprojectedLeftBottom,
    8879          40 :                                                   dfYMidReprojectedBottom);
    8880          40 :                     discontinuityLeft[3] = XYPair(adfX1[iX], adfY1[iX]);
    8881          40 :                     discontinuityLeft[4] = XYPair(adfX0[iX], adfY0[iX]);
    8882             : 
    8883          40 :                     discontinuityRight[0] =
    8884          80 :                         XYPair(adfX0[iX + 1], adfY0[iX + 1]);
    8885          40 :                     discontinuityRight[1] =
    8886          80 :                         XYPair(dfXMidReprojectedRightTop, dfYMidReprojectedTop);
    8887          40 :                     discontinuityRight[2] = XYPair(dfXMidReprojectedRightBottom,
    8888          40 :                                                    dfYMidReprojectedBottom);
    8889          40 :                     discontinuityRight[3] =
    8890          80 :                         XYPair(adfX1[iX + 1], adfY1[iX + 1]);
    8891          40 :                     discontinuityRight[4] =
    8892          80 :                         XYPair(adfX0[iX + 1], adfY0[iX + 1]);
    8893             : 
    8894          40 :                     sp.dfArea = getArea(discontinuityLeft) +
    8895          40 :                                 getArea(discontinuityRight);
    8896          40 :                     if (getInsideXSign(adfX0[iX]) >= 1)
    8897             :                     {
    8898          20 :                         sp.dfDstX1 = dfXMidReprojectedLeftTop;
    8899          20 :                         sp.dfDstY1 = dfYMidReprojectedTop;
    8900          20 :                         sp.dfDstX2 = dfXMidReprojectedLeftBottom;
    8901          20 :                         sp.dfDstY2 = dfYMidReprojectedBottom;
    8902             :                     }
    8903             :                     else
    8904             :                     {
    8905          20 :                         sp.dfDstX0 = dfXMidReprojectedRightTop;
    8906          20 :                         sp.dfDstY0 = dfYMidReprojectedTop;
    8907          20 :                         sp.dfDstX3 = dfXMidReprojectedRightBottom;
    8908          20 :                         sp.dfDstY3 = dfYMidReprojectedBottom;
    8909             :                     }
    8910             :                 }
    8911             : 
    8912             :                 // Bounding box of source pixel (expressed in target pixel
    8913             :                 // coordinates)
    8914             :                 CPLRectObj sRect;
    8915      103415 :                 sRect.minx = std::min(std::min(sp.dfDstX0, sp.dfDstX1),
    8916      103415 :                                       std::min(sp.dfDstX2, sp.dfDstX3));
    8917      103415 :                 sRect.miny = std::min(std::min(sp.dfDstY0, sp.dfDstY1),
    8918      103415 :                                       std::min(sp.dfDstY2, sp.dfDstY3));
    8919      103415 :                 sRect.maxx = std::max(std::max(sp.dfDstX0, sp.dfDstX1),
    8920      103415 :                                       std::max(sp.dfDstX2, sp.dfDstX3));
    8921      103415 :                 sRect.maxy = std::max(std::max(sp.dfDstY0, sp.dfDstY1),
    8922      103415 :                                       std::max(sp.dfDstY2, sp.dfDstY3));
    8923      103415 :                 if (!(sRect.minx < nDstXSize && sRect.maxx > 0 &&
    8924      101355 :                       sRect.miny < iYMax && sRect.maxy > iYMin))
    8925             :                 {
    8926       10852 :                     continue;
    8927             :                 }
    8928             : 
    8929       92563 :                 sp.iSrcX = iX;
    8930       92563 :                 sp.iSrcY = iY;
    8931             : 
    8932       92563 :                 if (!bIsAffineNoRotation)
    8933             :                 {
    8934             :                     // Check polygon validity (no self-crossing)
    8935       89745 :                     XYPair xy;
    8936       89745 :                     if (getIntersection(XYPair(sp.dfDstX0, sp.dfDstY0),
    8937       89745 :                                         XYPair(sp.dfDstX1, sp.dfDstY1),
    8938       89745 :                                         XYPair(sp.dfDstX2, sp.dfDstY2),
    8939      269235 :                                         XYPair(sp.dfDstX3, sp.dfDstY3), xy) ||
    8940       89745 :                         getIntersection(XYPair(sp.dfDstX1, sp.dfDstY1),
    8941       89745 :                                         XYPair(sp.dfDstX2, sp.dfDstY2),
    8942       89745 :                                         XYPair(sp.dfDstX0, sp.dfDstY0),
    8943      179490 :                                         XYPair(sp.dfDstX3, sp.dfDstY3), xy))
    8944             :                     {
    8945         113 :                         continue;
    8946             :                     }
    8947             :                 }
    8948             : 
    8949       92450 :                 CPLQuadTreeInsertWithBounds(
    8950             :                     hQuadTree,
    8951             :                     reinterpret_cast<void *>(
    8952       92450 :                         static_cast<uintptr_t>(sourcePixels.size())),
    8953             :                     &sRect);
    8954             : 
    8955       92450 :                 sourcePixels.push_back(sp);
    8956             :             }
    8957             :         }
    8958             :     }
    8959             : 
    8960          38 :     std::vector<double> adfRealValue(poWK->nBands);
    8961          38 :     std::vector<double> adfImagValue(poWK->nBands);
    8962          38 :     std::vector<double> adfBandDensity(poWK->nBands);
    8963          38 :     std::vector<double> adfWeight(poWK->nBands);
    8964             : 
    8965             : #ifdef CHECK_SUM_WITH_GEOS
    8966             :     auto hGEOSContext = OGRGeometry::createGEOSContext();
    8967             :     auto seq1 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
    8968             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 0, 0.0, 0.0);
    8969             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 1, 1.0, 0.0);
    8970             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 2, 1.0, 1.0);
    8971             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 3, 0.0, 1.0);
    8972             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 4, 0.0, 0.0);
    8973             :     auto hLR1 = GEOSGeom_createLinearRing_r(hGEOSContext, seq1);
    8974             :     auto hP1 = GEOSGeom_createPolygon_r(hGEOSContext, hLR1, nullptr, 0);
    8975             : 
    8976             :     auto seq2 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
    8977             :     auto hLR2 = GEOSGeom_createLinearRing_r(hGEOSContext, seq2);
    8978             :     auto hP2 = GEOSGeom_createPolygon_r(hGEOSContext, hLR2, nullptr, 0);
    8979             : #endif
    8980             : 
    8981             :     const XYPoly xy1{
    8982          38 :         {0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}};
    8983          38 :     XYPoly xy2(5);
    8984          38 :     XYPoly xy2_triangle(4);
    8985          38 :     XYPoly intersection;
    8986             : 
    8987             :     /* ==================================================================== */
    8988             :     /*      Loop over output lines.                                         */
    8989             :     /* ==================================================================== */
    8990        1951 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    8991             :     {
    8992             :         CPLRectObj sRect;
    8993        1932 :         sRect.miny = iDstY;
    8994        1932 :         sRect.maxy = iDstY + 1;
    8995             : 
    8996             :         /* ====================================================================
    8997             :          */
    8998             :         /*      Loop over pixels in output scanline. */
    8999             :         /* ====================================================================
    9000             :          */
    9001     1403940 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    9002             :         {
    9003     1402010 :             sRect.minx = iDstX;
    9004     1402010 :             sRect.maxx = iDstX + 1;
    9005     1402010 :             int nSourcePixels = 0;
    9006             :             void **pahSourcePixel =
    9007     1402010 :                 CPLQuadTreeSearch(hQuadTree, &sRect, &nSourcePixels);
    9008     1402010 :             if (nSourcePixels == 0)
    9009             :             {
    9010     1183090 :                 CPLFree(pahSourcePixel);
    9011     1183100 :                 continue;
    9012             :             }
    9013             : 
    9014      218919 :             std::fill(adfRealValue.begin(), adfRealValue.end(), 0);
    9015      218919 :             std::fill(adfImagValue.begin(), adfImagValue.end(), 0);
    9016      218919 :             std::fill(adfBandDensity.begin(), adfBandDensity.end(), 0);
    9017      218919 :             std::fill(adfWeight.begin(), adfWeight.end(), 0);
    9018      218919 :             double dfDensity = 0;
    9019             :             // Just above zero to please Coverity Scan
    9020      218919 :             double dfTotalWeight = std::numeric_limits<double>::min();
    9021             : 
    9022             :             /* ====================================================================
    9023             :              */
    9024             :             /*          Iterate over each contributing source pixel to add its
    9025             :              */
    9026             :             /*          value weighted by the ratio of the area of its
    9027             :              * intersection  */
    9028             :             /*          with the target pixel divided by the area of the source
    9029             :              */
    9030             :             /*          pixel. */
    9031             :             /* ====================================================================
    9032             :              */
    9033     1020550 :             for (int i = 0; i < nSourcePixels; ++i)
    9034             :             {
    9035      801628 :                 const int iSourcePixel = static_cast<int>(
    9036      801628 :                     reinterpret_cast<uintptr_t>(pahSourcePixel[i]));
    9037      801628 :                 auto &sp = sourcePixels[iSourcePixel];
    9038             : 
    9039      801628 :                 double dfWeight = 0.0;
    9040      801628 :                 if (bIsAffineNoRotation)
    9041             :                 {
    9042             :                     // Optimization since the source pixel is a rectangle in
    9043             :                     // target pixel coordinates
    9044       16326 :                     double dfSrcMinX = std::min(sp.dfDstX0, sp.dfDstX2);
    9045       16326 :                     double dfSrcMaxX = std::max(sp.dfDstX0, sp.dfDstX2);
    9046       16326 :                     double dfSrcMinY = std::min(sp.dfDstY0, sp.dfDstY2);
    9047       16326 :                     double dfSrcMaxY = std::max(sp.dfDstY0, sp.dfDstY2);
    9048       16326 :                     double dfIntersMinX = std::max<double>(dfSrcMinX, iDstX);
    9049       16326 :                     double dfIntersMaxX = std::min(dfSrcMaxX, iDstX + 1.0);
    9050       16326 :                     double dfIntersMinY = std::max<double>(dfSrcMinY, iDstY);
    9051       16326 :                     double dfIntersMaxY = std::min(dfSrcMaxY, iDstY + 1.0);
    9052       16326 :                     dfWeight =
    9053       16326 :                         ((dfIntersMaxX - dfIntersMinX) *
    9054       16326 :                          (dfIntersMaxY - dfIntersMinY)) /
    9055       16326 :                         ((dfSrcMaxX - dfSrcMinX) * (dfSrcMaxY - dfSrcMinY));
    9056             :                 }
    9057             :                 else
    9058             :                 {
    9059             :                     // Compute the polygon of the source pixel in target pixel
    9060             :                     // coordinates, and shifted to the target pixel (unit square
    9061             :                     // coordinates)
    9062             : 
    9063      785302 :                     xy2[0] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
    9064      785302 :                     xy2[1] = {sp.dfDstX1 - iDstX, sp.dfDstY1 - iDstY};
    9065      785302 :                     xy2[2] = {sp.dfDstX2 - iDstX, sp.dfDstY2 - iDstY};
    9066      785302 :                     xy2[3] = {sp.dfDstX3 - iDstX, sp.dfDstY3 - iDstY};
    9067      785302 :                     xy2[4] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
    9068             : 
    9069      785302 :                     if (isConvex(xy2))
    9070             :                     {
    9071      785302 :                         getConvexPolyIntersection(xy1, xy2, intersection);
    9072      785302 :                         if (intersection.size() >= 3)
    9073             :                         {
    9074      468849 :                             dfWeight = getArea(intersection);
    9075             :                         }
    9076             :                     }
    9077             :                     else
    9078             :                     {
    9079             :                         // Split xy2 into 2 triangles.
    9080           0 :                         xy2_triangle[0] = xy2[0];
    9081           0 :                         xy2_triangle[1] = xy2[1];
    9082           0 :                         xy2_triangle[2] = xy2[2];
    9083           0 :                         xy2_triangle[3] = xy2[0];
    9084           0 :                         getConvexPolyIntersection(xy1, xy2_triangle,
    9085             :                                                   intersection);
    9086           0 :                         if (intersection.size() >= 3)
    9087             :                         {
    9088           0 :                             dfWeight = getArea(intersection);
    9089             :                         }
    9090             : 
    9091           0 :                         xy2_triangle[1] = xy2[2];
    9092           0 :                         xy2_triangle[2] = xy2[3];
    9093           0 :                         getConvexPolyIntersection(xy1, xy2_triangle,
    9094             :                                                   intersection);
    9095           0 :                         if (intersection.size() >= 3)
    9096             :                         {
    9097           0 :                             dfWeight += getArea(intersection);
    9098             :                         }
    9099             :                     }
    9100      785302 :                     if (dfWeight > 0.0)
    9101             :                     {
    9102      468828 :                         if (sp.dfArea == 0)
    9103       89592 :                             sp.dfArea = getArea(xy2);
    9104      468828 :                         dfWeight /= sp.dfArea;
    9105             :                     }
    9106             : 
    9107             : #ifdef CHECK_SUM_WITH_GEOS
    9108             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 0,
    9109             :                                          sp.dfDstX0 - iDstX,
    9110             :                                          sp.dfDstY0 - iDstY);
    9111             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 1,
    9112             :                                          sp.dfDstX1 - iDstX,
    9113             :                                          sp.dfDstY1 - iDstY);
    9114             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 2,
    9115             :                                          sp.dfDstX2 - iDstX,
    9116             :                                          sp.dfDstY2 - iDstY);
    9117             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 3,
    9118             :                                          sp.dfDstX3 - iDstX,
    9119             :                                          sp.dfDstY3 - iDstY);
    9120             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 4,
    9121             :                                          sp.dfDstX0 - iDstX,
    9122             :                                          sp.dfDstY0 - iDstY);
    9123             : 
    9124             :                     double dfWeightGEOS = 0.0;
    9125             :                     auto hIntersection =
    9126             :                         GEOSIntersection_r(hGEOSContext, hP1, hP2);
    9127             :                     if (hIntersection)
    9128             :                     {
    9129             :                         double dfIntersArea = 0.0;
    9130             :                         if (GEOSArea_r(hGEOSContext, hIntersection,
    9131             :                                        &dfIntersArea) &&
    9132             :                             dfIntersArea > 0)
    9133             :                         {
    9134             :                             double dfSourceArea = 0.0;
    9135             :                             if (GEOSArea_r(hGEOSContext, hP2, &dfSourceArea))
    9136             :                             {
    9137             :                                 dfWeightGEOS = dfIntersArea / dfSourceArea;
    9138             :                             }
    9139             :                         }
    9140             :                         GEOSGeom_destroy_r(hGEOSContext, hIntersection);
    9141             :                     }
    9142             :                     if (fabs(dfWeight - dfWeightGEOS) > 1e-5 * dfWeightGEOS)
    9143             :                     {
    9144             :                         /* ok */ printf("dfWeight=%f dfWeightGEOS=%f\n",
    9145             :                                         dfWeight, dfWeightGEOS);
    9146             :                         printf("xy2: ");  // ok
    9147             :                         for (const auto &xy : xy2)
    9148             :                             printf("[%f, %f], ", xy.first, xy.second);  // ok
    9149             :                         printf("\n");                                   // ok
    9150             :                         printf("intersection: ");                       // ok
    9151             :                         for (const auto &xy : intersection)
    9152             :                             printf("[%f, %f], ", xy.first, xy.second);  // ok
    9153             :                         printf("\n");                                   // ok
    9154             :                     }
    9155             : #endif
    9156             :                 }
    9157      801628 :                 if (dfWeight > 0.0)
    9158             :                 {
    9159             : #ifdef DEBUG_VERBOSE
    9160             : #if defined(DST_X) && defined(DST_Y)
    9161             :                     if (iDstX + poWK->nDstXOff == DST_X &&
    9162             :                         iDstY + poWK->nDstYOff == DST_Y)
    9163             :                     {
    9164             :                         CPLDebug("WARP",
    9165             :                                  "iSrcX = %d, iSrcY = %d, weight =%.17g",
    9166             :                                  sp.iSrcX + poWK->nSrcXOff,
    9167             :                                  sp.iSrcY + poWK->nSrcYOff, dfWeight);
    9168             :                     }
    9169             : #endif
    9170             : #endif
    9171             : 
    9172      474104 :                     const GPtrDiff_t iSrcOffset =
    9173      474104 :                         sp.iSrcX +
    9174      474104 :                         static_cast<GPtrDiff_t>(sp.iSrcY) * nSrcXSize;
    9175      474104 :                     dfTotalWeight += dfWeight;
    9176             : 
    9177      474104 :                     if (poWK->pafUnifiedSrcDensity != nullptr)
    9178             :                     {
    9179           0 :                         dfDensity +=
    9180           0 :                             dfWeight *
    9181           0 :                             double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    9182             :                     }
    9183             :                     else
    9184             :                     {
    9185      474104 :                         dfDensity += dfWeight;
    9186             :                     }
    9187             : 
    9188     1818730 :                     for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    9189             :                     {
    9190             :                         // Returns pixel value if it is not no data.
    9191             :                         double dfBandDensity;
    9192             :                         double dfRealValue;
    9193             :                         double dfImagValue;
    9194     2689250 :                         if (!(GWKGetPixelValue(poWK, iBand, iSrcOffset,
    9195             :                                                &dfBandDensity, &dfRealValue,
    9196             :                                                &dfImagValue) &&
    9197     1344620 :                               dfBandDensity > BAND_DENSITY_THRESHOLD))
    9198             :                         {
    9199           0 :                             continue;
    9200             :                         }
    9201             : #ifdef DEBUG_VERBOSE
    9202             : #if defined(DST_X) && defined(DST_Y)
    9203             :                         if (iDstX + poWK->nDstXOff == DST_X &&
    9204             :                             iDstY + poWK->nDstYOff == DST_Y)
    9205             :                         {
    9206             :                             CPLDebug("WARP", "value * weight = %.17g",
    9207             :                                      dfRealValue * dfWeight);
    9208             :                         }
    9209             : #endif
    9210             : #endif
    9211             : 
    9212     1344620 :                         adfRealValue[iBand] += dfRealValue * dfWeight;
    9213     1344620 :                         adfImagValue[iBand] += dfImagValue * dfWeight;
    9214     1344620 :                         adfBandDensity[iBand] += dfBandDensity * dfWeight;
    9215     1344620 :                         adfWeight[iBand] += dfWeight;
    9216             :                     }
    9217             :                 }
    9218             :             }
    9219             : 
    9220      218919 :             CPLFree(pahSourcePixel);
    9221             : 
    9222             :             /* --------------------------------------------------------------------
    9223             :              */
    9224             :             /*          Update destination pixel value. */
    9225             :             /* --------------------------------------------------------------------
    9226             :              */
    9227      218919 :             bool bHasFoundDensity = false;
    9228      218919 :             const GPtrDiff_t iDstOffset =
    9229      218919 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    9230      827838 :             for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    9231             :             {
    9232      608919 :                 if (adfWeight[iBand] > 0)
    9233             :                 {
    9234             :                     const double dfBandDensity =
    9235      608909 :                         adfBandDensity[iBand] / adfWeight[iBand];
    9236      608909 :                     if (dfBandDensity > BAND_DENSITY_THRESHOLD)
    9237             :                     {
    9238      608909 :                         bHasFoundDensity = true;
    9239      608909 :                         GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    9240      608909 :                                          adfRealValue[iBand],
    9241      608909 :                                          adfImagValue[iBand],
    9242             :                                          bAvoidNoDataSingleBand);
    9243             :                     }
    9244             :                 }
    9245             :             }
    9246             : 
    9247      218919 :             if (!bHasFoundDensity)
    9248          10 :                 continue;
    9249             : 
    9250      218909 :             if (!bAvoidNoDataSingleBand)
    9251             :             {
    9252           0 :                 GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    9253             :             }
    9254             : 
    9255             :             /* --------------------------------------------------------------------
    9256             :              */
    9257             :             /*          Update destination density/validity masks. */
    9258             :             /* --------------------------------------------------------------------
    9259             :              */
    9260      218909 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity / dfTotalWeight);
    9261             : 
    9262      218909 :             if (poWK->panDstValid != nullptr)
    9263             :             {
    9264       11752 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    9265             :             }
    9266             :         }
    9267             : 
    9268             :         /* --------------------------------------------------------------------
    9269             :          */
    9270             :         /*      Report progress to the user, and optionally cancel out. */
    9271             :         /* --------------------------------------------------------------------
    9272             :          */
    9273        1932 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    9274           0 :             break;
    9275             :     }
    9276             : 
    9277             : #ifdef CHECK_SUM_WITH_GEOS
    9278             :     GEOSGeom_destroy_r(hGEOSContext, hP1);
    9279             :     GEOSGeom_destroy_r(hGEOSContext, hP2);
    9280             :     OGRGeometry::freeGEOSContext(hGEOSContext);
    9281             : #endif
    9282          19 :     CPLQuadTreeDestroy(hQuadTree);
    9283          19 : }

Generated by: LCOV version 1.14