LCOV - code coverage report
Current view: top level - alg - gdalwarpkernel.cpp (source / functions) Hit Total Coverage
Test: gdal_filtered.info Lines: 3473 4050 85.8 %
Date: 2026-02-06 10:43:15 Functions: 232 270 85.9 %

          Line data    Source code
       1             : /******************************************************************************
       2             :  *
       3             :  * Project:  High Performance Image Reprojector
       4             :  * Purpose:  Implementation of the GDALWarpKernel class.  Implements the actual
       5             :  *           image warping for a "chunk" of input and output imagery already
       6             :  *           loaded into memory.
       7             :  * Author:   Frank Warmerdam, warmerdam@pobox.com
       8             :  *
       9             :  ******************************************************************************
      10             :  * Copyright (c) 2003, Frank Warmerdam <warmerdam@pobox.com>
      11             :  * Copyright (c) 2008-2013, Even Rouault <even dot rouault at spatialys.com>
      12             :  *
      13             :  * SPDX-License-Identifier: MIT
      14             :  ****************************************************************************/
      15             : 
      16             : #include "cpl_port.h"
      17             : #include "gdalwarper.h"
      18             : 
      19             : #include <cfloat>
      20             : #include <cmath>
      21             : #include <cstddef>
      22             : #include <cstdlib>
      23             : #include <cstring>
      24             : 
      25             : #include <algorithm>
      26             : #include <limits>
      27             : #include <mutex>
      28             : #include <new>
      29             : #include <utility>
      30             : #include <vector>
      31             : 
      32             : #include "cpl_atomic_ops.h"
      33             : #include "cpl_conv.h"
      34             : #include "cpl_error.h"
      35             : #include "cpl_float.h"
      36             : #include "cpl_mask.h"
      37             : #include "cpl_multiproc.h"
      38             : #include "cpl_progress.h"
      39             : #include "cpl_string.h"
      40             : #include "cpl_vsi.h"
      41             : #include "cpl_worker_thread_pool.h"
      42             : #include "cpl_quad_tree.h"
      43             : #include "gdal.h"
      44             : #include "gdal_alg.h"
      45             : #include "gdal_alg_priv.h"
      46             : #include "gdal_thread_pool.h"
      47             : #include "gdalresamplingkernels.h"
      48             : 
      49             : // #define CHECK_SUM_WITH_GEOS
      50             : #ifdef CHECK_SUM_WITH_GEOS
      51             : #include "ogr_geometry.h"
      52             : #include "ogr_geos.h"
      53             : #endif
      54             : 
      55             : #ifdef USE_NEON_OPTIMIZATIONS
      56             : #include "include_sse2neon.h"
      57             : #define USE_SSE2
      58             : 
      59             : #include "gdalsse_priv.h"
      60             : 
      61             : // We restrict to 64bit processors because they are guaranteed to have SSE2.
      62             : // Could possibly be used too on 32bit, but we would need to check at runtime.
      63             : #elif defined(__x86_64) || defined(_M_X64)
      64             : #define USE_SSE2
      65             : 
      66             : #include "gdalsse_priv.h"
      67             : 
      68             : #if __SSE4_1__
      69             : #include <smmintrin.h>
      70             : #endif
      71             : 
      72             : #if __SSE3__
      73             : #include <pmmintrin.h>
      74             : #endif
      75             : 
      76             : #endif
      77             : 
      78             : constexpr double BAND_DENSITY_THRESHOLD = 0.0000000001;
      79             : constexpr float SRC_DENSITY_THRESHOLD_FLOAT = 0.000000001f;
      80             : constexpr double SRC_DENSITY_THRESHOLD_DOUBLE = 0.000000001;
      81             : 
      82             : // #define INSTANTIATE_FLOAT64_SSE2_IMPL
      83             : 
      84             : static const int anGWKFilterRadius[] = {
      85             :     0,  // Nearest neighbour
      86             :     1,  // Bilinear
      87             :     2,  // Cubic Convolution (Catmull-Rom)
      88             :     2,  // Cubic B-Spline
      89             :     3,  // Lanczos windowed sinc
      90             :     0,  // Average
      91             :     0,  // Mode
      92             :     0,  // Reserved GRA_Gauss=7
      93             :     0,  // Max
      94             :     0,  // Min
      95             :     0,  // Med
      96             :     0,  // Q1
      97             :     0,  // Q3
      98             :     0,  // Sum
      99             :     0,  // RMS
     100             : };
     101             : 
     102             : static double GWKBilinear(double dfX);
     103             : static double GWKCubic(double dfX);
     104             : static double GWKBSpline(double dfX);
     105             : static double GWKLanczosSinc(double dfX);
     106             : 
     107             : static const FilterFuncType apfGWKFilter[] = {
     108             :     nullptr,         // Nearest neighbour
     109             :     GWKBilinear,     // Bilinear
     110             :     GWKCubic,        // Cubic Convolution (Catmull-Rom)
     111             :     GWKBSpline,      // Cubic B-Spline
     112             :     GWKLanczosSinc,  // Lanczos windowed sinc
     113             :     nullptr,         // Average
     114             :     nullptr,         // Mode
     115             :     nullptr,         // Reserved GRA_Gauss=7
     116             :     nullptr,         // Max
     117             :     nullptr,         // Min
     118             :     nullptr,         // Med
     119             :     nullptr,         // Q1
     120             :     nullptr,         // Q3
     121             :     nullptr,         // Sum
     122             :     nullptr,         // RMS
     123             : };
     124             : 
     125             : // TODO(schwehr): Can we make these functions have a const * const arg?
     126             : static double GWKBilinear4Values(double *padfVals);
     127             : static double GWKCubic4Values(double *padfVals);
     128             : static double GWKBSpline4Values(double *padfVals);
     129             : static double GWKLanczosSinc4Values(double *padfVals);
     130             : 
     131             : static const FilterFunc4ValuesType apfGWKFilter4Values[] = {
     132             :     nullptr,                // Nearest neighbour
     133             :     GWKBilinear4Values,     // Bilinear
     134             :     GWKCubic4Values,        // Cubic Convolution (Catmull-Rom)
     135             :     GWKBSpline4Values,      // Cubic B-Spline
     136             :     GWKLanczosSinc4Values,  // Lanczos windowed sinc
     137             :     nullptr,                // Average
     138             :     nullptr,                // Mode
     139             :     nullptr,                // Reserved GRA_Gauss=7
     140             :     nullptr,                // Max
     141             :     nullptr,                // Min
     142             :     nullptr,                // Med
     143             :     nullptr,                // Q1
     144             :     nullptr,                // Q3
     145             :     nullptr,                // Sum
     146             :     nullptr,                // RMS
     147             : };
     148             : 
     149       13424 : int GWKGetFilterRadius(GDALResampleAlg eResampleAlg)
     150             : {
     151             :     static_assert(CPL_ARRAYSIZE(anGWKFilterRadius) == GRA_LAST_VALUE + 1,
     152             :                   "Bad size of anGWKFilterRadius");
     153       13424 :     return anGWKFilterRadius[eResampleAlg];
     154             : }
     155             : 
     156        5093 : FilterFuncType GWKGetFilterFunc(GDALResampleAlg eResampleAlg)
     157             : {
     158             :     static_assert(CPL_ARRAYSIZE(apfGWKFilter) == GRA_LAST_VALUE + 1,
     159             :                   "Bad size of apfGWKFilter");
     160        5093 :     return apfGWKFilter[eResampleAlg];
     161             : }
     162             : 
     163        5093 : FilterFunc4ValuesType GWKGetFilterFunc4Values(GDALResampleAlg eResampleAlg)
     164             : {
     165             :     static_assert(CPL_ARRAYSIZE(apfGWKFilter4Values) == GRA_LAST_VALUE + 1,
     166             :                   "Bad size of apfGWKFilter4Values");
     167        5093 :     return apfGWKFilter4Values[eResampleAlg];
     168             : }
     169             : 
     170             : static CPLErr GWKGeneralCase(GDALWarpKernel *);
     171             : static CPLErr GWKRealCase(GDALWarpKernel *poWK);
     172             : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     173             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     174             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     175             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
     176             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
     177             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
     178             : #endif
     179             : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     180             : static CPLErr GWKNearestByte(GDALWarpKernel *poWK);
     181             : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     182             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     183             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
     184             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
     185             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
     186             : #endif
     187             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     188             : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     189             : static CPLErr GWKNearestShort(GDALWarpKernel *poWK);
     190             : static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK);
     191             : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
     192             : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK);
     193             : static CPLErr GWKAverageOrMode(GDALWarpKernel *);
     194             : static CPLErr GWKSumPreserving(GDALWarpKernel *);
     195             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
     196             : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
     197             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
     198             : 
     199             : /************************************************************************/
     200             : /*                             GWKJobStruct                             */
     201             : /************************************************************************/
     202             : 
     203             : struct GWKJobStruct
     204             : {
     205             :     std::mutex &mutex;
     206             :     std::condition_variable &cv;
     207             :     int counterSingleThreaded = 0;
     208             :     int &counter;
     209             :     bool &stopFlag;
     210             :     GDALWarpKernel *poWK = nullptr;
     211             :     int iYMin = 0;
     212             :     int iYMax = 0;
     213             :     int (*pfnProgress)(GWKJobStruct *psJob) = nullptr;
     214             :     void *pTransformerArg = nullptr;
     215             :     // used by GWKRun() to assign the proper pTransformerArg
     216             :     void (*pfnFunc)(void *) = nullptr;
     217             : 
     218        2934 :     GWKJobStruct(std::mutex &mutex_, std::condition_variable &cv_,
     219             :                  int &counter_, bool &stopFlag_)
     220        2934 :         : mutex(mutex_), cv(cv_), counter(counter_), stopFlag(stopFlag_)
     221             :     {
     222        2934 :     }
     223             : };
     224             : 
     225             : struct GWKThreadData
     226             : {
     227             :     std::unique_ptr<CPLJobQueue> poJobQueue{};
     228             :     std::unique_ptr<std::vector<GWKJobStruct>> threadJobs{};
     229             :     int nMaxThreads{0};
     230             :     int counter{0};
     231             :     bool stopFlag{false};
     232             :     std::mutex mutex{};
     233             :     std::condition_variable cv{};
     234             :     bool bTransformerArgInputAssignedToThread{false};
     235             :     void *pTransformerArgInput{
     236             :         nullptr};  // owned by calling layer. Not to be destroyed
     237             :     std::map<GIntBig, void *> mapThreadToTransformerArg{};
     238             :     int nTotalThreadCountForThisRun = 0;
     239             :     int nCurThreadCountForThisRun = 0;
     240             : };
     241             : 
     242             : /************************************************************************/
     243             : /*                         GWKProgressThread()                          */
     244             : /************************************************************************/
     245             : 
     246             : // Return TRUE if the computation must be interrupted.
     247          36 : static int GWKProgressThread(GWKJobStruct *psJob)
     248             : {
     249          36 :     bool stop = false;
     250             :     {
     251          36 :         std::lock_guard<std::mutex> lock(psJob->mutex);
     252          36 :         psJob->counter++;
     253          36 :         stop = psJob->stopFlag;
     254             :     }
     255          36 :     psJob->cv.notify_one();
     256             : 
     257          36 :     return stop;
     258             : }
     259             : 
     260             : /************************************************************************/
     261             : /*                       GWKProgressMonoThread()                        */
     262             : /************************************************************************/
     263             : 
     264             : // Return TRUE if the computation must be interrupted.
     265      378762 : static int GWKProgressMonoThread(GWKJobStruct *psJob)
     266             : {
     267      378762 :     GDALWarpKernel *poWK = psJob->poWK;
     268      378762 :     if (!poWK->pfnProgress(poWK->dfProgressBase +
     269      378762 :                                poWK->dfProgressScale *
     270      378762 :                                    (++psJob->counterSingleThreaded /
     271      378762 :                                     static_cast<double>(psJob->iYMax)),
     272             :                            "", poWK->pProgress))
     273             :     {
     274           1 :         CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
     275           1 :         psJob->stopFlag = true;
     276           1 :         return TRUE;
     277             :     }
     278      378761 :     return FALSE;
     279             : }
     280             : 
     281             : /************************************************************************/
     282             : /*                        GWKGenericMonoThread()                        */
     283             : /************************************************************************/
     284             : 
     285        2912 : static CPLErr GWKGenericMonoThread(GDALWarpKernel *poWK,
     286             :                                    void (*pfnFunc)(void *pUserData))
     287             : {
     288        2912 :     GWKThreadData td;
     289             : 
     290             :     // NOTE: the mutex is not used.
     291        2912 :     GWKJobStruct job(td.mutex, td.cv, td.counter, td.stopFlag);
     292        2912 :     job.poWK = poWK;
     293        2912 :     job.iYMin = 0;
     294        2912 :     job.iYMax = poWK->nDstYSize;
     295        2912 :     job.pfnProgress = GWKProgressMonoThread;
     296        2912 :     job.pTransformerArg = poWK->pTransformerArg;
     297        2912 :     job.counterSingleThreaded = td.counter;
     298        2912 :     pfnFunc(&job);
     299        2912 :     td.counter = job.counterSingleThreaded;
     300             : 
     301        5824 :     return td.stopFlag ? CE_Failure : CE_None;
     302             : }
     303             : 
     304             : /************************************************************************/
     305             : /*                          GWKThreadsCreate()                          */
     306             : /************************************************************************/
     307             : 
     308        1746 : void *GWKThreadsCreate(char **papszWarpOptions,
     309             :                        GDALTransformerFunc /* pfnTransformer */,
     310             :                        void *pTransformerArg)
     311             : {
     312             :     const char *pszWarpThreads =
     313        1746 :         CSLFetchNameValue(papszWarpOptions, "NUM_THREADS");
     314        1746 :     if (pszWarpThreads == nullptr)
     315        1729 :         pszWarpThreads = CPLGetConfigOption("GDAL_NUM_THREADS", "1");
     316             : 
     317        1746 :     int nThreads = 0;
     318        1746 :     if (EQUAL(pszWarpThreads, "ALL_CPUS"))
     319           3 :         nThreads = CPLGetNumCPUs();
     320             :     else
     321        1743 :         nThreads = atoi(pszWarpThreads);
     322        1746 :     if (nThreads <= 1)
     323        1724 :         nThreads = 0;
     324        1746 :     if (nThreads > 128)
     325           0 :         nThreads = 128;
     326             : 
     327        1746 :     GWKThreadData *psThreadData = new GWKThreadData();
     328             :     auto poThreadPool =
     329        1746 :         nThreads > 0 ? GDALGetGlobalThreadPool(nThreads) : nullptr;
     330        1746 :     if (nThreads && poThreadPool)
     331             :     {
     332          22 :         psThreadData->nMaxThreads = nThreads;
     333          22 :         psThreadData->threadJobs.reset(new std::vector<GWKJobStruct>(
     334             :             nThreads,
     335          22 :             GWKJobStruct(psThreadData->mutex, psThreadData->cv,
     336          44 :                          psThreadData->counter, psThreadData->stopFlag)));
     337             : 
     338          22 :         psThreadData->poJobQueue = poThreadPool->CreateJobQueue();
     339          22 :         psThreadData->pTransformerArgInput = pTransformerArg;
     340             :     }
     341             : 
     342        1746 :     return psThreadData;
     343             : }
     344             : 
     345             : /************************************************************************/
     346             : /*                           GWKThreadsEnd()                            */
     347             : /************************************************************************/
     348             : 
     349        1746 : void GWKThreadsEnd(void *psThreadDataIn)
     350             : {
     351        1746 :     if (psThreadDataIn == nullptr)
     352           0 :         return;
     353             : 
     354        1746 :     GWKThreadData *psThreadData = static_cast<GWKThreadData *>(psThreadDataIn);
     355        1746 :     if (psThreadData->poJobQueue)
     356             :     {
     357             :         // cppcheck-suppress constVariableReference
     358          32 :         for (auto &pair : psThreadData->mapThreadToTransformerArg)
     359             :         {
     360          10 :             CPLAssert(pair.second != psThreadData->pTransformerArgInput);
     361          10 :             GDALDestroyTransformer(pair.second);
     362             :         }
     363          22 :         psThreadData->poJobQueue.reset();
     364             :     }
     365        1746 :     delete psThreadData;
     366             : }
     367             : 
     368             : /************************************************************************/
     369             : /*                         ThreadFuncAdapter()                          */
     370             : /************************************************************************/
     371             : 
     372          31 : static void ThreadFuncAdapter(void *pData)
     373             : {
     374          31 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
     375          31 :     GWKThreadData *psThreadData =
     376          31 :         static_cast<GWKThreadData *>(psJob->poWK->psThreadData);
     377             : 
     378             :     // Look if we have already a per-thread transformer
     379          31 :     void *pTransformerArg = nullptr;
     380          31 :     const GIntBig nThreadId = CPLGetPID();
     381             : 
     382             :     {
     383          62 :         std::lock_guard<std::mutex> lock(psThreadData->mutex);
     384          31 :         ++psThreadData->nCurThreadCountForThisRun;
     385             : 
     386          31 :         auto oIter = psThreadData->mapThreadToTransformerArg.find(nThreadId);
     387          31 :         if (oIter != psThreadData->mapThreadToTransformerArg.end())
     388             :         {
     389           0 :             pTransformerArg = oIter->second;
     390             :         }
     391          31 :         else if (!psThreadData->bTransformerArgInputAssignedToThread &&
     392          31 :                  psThreadData->nCurThreadCountForThisRun ==
     393          31 :                      psThreadData->nTotalThreadCountForThisRun)
     394             :         {
     395             :             // If we are the last thread to be started, temporarily borrow the
     396             :             // original transformer
     397          21 :             psThreadData->bTransformerArgInputAssignedToThread = true;
     398          21 :             pTransformerArg = psThreadData->pTransformerArgInput;
     399          21 :             psThreadData->mapThreadToTransformerArg[nThreadId] =
     400             :                 pTransformerArg;
     401             :         }
     402             : 
     403          31 :         if (pTransformerArg == nullptr)
     404             :         {
     405          10 :             CPLAssert(psThreadData->pTransformerArgInput != nullptr);
     406          10 :             CPLAssert(!psThreadData->bTransformerArgInputAssignedToThread);
     407             :         }
     408             :     }
     409             : 
     410             :     // If no transformer assigned to current thread, instantiate one
     411          31 :     if (pTransformerArg == nullptr)
     412             :     {
     413             :         // This somehow assumes that GDALCloneTransformer() is thread-safe
     414             :         // which should normally be the case.
     415             :         pTransformerArg =
     416          10 :             GDALCloneTransformer(psThreadData->pTransformerArgInput);
     417             : 
     418             :         // Lock for the stop flag and the transformer map.
     419          10 :         std::lock_guard<std::mutex> lock(psThreadData->mutex);
     420          10 :         if (!pTransformerArg)
     421             :         {
     422           0 :             psJob->stopFlag = true;
     423           0 :             return;
     424             :         }
     425          10 :         psThreadData->mapThreadToTransformerArg[nThreadId] = pTransformerArg;
     426             :     }
     427             : 
     428          31 :     psJob->pTransformerArg = pTransformerArg;
     429          31 :     psJob->pfnFunc(pData);
     430             : 
     431             :     // Give back original transformer, if borrowed.
     432             :     {
     433          62 :         std::lock_guard<std::mutex> lock(psThreadData->mutex);
     434          31 :         if (psThreadData->bTransformerArgInputAssignedToThread &&
     435          21 :             pTransformerArg == psThreadData->pTransformerArgInput)
     436             :         {
     437             :             psThreadData->mapThreadToTransformerArg.erase(
     438          21 :                 psThreadData->mapThreadToTransformerArg.find(nThreadId));
     439          21 :             psThreadData->bTransformerArgInputAssignedToThread = false;
     440             :         }
     441             :     }
     442             : }
     443             : 
     444             : /************************************************************************/
     445             : /*                               GWKRun()                               */
     446             : /************************************************************************/
     447             : 
     448        2933 : static CPLErr GWKRun(GDALWarpKernel *poWK, const char *pszFuncName,
     449             :                      void (*pfnFunc)(void *pUserData))
     450             : 
     451             : {
     452        2933 :     const int nDstYSize = poWK->nDstYSize;
     453             : 
     454        2933 :     CPLDebug("GDAL",
     455             :              "GDALWarpKernel()::%s() "
     456             :              "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d",
     457             :              pszFuncName, poWK->nSrcXOff, poWK->nSrcYOff, poWK->nSrcXSize,
     458             :              poWK->nSrcYSize, poWK->nDstXOff, poWK->nDstYOff, poWK->nDstXSize,
     459             :              poWK->nDstYSize);
     460             : 
     461        2933 :     if (!poWK->pfnProgress(poWK->dfProgressBase, "", poWK->pProgress))
     462             :     {
     463           0 :         CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
     464           0 :         return CE_Failure;
     465             :     }
     466             : 
     467        2933 :     GWKThreadData *psThreadData =
     468             :         static_cast<GWKThreadData *>(poWK->psThreadData);
     469        2933 :     if (psThreadData == nullptr || psThreadData->poJobQueue == nullptr)
     470             :     {
     471        2912 :         return GWKGenericMonoThread(poWK, pfnFunc);
     472             :     }
     473             : 
     474          21 :     int nThreads = std::min(psThreadData->nMaxThreads, nDstYSize / 2);
     475             :     // Config option mostly useful for tests to be able to test multithreading
     476             :     // with small rasters
     477             :     const int nWarpChunkSize =
     478          21 :         atoi(CPLGetConfigOption("WARP_THREAD_CHUNK_SIZE", "65536"));
     479          21 :     if (nWarpChunkSize > 0)
     480             :     {
     481          19 :         GIntBig nChunks =
     482          19 :             static_cast<GIntBig>(nDstYSize) * poWK->nDstXSize / nWarpChunkSize;
     483          19 :         if (nThreads > nChunks)
     484          14 :             nThreads = static_cast<int>(nChunks);
     485             :     }
     486          21 :     if (nThreads <= 0)
     487          17 :         nThreads = 1;
     488             : 
     489          21 :     CPLDebug("WARP", "Using %d threads", nThreads);
     490             : 
     491          21 :     auto &jobs = *psThreadData->threadJobs;
     492          21 :     CPLAssert(static_cast<int>(jobs.size()) >= nThreads);
     493             :     // Fill-in job structures.
     494          52 :     for (int i = 0; i < nThreads; ++i)
     495             :     {
     496          31 :         auto &job = jobs[i];
     497          31 :         job.poWK = poWK;
     498          31 :         job.iYMin =
     499          31 :             static_cast<int>(static_cast<int64_t>(i) * nDstYSize / nThreads);
     500          31 :         job.iYMax = static_cast<int>(static_cast<int64_t>(i + 1) * nDstYSize /
     501          31 :                                      nThreads);
     502          31 :         if (poWK->pfnProgress != GDALDummyProgress)
     503           2 :             job.pfnProgress = GWKProgressThread;
     504          31 :         job.pfnFunc = pfnFunc;
     505             :     }
     506             : 
     507             :     bool bStopFlag;
     508             :     {
     509          21 :         std::unique_lock<std::mutex> lock(psThreadData->mutex);
     510             : 
     511          21 :         psThreadData->nTotalThreadCountForThisRun = nThreads;
     512             :         // coverity[missing_lock]
     513          21 :         psThreadData->nCurThreadCountForThisRun = 0;
     514             : 
     515             :         // Start jobs.
     516          52 :         for (int i = 0; i < nThreads; ++i)
     517             :         {
     518          31 :             auto &job = jobs[i];
     519          31 :             psThreadData->poJobQueue->SubmitJob(ThreadFuncAdapter,
     520             :                                                 static_cast<void *>(&job));
     521             :         }
     522             : 
     523             :         /* --------------------------------------------------------------------
     524             :          */
     525             :         /*      Report progress. */
     526             :         /* --------------------------------------------------------------------
     527             :          */
     528          21 :         if (poWK->pfnProgress != GDALDummyProgress)
     529             :         {
     530          12 :             while (psThreadData->counter < nDstYSize)
     531             :             {
     532          11 :                 psThreadData->cv.wait(lock);
     533          11 :                 if (!poWK->pfnProgress(poWK->dfProgressBase +
     534          11 :                                            poWK->dfProgressScale *
     535          11 :                                                (psThreadData->counter /
     536          11 :                                                 static_cast<double>(nDstYSize)),
     537             :                                        "", poWK->pProgress))
     538             :                 {
     539           1 :                     CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
     540           1 :                     psThreadData->stopFlag = true;
     541           1 :                     break;
     542             :                 }
     543             :             }
     544             :         }
     545             : 
     546          21 :         bStopFlag = psThreadData->stopFlag;
     547             :     }
     548             : 
     549             :     /* -------------------------------------------------------------------- */
     550             :     /*      Wait for all jobs to complete.                                  */
     551             :     /* -------------------------------------------------------------------- */
     552          21 :     psThreadData->poJobQueue->WaitCompletion();
     553             : 
     554          21 :     return bStopFlag ? CE_Failure : CE_None;
     555             : }
     556             : 
     557             : /************************************************************************/
     558             : /* ==================================================================== */
     559             : /*                            GDALWarpKernel                            */
     560             : /* ==================================================================== */
     561             : /************************************************************************/
     562             : 
     563             : /**
     564             :  * \class GDALWarpKernel "gdalwarper.h"
     565             :  *
     566             :  * Low level image warping class.
     567             :  *
     568             :  * This class is responsible for low level image warping for one
     569             :  * "chunk" of imagery.  The class is essentially a structure with all
     570             :  * data members public - primarily so that new special-case functions
     571             :  * can be added without changing the class declaration.
     572             :  *
     573             :  * Applications are normally intended to interact with warping facilities
     574             :  * through the GDALWarpOperation class, though the GDALWarpKernel can in
     575             :  * theory be used directly if great care is taken in setting up the
     576             :  * control data.
     577             :  *
     578             :  * <h3>Design Issues</h3>
     579             :  *
     580             :  * The intention is that PerformWarp() would analyze the setup in terms
     581             :  * of the datatype, resampling type, and validity/density mask usage and
     582             :  * pick one of many specific implementations of the warping algorithm over
     583             :  * a continuum of optimization vs. generality.  At one end there will be a
     584             :  * reference general purpose implementation of the algorithm that supports
     585             :  * any data type (working internally in double precision complex), all three
     586             :  * resampling types, and any or all of the validity/density masks.  At the
     587             :  * other end would be highly optimized algorithms for common cases like
     588             :  * nearest neighbour resampling on GDT_UInt8 data with no masks.
     589             :  *
     590             :  * The full set of optimized versions have not been decided but we should
     591             :  * expect to have at least:
     592             :  *  - One for each resampling algorithm for 8bit data with no masks.
     593             :  *  - One for each resampling algorithm for float data with no masks.
     594             :  *  - One for each resampling algorithm for float data with any/all masks
     595             :  *    (essentially the generic case for just float data).
     596             :  *  - One for each resampling algorithm for 8bit data with support for
     597             :  *    input validity masks (per band or per pixel).  This handles the common
     598             :  *    case of nodata masking.
     599             :  *  - One for each resampling algorithm for float data with support for
     600             :  *    input validity masks (per band or per pixel).  This handles the common
     601             :  *    case of nodata masking.
     602             :  *
     603             :  * Some of the specializations would operate on all bands in one pass
     604             :  * (especially the ones without masking would do this), while others might
     605             :  * process each band individually to reduce code complexity.
     606             :  *
     607             :  * <h3>Masking Semantics</h3>
     608             :  *
     609             :  * A detailed explanation of the semantics of the validity and density masks,
     610             :  * and their effects on resampling kernels is needed here.
     611             :  */
     612             : 
     613             : /************************************************************************/
     614             : /*                     GDALWarpKernel Data Members                      */
     615             : /************************************************************************/
     616             : 
     617             : /**
     618             :  * \var GDALResampleAlg GDALWarpKernel::eResample;
     619             :  *
     620             :  * Resampling algorithm.
     621             :  *
     622             :  * The resampling algorithm to use.  One of GRA_NearestNeighbour, GRA_Bilinear,
     623             :  * GRA_Cubic, GRA_CubicSpline, GRA_Lanczos, GRA_Average, GRA_RMS,
     624             :  * GRA_Mode or GRA_Sum.
     625             :  *
     626             :  * This field is required. GRA_NearestNeighbour may be used as a default
     627             :  * value.
     628             :  */
     629             : 
     630             : /**
     631             :  * \var GDALDataType GDALWarpKernel::eWorkingDataType;
     632             :  *
     633             :  * Working pixel data type.
     634             :  *
     635             :  * The datatype of pixels in the source image (papabySrcImage) and
     636             :  * destination image (papabyDstImage) buffers.  Note that operations on
     637             :  * some data types (such as GDT_UInt8) may be much better optimized than other
     638             :  * less common cases.
     639             :  *
     640             :  * This field is required.  It may not be GDT_Unknown.
     641             :  */
     642             : 
     643             : /**
     644             :  * \var int GDALWarpKernel::nBands;
     645             :  *
     646             :  * Number of bands.
     647             :  *
     648             :  * The number of bands (layers) of imagery being warped.  Determines the
     649             :  * number of entries in the papabySrcImage, papanBandSrcValid,
     650             :  * and papabyDstImage arrays.
     651             :  *
     652             :  * This field is required.
     653             :  */
     654             : 
     655             : /**
     656             :  * \var int GDALWarpKernel::nSrcXSize;
     657             :  *
     658             :  * Source image width in pixels.
     659             :  *
     660             :  * This field is required.
     661             :  */
     662             : 
     663             : /**
     664             :  * \var int GDALWarpKernel::nSrcYSize;
     665             :  *
     666             :  * Source image height in pixels.
     667             :  *
     668             :  * This field is required.
     669             :  */
     670             : 
     671             : /**
     672             :  * \var double GDALWarpKernel::dfSrcXExtraSize;
     673             :  *
     674             :  * Number of pixels included in nSrcXSize that are present on the edges of
     675             :  * the area of interest to take into account the width of the kernel.
     676             :  *
     677             :  * This field is required.
     678             :  */
     679             : 
     680             : /**
     681             :  * \var double GDALWarpKernel::dfSrcYExtraSize;
     682             :  *
     683             :  * Number of pixels included in nSrcYSize that are present on the edges of
     684             :  * the area of interest to take into account the height of the kernel.
     685             :  *
     686             :  * This field is required.
     687             :  */
     688             : 
     689             : /**
     690             :  * \var GByte **GDALWarpKernel::papabySrcImage;
     691             :  *
     692             :  * Array of source image band data.
     693             :  *
     694             :  * This is an array of pointers (of size GDALWarpKernel::nBands)
     695             :  * to image data.  Each individual band of image data is organized as a single
     696             :  * block of image data in left to right, then bottom to top order.  The actual
     697             :  * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
     698             :  *
     699             :  * To access the pixel value for the (x=3, y=4) pixel (zero based) of
     700             :  * the second band with eWorkingDataType set to GDT_Float32 use code like
     701             :  * this:
     702             :  *
     703             :  * \code
     704             :  *   float dfPixelValue;
     705             :  *   int   nBand = 2-1;  // Band indexes are zero based.
     706             :  *   int   nPixel = 3; // Zero based.
     707             :  *   int   nLine = 4;  // Zero based.
     708             :  *
     709             :  *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
     710             :  *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
     711             :  *   assert( nBand >= 0 && nBand < poKern->nBands );
     712             :  *   dfPixelValue = ((float *) poKern->papabySrcImage[nBand])
     713             :  *                                  [nPixel + nLine * poKern->nSrcXSize];
     714             :  * \endcode
     715             :  *
     716             :  * This field is required.
     717             :  */
     718             : 
     719             : /**
     720             :  * \var GUInt32 **GDALWarpKernel::papanBandSrcValid;
     721             :  *
     722             :  * Per band validity mask for source pixels.
     723             :  *
     724             :  * Array of pixel validity mask layers for each source band.   Each of
     725             :  * the mask layers is the same size (in pixels) as the source image with
     726             :  * one bit per pixel.  Note that it is legal (and common) for this to be
     727             :  * NULL indicating that none of the pixels are invalidated, or for some
     728             :  * band validity masks to be NULL in which case all pixels of the band are
     729             :  * valid.  The following code can be used to test the validity of a particular
     730             :  * pixel.
     731             :  *
     732             :  * \code
     733             :  *   int   bIsValid = TRUE;
     734             :  *   int   nBand = 2-1;  // Band indexes are zero based.
     735             :  *   int   nPixel = 3; // Zero based.
     736             :  *   int   nLine = 4;  // Zero based.
     737             :  *
     738             :  *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
     739             :  *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
     740             :  *   assert( nBand >= 0 && nBand < poKern->nBands );
     741             :  *
     742             :  *   if( poKern->papanBandSrcValid != NULL
     743             :  *       && poKern->papanBandSrcValid[nBand] != NULL )
     744             :  *   {
     745             :  *       GUInt32 *panBandMask = poKern->papanBandSrcValid[nBand];
     746             :  *       int    iPixelOffset = nPixel + nLine * poKern->nSrcXSize;
     747             :  *
     748             :  *       bIsValid = CPLMaskGet(panBandMask, iPixelOffset);
     749             :  *   }
     750             :  * \endcode
     751             :  */
     752             : 
     753             : /**
     754             :  * \var GUInt32 *GDALWarpKernel::panUnifiedSrcValid;
     755             :  *
     756             :  * Per pixel validity mask for source pixels.
     757             :  *
     758             :  * A single validity mask layer that applies to the pixels of all source
     759             :  * bands.  It is accessed similarly to papanBandSrcValid, but without the
     760             :  * extra level of band indirection.
     761             :  *
     762             :  * This pointer may be NULL indicating that all pixels are valid.
     763             :  *
     764             :  * Note that if both panUnifiedSrcValid, and papanBandSrcValid are available,
     765             :  * the pixel isn't considered to be valid unless both arrays indicate it is
     766             :  * valid.
     767             :  */
     768             : 
     769             : /**
     770             :  * \var float *GDALWarpKernel::pafUnifiedSrcDensity;
     771             :  *
     772             :  * Per pixel density mask for source pixels.
     773             :  *
     774             :  * A single density mask layer that applies to the pixels of all source
     775             :  * bands.  It contains values between 0.0 and 1.0 indicating the degree to
     776             :  * which this pixel should be allowed to contribute to the output result.
     777             :  *
     778             :  * This pointer may be NULL indicating that all pixels have a density of 1.0.
     779             :  *
     780             :  * The density for a pixel may be accessed like this:
     781             :  *
     782             :  * \code
     783             :  *   float fDensity = 1.0;
     784             :  *   int nPixel = 3;  // Zero based.
     785             :  *   int nLine = 4;   // Zero based.
     786             :  *
     787             :  *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
     788             :  *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
     789             :  *   if( poKern->pafUnifiedSrcDensity != NULL )
     790             :  *     fDensity = poKern->pafUnifiedSrcDensity
     791             :  *                                  [nPixel + nLine * poKern->nSrcXSize];
     792             :  * \endcode
     793             :  */
     794             : 
     795             : /**
     796             :  * \var int GDALWarpKernel::nDstXSize;
     797             :  *
     798             :  * Width of destination image in pixels.
     799             :  *
     800             :  * This field is required.
     801             :  */
     802             : 
     803             : /**
     804             :  * \var int GDALWarpKernel::nDstYSize;
     805             :  *
     806             :  * Height of destination image in pixels.
     807             :  *
     808             :  * This field is required.
     809             :  */
     810             : 
     811             : /**
     812             :  * \var GByte **GDALWarpKernel::papabyDstImage;
     813             :  *
     814             :  * Array of destination image band data.
     815             :  *
     816             :  * This is an array of pointers (of size GDALWarpKernel::nBands)
     817             :  * to image data.  Each individual band of image data is organized as a single
     818             :  * block of image data in left to right, then bottom to top order.  The actual
     819             :  * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
     820             :  *
     821             :  * To access the pixel value for the (x=3, y=4) pixel (zero based) of
     822             :  * the second band with eWorkingDataType set to GDT_Float32 use code like
     823             :  * this:
     824             :  *
     825             :  * \code
     826             :  *   float dfPixelValue;
     827             :  *   int   nBand = 2-1;  // Band indexes are zero based.
     828             :  *   int   nPixel = 3; // Zero based.
     829             :  *   int   nLine = 4;  // Zero based.
     830             :  *
     831             :  *   assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
     832             :  *   assert( nLine >= 0 && nLine < poKern->nDstYSize );
     833             :  *   assert( nBand >= 0 && nBand < poKern->nBands );
     834             :  *   dfPixelValue = ((float *) poKern->papabyDstImage[nBand])
     835             :  *                                  [nPixel + nLine * poKern->nDstXSize];
     836             :  * \endcode
     837             :  *
     838             :  * This field is required.
     839             :  */
     840             : 
     841             : /**
     842             :  * \var GUInt32 *GDALWarpKernel::panDstValid;
     843             :  *
     844             :  * Per pixel validity mask for destination pixels.
     845             :  *
     846             :  * A single validity mask layer that applies to the pixels of all destination
     847             :  * bands.  It is accessed similarly to panUnifiedSrcValid, but based
     848             :  * on the size of the destination image.
     849             :  *
     850             :  * This pointer may be NULL indicating that all pixels are valid.
     851             :  */
     852             : 
     853             : /**
     854             :  * \var float *GDALWarpKernel::pafDstDensity;
     855             :  *
     856             :  * Per pixel density mask for destination pixels.
     857             :  *
     858             :  * A single density mask layer that applies to the pixels of all destination
     859             :  * bands.  It contains values between 0.0 and 1.0.
     860             :  *
     861             :  * This pointer may be NULL indicating that all pixels have a density of 1.0.
     862             :  *
     863             :  * The density for a pixel may be accessed like this:
     864             :  *
     865             :  * \code
     866             :  *   float fDensity = 1.0;
     867             :  *   int   nPixel = 3; // Zero based.
     868             :  *   int   nLine = 4;  // Zero based.
     869             :  *
     870             :  *   assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
     871             :  *   assert( nLine >= 0 && nLine < poKern->nDstYSize );
     872             :  *   if( poKern->pafDstDensity != NULL )
     873             :  *     fDensity = poKern->pafDstDensity[nPixel + nLine * poKern->nDstXSize];
     874             :  * \endcode
     875             :  */
     876             : 
     877             : /**
     878             :  * \var int GDALWarpKernel::nSrcXOff;
     879             :  *
     880             :  * X offset to source pixel coordinates for transformation.
     881             :  *
     882             :  * See pfnTransformer.
     883             :  *
     884             :  * This field is required.
     885             :  */
     886             : 
     887             : /**
     888             :  * \var int GDALWarpKernel::nSrcYOff;
     889             :  *
     890             :  * Y offset to source pixel coordinates for transformation.
     891             :  *
     892             :  * See pfnTransformer.
     893             :  *
     894             :  * This field is required.
     895             :  */
     896             : 
     897             : /**
     898             :  * \var int GDALWarpKernel::nDstXOff;
     899             :  *
     900             :  * X offset to destination pixel coordinates for transformation.
     901             :  *
     902             :  * See pfnTransformer.
     903             :  *
     904             :  * This field is required.
     905             :  */
     906             : 
     907             : /**
     908             :  * \var int GDALWarpKernel::nDstYOff;
     909             :  *
     910             :  * Y offset to destination pixel coordinates for transformation.
     911             :  *
     912             :  * See pfnTransformer.
     913             :  *
     914             :  * This field is required.
     915             :  */
     916             : 
     917             : /**
     918             :  * \var GDALTransformerFunc GDALWarpKernel::pfnTransformer;
     919             :  *
     920             :  * Source/destination location transformer.
     921             :  *
     922             :  * The function to call to transform coordinates between source image
     923             :  * pixel/line coordinates and destination image pixel/line coordinates.
     924             :  * See GDALTransformerFunc() for details of the semantics of this function.
     925             :  *
     926             :  * The GDALWarpKernel algorithm will only ever use this transformer in
     927             :  * "destination to source" mode (bDstToSrc=TRUE), and will always pass
     928             :  * partial or complete scanlines of points in the destination image as
     929             :  * input.  This means, among other things, that it is safe to use the
     930             :  * approximating transform GDALApproxTransform() as the transformation
     931             :  * function.
     932             :  *
     933             :  * Source and destination images may be subsets of a larger overall image.
     934             :  * The transformation algorithms will expect and return pixel/line coordinates
     935             :  * in terms of this larger image, so coordinates need to be offset by
     936             :  * the offsets specified in nSrcXOff, nSrcYOff, nDstXOff, and nDstYOff before
     937             :  * passing to pfnTransformer, and after return from it.
     938             :  *
     939             :  * The GDALWarpKernel::pTransformerArg value will be passed as the callback
     940             :  * data to this function when it is called.
     941             :  *
     942             :  * This field is required.
     943             :  */
     944             : 
     945             : /**
     946             :  * \var void *GDALWarpKernel::pTransformerArg;
     947             :  *
     948             :  * Callback data for pfnTransformer.
     949             :  *
     950             :  * This field may be NULL if not required for the pfnTransformer being used.
     951             :  */
     952             : 
     953             : /**
     954             :  * \var GDALProgressFunc GDALWarpKernel::pfnProgress;
     955             :  *
     956             :  * The function to call to report progress of the algorithm, and to check
     957             :  * for a requested termination of the operation.  It operates according to
     958             :  * GDALProgressFunc() semantics.
     959             :  *
     960             :  * Generally speaking the progress function will be invoked for each
     961             :  * scanline of the destination buffer that has been processed.
     962             :  *
     963             :  * This field may be NULL (internally set to GDALDummyProgress()).
     964             :  */
     965             : 
     966             : /**
     967             :  * \var void *GDALWarpKernel::pProgress;
     968             :  *
     969             :  * Callback data for pfnProgress.
     970             :  *
     971             :  * This field may be NULL if not required for the pfnProgress being used.
     972             :  */
     973             : 
     974             : /************************************************************************/
     975             : /*                           GDALWarpKernel()                           */
     976             : /************************************************************************/
     977             : 
                       /**
                        * Default constructor.
                        *
                        * Puts every member into a neutral starting state through the
                        * initializer list; the body itself is empty:
                        *  - all pointer members (warp options, source/destination image
                        *    arrays, validity and density masks, transformer and its argument,
                        *    progress argument, destination nodata array, thread data) are null;
                        *  - band count, image sizes, extra sizes, offsets, filter sizes, radii
                        *    and filter init values are zero;
                        *  - dfXScale and dfYScale are 1.0;
                        *  - eResample is GRA_NearestNeighbour and eTieStrategy is GWKTS_First;
                        *  - pfnProgress is GDALDummyProgress, with dfProgressBase 0.0 and
                        *    dfProgressScale 1.0.
                        *
                        * eWorkingDataType starts as GDT_Unknown, which the field documentation
                        * states is not an acceptable value, so it has to be set before warping.
                        */
     978        2963 : GDALWarpKernel::GDALWarpKernel()
     979             :     : papszWarpOptions(nullptr), eResample(GRA_NearestNeighbour),
     980             :       eWorkingDataType(GDT_Unknown), nBands(0), nSrcXSize(0), nSrcYSize(0),
     981             :       dfSrcXExtraSize(0.0), dfSrcYExtraSize(0.0), papabySrcImage(nullptr),
     982             :       papanBandSrcValid(nullptr), panUnifiedSrcValid(nullptr),
     983             :       pafUnifiedSrcDensity(nullptr), nDstXSize(0), nDstYSize(0),
     984             :       papabyDstImage(nullptr), panDstValid(nullptr), pafDstDensity(nullptr),
     985             :       dfXScale(1.0), dfYScale(1.0), dfXFilter(0.0), dfYFilter(0.0), nXRadius(0),
     986             :       nYRadius(0), nFiltInitX(0), nFiltInitY(0), nSrcXOff(0), nSrcYOff(0),
     987             :       nDstXOff(0), nDstYOff(0), pfnTransformer(nullptr),
     988             :       pTransformerArg(nullptr), pfnProgress(GDALDummyProgress),
     989             :       pProgress(nullptr), dfProgressBase(0.0), dfProgressScale(1.0),
     990             :       padfDstNoDataReal(nullptr), psThreadData(nullptr),
     991        2963 :       eTieStrategy(GWKTS_First)
     992             : {
     993        2963 : }
     994             : 
     995             : /************************************************************************/
     996             : /*                          ~GDALWarpKernel()                           */
     997             : /************************************************************************/
     998             : 
                       /**
                        * Destructor.
                        *
                        * The body is empty: none of the pointer members is released here.
                        * NOTE(review): presumably the buffers they reference are owned and
                        * freed by whoever set them up - confirm against the callers.
                        */
     999        2963 : GDALWarpKernel::~GDALWarpKernel()
    1000             : {
    1001        2963 : }
    1002             : 
    1003             : /************************************************************************/
    1004             : /*                              getArea()                               */
    1005             : /************************************************************************/
    1006             : 
                       // A 2D point stored as (first = x, second = y), as used by getArea().
    1007             : typedef std::pair<double, double> XYPair;
    1008             : 
                       // A polygon described by the ordered list of its vertices.
    1009             : typedef std::vector<XYPair> XYPoly;
    1010             : 
                       // Returns the (non-negative) area enclosed by poly.
                       //
                       // The area is computed as 0.5 * |sum_i x[i] * (y[i+1] - y[i-1])|, where the
                       // indices wrap around the vertex list. The absolute value makes the result
                       // independent of the winding direction of the vertices.
                       //
    1011             : // poly may or may not be closed.
                       //
                       // Precondition (not checked, see the disabled assertion below): poly must
                       // hold at least 2 points, since elements 0, 1, size-1 and size-2 are read
                       // unconditionally.
    1012      565062 : static double getArea(const XYPoly &poly)
    1013             : {
    1014             :     // CPLAssert(poly.size() >= 2);
    1015      565062 :     const size_t nPointCount = poly.size();
                           // Contribution of the first vertex: its previous neighbour is the
                           // last vertex of the list.
    1016             :     double dfAreaSum =
    1017      565062 :         poly[0].first * (poly[1].second - poly[nPointCount - 1].second);
    1018             : 
                           // Contributions of the interior vertices, whose two neighbours are
                           // simply the adjacent list entries.
    1019     1784760 :     for (size_t i = 1; i < nPointCount - 1; i++)
    1020             :     {
    1021     1219700 :         dfAreaSum += poly[i].first * (poly[i + 1].second - poly[i - 1].second);
    1022             :     }
    1023             : 
                           // Contribution of the last vertex: its next neighbour is the first
                           // vertex of the list.
    1024      565062 :     dfAreaSum += poly[nPointCount - 1].first *
    1025      565062 :                  (poly[0].second - poly[nPointCount - 2].second);
    1026             : 
    1027      565062 :     return 0.5 * std::fabs(dfAreaSum);
    1028             : }
    1029             : 
    1030             : /************************************************************************/
    1031             : /*                       CanUse4SamplesFormula()                        */
    1032             : /************************************************************************/
    1033             : 
                       // Tells whether the 4-sample bilinear/bicubic formula may be used for the
                       // warp described by poWK.
                       //
                       // Returns true only when poWK->eResample is GRA_Bilinear or GRA_Cubic and
                       // both poWK->dfXScale and poWK->dfYScale are strictly greater than 0.5.
                       // For bilinear/cubic resampling with a smaller scale, a debug message is
                       // emitted through CPLDebugOnce() and false is returned. Every other
                       // resampling algorithm returns false without any message.
    1034        4240 : static bool CanUse4SamplesFormula(const GDALWarpKernel *poWK)
    1035             : {
    1036        4240 :     if (poWK->eResample == GRA_Bilinear || poWK->eResample == GRA_Cubic)
    1037             :     {
    1038             :         // Use 4-sample formula if we are not downsampling by more than a
    1039             :         // factor of 1:2
    1040        2283 :         if (poWK->dfXScale > 0.5 && poWK->dfYScale > 0.5)
    1041        1843 :             return true;
                               // Scale too small on at least one axis: report why the formula is
                               // not used, then fall through to the common "false" return.
    1042         440 :         CPLDebugOnce("WARP",
    1043             :                      "Not using 4-sample bilinear/bicubic formula because "
    1044             :                      "XSCALE(=%f) and/or YSCALE(=%f) <= 0.5",
    1045             :                      poWK->dfXScale, poWK->dfYScale);
    1046             :     }
    1047        2397 :     return false;
    1048             : }
    1049             : 
    1050             : /************************************************************************/
    1051             : /*                            PerformWarp()                             */
    1052             : /************************************************************************/
    1053             : 
    1054             : /**
    1055             :  * \fn CPLErr GDALWarpKernel::PerformWarp();
    1056             :  *
    1057             :  * This method performs the warp described in the GDALWarpKernel.
    1058             :  *
    1059             :  * @return CE_None on success or CE_Failure if an error occurs.
    1060             :  */
    1061             : 
    1062        2959 : CPLErr GDALWarpKernel::PerformWarp()
    1063             : 
    1064             : {
    1065        2959 :     const CPLErr eErr = Validate();
    1066             : 
    1067        2959 :     if (eErr != CE_None)
    1068           1 :         return eErr;
    1069             : 
    1070             :     // See #2445 and #3079.
    1071        2958 :     if (nSrcXSize <= 0 || nSrcYSize <= 0)
    1072             :     {
    1073          25 :         if (!pfnProgress(dfProgressBase + dfProgressScale, "", pProgress))
    1074             :         {
    1075           0 :             CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
    1076           0 :             return CE_Failure;
    1077             :         }
    1078          25 :         return CE_None;
    1079             :     }
    1080             : 
    1081             :     /* -------------------------------------------------------------------- */
    1082             :     /*      Pre-calculate resampling scales and window sizes for filtering. */
    1083             :     /* -------------------------------------------------------------------- */
    1084             : 
    1085        2933 :     dfXScale = 0.0;
    1086        2933 :     dfYScale = 0.0;
    1087             : 
    1088             :     // XSCALE and YSCALE per warping chunk is not necessarily ideal, in case of
    1089             :     // heterogeneous change in shapes.
    1090             :     // Best would probably be a per-pixel scale computation.
    1091        2933 :     const char *pszXScale = CSLFetchNameValue(papszWarpOptions, "XSCALE");
    1092        2933 :     const char *pszYScale = CSLFetchNameValue(papszWarpOptions, "YSCALE");
    1093        2933 :     if (!pszXScale || !pszYScale)
    1094             :     {
    1095             :         // Sample points along a grid in the destination space
    1096        2932 :         constexpr int MAX_POINTS_PER_DIM = 10;
    1097        2932 :         const int nPointsX = std::min(MAX_POINTS_PER_DIM, nDstXSize);
    1098        2932 :         const int nPointsY = std::min(MAX_POINTS_PER_DIM, nDstYSize);
    1099        2932 :         constexpr int CORNER_COUNT_PER_SQUARE = 4;
    1100        2932 :         const int nPoints = CORNER_COUNT_PER_SQUARE * nPointsX * nPointsY;
    1101        5864 :         std::vector<double> adfX;
    1102        5864 :         std::vector<double> adfY;
    1103        2932 :         adfX.reserve(CORNER_COUNT_PER_SQUARE * nPoints);
    1104        2932 :         adfY.reserve(CORNER_COUNT_PER_SQUARE * nPoints);
    1105        5864 :         std::vector<double> adfZ(CORNER_COUNT_PER_SQUARE * nPoints);
    1106        5864 :         std::vector<int> abSuccess(CORNER_COUNT_PER_SQUARE * nPoints);
    1107       27878 :         for (int iY = 0; iY < nPointsY; iY++)
    1108             :         {
    1109       24946 :             const double dfYShift = (iY > 0 && iY == nPointsY - 1) ? -1.0 : 0.0;
    1110       24946 :             const double dfY =
    1111       24946 :                 dfYShift + (nPointsY == 1 ? 0.0
    1112       24730 :                                           : static_cast<double>(iY) *
    1113       24730 :                                                 nDstYSize / (nPointsY - 1));
    1114             : 
    1115      264122 :             for (int iX = 0; iX < nPointsX; iX++)
    1116             :             {
    1117      239176 :                 const double dfXShift =
    1118      239176 :                     (iX > 0 && iX == nPointsX - 1) ? -1.0 : 0.0;
    1119             : 
    1120      239176 :                 const double dfX =
    1121      239176 :                     dfXShift + (nPointsX == 1 ? 0.0
    1122      238974 :                                               : static_cast<double>(iX) *
    1123      238974 :                                                     nDstXSize / (nPointsX - 1));
    1124             : 
    1125             :                 // Reproject a unit square at each sample point
    1126      239176 :                 adfX.push_back(dfX);
    1127      239176 :                 adfY.push_back(dfY);
    1128             : 
    1129      239176 :                 adfX.push_back(dfX + 1);
    1130      239176 :                 adfY.push_back(dfY);
    1131             : 
    1132      239176 :                 adfX.push_back(dfX);
    1133      239176 :                 adfY.push_back(dfY + 1);
    1134             : 
    1135      239176 :                 adfX.push_back(dfX + 1);
    1136      239176 :                 adfY.push_back(dfY + 1);
    1137             :             }
    1138             :         }
    1139        2932 :         pfnTransformer(pTransformerArg, TRUE, static_cast<int>(adfX.size()),
    1140             :                        adfX.data(), adfY.data(), adfZ.data(), abSuccess.data());
    1141             : 
    1142        5864 :         std::vector<XYPair> adfXYScales;
    1143        2932 :         adfXYScales.reserve(nPoints);
    1144      242108 :         for (int i = 0; i < nPoints; i += CORNER_COUNT_PER_SQUARE)
    1145             :         {
    1146      477304 :             if (abSuccess[i + 0] && abSuccess[i + 1] && abSuccess[i + 2] &&
    1147      238128 :                 abSuccess[i + 3])
    1148             :             {
    1149     1905020 :                 const auto square = [](double x) { return x * x; };
    1150             : 
    1151      238127 :                 const double vx01 = adfX[i + 1] - adfX[i + 0];
    1152      238127 :                 const double vy01 = adfY[i + 1] - adfY[i + 0];
    1153      238127 :                 const double len01_sq = square(vx01) + square(vy01);
    1154             : 
    1155      238127 :                 const double vx23 = adfX[i + 3] - adfX[i + 2];
    1156      238127 :                 const double vy23 = adfY[i + 3] - adfY[i + 2];
    1157      238127 :                 const double len23_sq = square(vx23) + square(vy23);
    1158             : 
    1159      238127 :                 const double vx02 = adfX[i + 2] - adfX[i + 0];
    1160      238127 :                 const double vy02 = adfY[i + 2] - adfY[i + 0];
    1161      238127 :                 const double len02_sq = square(vx02) + square(vy02);
    1162             : 
    1163      238127 :                 const double vx13 = adfX[i + 3] - adfX[i + 1];
    1164      238127 :                 const double vy13 = adfY[i + 3] - adfY[i + 1];
    1165      238127 :                 const double len13_sq = square(vx13) + square(vy13);
    1166             : 
    1167             :                 // ~ 20 degree, heuristic
    1168      238127 :                 constexpr double TAN_MODEST_ANGLE = 0.35;
    1169             : 
    1170             :                 // 10%, heuristic
    1171      238127 :                 constexpr double LENGTH_RELATIVE_TOLERANCE = 0.1;
    1172             : 
    1173             :                 // Security margin to avoid division by zero (would only
    1174             :                 // happen in case of degenerated coordinate transformation,
    1175             :                 // or insane upsampling)
    1176      238127 :                 constexpr double EPSILON = 1e-10;
    1177             : 
    1178             :                 // Does the transformed square look like an almost non-rotated
    1179             :                 // quasi-rectangle?
    1180      238127 :                 if (std::fabs(vy01) < TAN_MODEST_ANGLE * vx01 &&
    1181      231741 :                     std::fabs(vy23) < TAN_MODEST_ANGLE * vx23 &&
    1182      231714 :                     std::fabs(len01_sq - len23_sq) <
    1183      231714 :                         LENGTH_RELATIVE_TOLERANCE * std::fabs(len01_sq) &&
    1184      231601 :                     std::fabs(len02_sq - len13_sq) <
    1185      231601 :                         LENGTH_RELATIVE_TOLERANCE * std::fabs(len02_sq))
    1186             :                 {
    1187             :                     // Geometric average of the squared lengths of two opposite
    1188             :                     // edges, hence a sqrt(). As this is still a squared value,
    1189             :                     // we need another sqrt() to get a distance.
    1190             :                     const double dfXLength =
    1191      231586 :                         std::sqrt(std::sqrt(len01_sq * len23_sq));
    1192             :                     const double dfYLength =
    1193      231586 :                         std::sqrt(std::sqrt(len02_sq * len13_sq));
    1194      231586 :                     if (dfXLength > EPSILON && dfYLength > EPSILON)
    1195             :                     {
    1196      231586 :                         const double dfThisXScale = 1.0 / dfXLength;
    1197      231586 :                         const double dfThisYScale = 1.0 / dfYLength;
    1198      231586 :                         adfXYScales.push_back({dfThisXScale, dfThisYScale});
    1199      231586 :                     }
    1200             :                 }
    1201             :                 else
    1202             :                 {
    1203             :                     // If not, then consider the area of the transformed unit
    1204             :                     // square to determine the X/Y scales.
    1205        6541 :                     const XYPoly poly{{adfX[i + 0], adfY[i + 0]},
    1206        6541 :                                       {adfX[i + 1], adfY[i + 1]},
    1207        6541 :                                       {adfX[i + 3], adfY[i + 3]},
    1208       26164 :                                       {adfX[i + 2], adfY[i + 2]}};
    1209        6541 :                     const double dfSrcArea = getArea(poly);
    1210        6541 :                     const double dfFactor = std::sqrt(dfSrcArea);
    1211        6541 :                     if (dfFactor > EPSILON)
    1212             :                     {
    1213        6541 :                         const double dfThisXScale = 1.0 / dfFactor;
    1214        6541 :                         const double dfThisYScale = dfThisXScale;
    1215        6541 :                         adfXYScales.push_back({dfThisXScale, dfThisYScale});
    1216             :                     }
    1217             :                 }
    1218             :             }
    1219             :         }
    1220             : 
    1221        2932 :         if (!adfXYScales.empty())
    1222             :         {
    1223             :             // Sort by increasing xscale * yscale
    1224        2932 :             std::sort(adfXYScales.begin(), adfXYScales.end(),
    1225     1281170 :                       [](const XYPair &a, const XYPair &b)
    1226     1281170 :                       { return a.first * a.second < b.first * b.second; });
    1227             : 
    1228             :             // Compute the per-axis maximum of scale
    1229        2932 :             double dfXMax = 0;
    1230        2932 :             double dfYMax = 0;
    1231      241059 :             for (const auto &[dfX, dfY] : adfXYScales)
    1232             :             {
    1233      238127 :                 dfXMax = std::max(dfXMax, dfX);
    1234      238127 :                 dfYMax = std::max(dfYMax, dfY);
    1235             :             }
    1236             : 
    1237             :             // Now eliminate outliers, defined as ones whose value is < 10% of
    1238             :             // the maximum value, typically found at a polar discontinuity, and
    1239             :             // compute the average of non-outlier values.
    1240        2932 :             dfXScale = 0;
    1241        2932 :             dfYScale = 0;
    1242        2932 :             int i = 0;
    1243        2932 :             constexpr double THRESHOLD = 0.1;  // 10%, rather arbitrary
    1244      241059 :             for (const auto &[dfX, dfY] : adfXYScales)
    1245             :             {
    1246      238127 :                 if (dfX > THRESHOLD * dfXMax && dfY > THRESHOLD * dfYMax)
    1247             :                 {
    1248      235939 :                     ++i;
    1249      235939 :                     const double dfXDelta = dfX - dfXScale;
    1250      235939 :                     const double dfYDelta = dfY - dfYScale;
    1251      235939 :                     const double dfInvI = 1.0 / i;
    1252      235939 :                     dfXScale += dfXDelta * dfInvI;
    1253      235939 :                     dfYScale += dfYDelta * dfInvI;
    1254             :                 }
    1255             :             }
    1256             :         }
    1257             :     }
    1258             : 
    1259             :     // Round to closest integer reciprocal scale if we are very close to it
    1260             :     const auto RoundToClosestIntegerReciprocalScaleIfCloseEnough =
    1261        5866 :         [](double dfScale)
    1262             :     {
    1263        5866 :         if (dfScale < 1.0)
    1264             :         {
    1265        2513 :             double dfReciprocalScale = 1.0 / dfScale;
    1266        2513 :             const int nReciprocalScale =
    1267        2513 :                 static_cast<int>(dfReciprocalScale + 0.5);
    1268        2513 :             if (fabs(dfReciprocalScale - nReciprocalScale) < 0.05)
    1269        2144 :                 dfScale = 1.0 / nReciprocalScale;
    1270             :         }
    1271        5866 :         return dfScale;
    1272             :     };
    1273             : 
    1274        2933 :     if (dfXScale <= 0)
    1275           1 :         dfXScale = 1.0;
    1276        2933 :     if (dfYScale <= 0)
    1277           1 :         dfYScale = 1.0;
    1278             : 
    1279        2933 :     dfXScale = RoundToClosestIntegerReciprocalScaleIfCloseEnough(dfXScale);
    1280        2933 :     dfYScale = RoundToClosestIntegerReciprocalScaleIfCloseEnough(dfYScale);
    1281             : 
    1282        2933 :     if (pszXScale != nullptr)
    1283           1 :         dfXScale = CPLAtof(pszXScale);
    1284        2933 :     if (pszYScale != nullptr)
    1285           1 :         dfYScale = CPLAtof(pszYScale);
    1286             : 
    1287        2933 :     if (!pszXScale || !pszYScale)
    1288        2932 :         CPLDebug("WARP", "dfXScale = %f, dfYScale = %f", dfXScale, dfYScale);
    1289             : 
    1290        2933 :     const int bUse4SamplesFormula = CanUse4SamplesFormula(this);
    1291             : 
    1292             :     // Safety check for callers that would use GDALWarpKernel without using
    1293             :     // GDALWarpOperation.
    1294        2870 :     if ((eResample == GRA_CubicSpline || eResample == GRA_Lanczos ||
    1295        2807 :          ((eResample == GRA_Cubic || eResample == GRA_Bilinear) &&
    1296        5866 :           !bUse4SamplesFormula)) &&
    1297         346 :         atoi(CSLFetchNameValueDef(papszWarpOptions, "EXTRA_ELTS", "0")) !=
    1298             :             WARP_EXTRA_ELTS)
    1299             :     {
    1300           0 :         CPLError(CE_Failure, CPLE_AppDefined,
    1301             :                  "Source arrays must have WARP_EXTRA_ELTS extra elements at "
    1302             :                  "their end. "
    1303             :                  "See GDALWarpKernel class definition. If this condition is "
    1304             :                  "fulfilled, define a EXTRA_ELTS=%d warp options",
    1305             :                  WARP_EXTRA_ELTS);
    1306           0 :         return CE_Failure;
    1307             :     }
    1308             : 
    1309        2933 :     dfXFilter = anGWKFilterRadius[eResample];
    1310        2933 :     dfYFilter = anGWKFilterRadius[eResample];
    1311             : 
    1312        2933 :     nXRadius = dfXScale < 1.0 ? static_cast<int>(ceil(dfXFilter / dfXScale))
    1313        2380 :                               : static_cast<int>(dfXFilter);
    1314        2933 :     nYRadius = dfYScale < 1.0 ? static_cast<int>(ceil(dfYFilter / dfYScale))
    1315        2384 :                               : static_cast<int>(dfYFilter);
    1316             : 
    1317             :     // Filter window offset depends on the parity of the kernel radius.
    1318        2933 :     nFiltInitX = ((anGWKFilterRadius[eResample] + 1) % 2) - nXRadius;
    1319        2933 :     nFiltInitY = ((anGWKFilterRadius[eResample] + 1) % 2) - nYRadius;
    1320             : 
    1321        2933 :     bApplyVerticalShift =
    1322        2933 :         CPLFetchBool(papszWarpOptions, "APPLY_VERTICAL_SHIFT", false);
    1323        2933 :     dfMultFactorVerticalShift = CPLAtof(CSLFetchNameValueDef(
    1324        2933 :         papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT", "1.0"));
    1325             : 
    1326             :     /* -------------------------------------------------------------------- */
    1327             :     /*      Set up resampling functions.                                    */
    1328             :     /* -------------------------------------------------------------------- */
    1329        2933 :     if (CPLFetchBool(papszWarpOptions, "USE_GENERAL_CASE", false))
    1330          12 :         return GWKGeneralCase(this);
    1331             : 
    1332        2921 :     const bool bNoMasksOrDstDensityOnly =
    1333        2914 :         papanBandSrcValid == nullptr && panUnifiedSrcValid == nullptr &&
    1334        5835 :         pafUnifiedSrcDensity == nullptr && panDstValid == nullptr;
    1335             : 
    1336        2921 :     if (eWorkingDataType == GDT_UInt8 && eResample == GRA_NearestNeighbour &&
    1337             :         bNoMasksOrDstDensityOnly)
    1338         944 :         return GWKNearestNoMasksOrDstDensityOnlyByte(this);
    1339             : 
    1340        1977 :     if (eWorkingDataType == GDT_UInt8 && eResample == GRA_Bilinear &&
    1341             :         bNoMasksOrDstDensityOnly)
    1342         126 :         return GWKBilinearNoMasksOrDstDensityOnlyByte(this);
    1343             : 
    1344        1851 :     if (eWorkingDataType == GDT_UInt8 && eResample == GRA_Cubic &&
    1345             :         bNoMasksOrDstDensityOnly)
    1346         677 :         return GWKCubicNoMasksOrDstDensityOnlyByte(this);
    1347             : 
    1348        1174 :     if (eWorkingDataType == GDT_UInt8 && eResample == GRA_CubicSpline &&
    1349             :         bNoMasksOrDstDensityOnly)
    1350          12 :         return GWKCubicSplineNoMasksOrDstDensityOnlyByte(this);
    1351             : 
    1352        1162 :     if (eWorkingDataType == GDT_UInt8 && eResample == GRA_NearestNeighbour)
    1353         350 :         return GWKNearestByte(this);
    1354             : 
    1355         812 :     if ((eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16) &&
    1356         165 :         eResample == GRA_NearestNeighbour && bNoMasksOrDstDensityOnly)
    1357          14 :         return GWKNearestNoMasksOrDstDensityOnlyShort(this);
    1358             : 
    1359         798 :     if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Cubic &&
    1360             :         bNoMasksOrDstDensityOnly)
    1361           5 :         return GWKCubicNoMasksOrDstDensityOnlyShort(this);
    1362             : 
    1363         793 :     if ((eWorkingDataType == GDT_Int16) && eResample == GRA_CubicSpline &&
    1364             :         bNoMasksOrDstDensityOnly)
    1365           6 :         return GWKCubicSplineNoMasksOrDstDensityOnlyShort(this);
    1366             : 
    1367         787 :     if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Bilinear &&
    1368             :         bNoMasksOrDstDensityOnly)
    1369           5 :         return GWKBilinearNoMasksOrDstDensityOnlyShort(this);
    1370             : 
    1371         782 :     if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Cubic &&
    1372             :         bNoMasksOrDstDensityOnly)
    1373          14 :         return GWKCubicNoMasksOrDstDensityOnlyUShort(this);
    1374             : 
    1375         768 :     if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_CubicSpline &&
    1376             :         bNoMasksOrDstDensityOnly)
    1377           5 :         return GWKCubicSplineNoMasksOrDstDensityOnlyUShort(this);
    1378             : 
    1379         763 :     if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Bilinear &&
    1380             :         bNoMasksOrDstDensityOnly)
    1381           6 :         return GWKBilinearNoMasksOrDstDensityOnlyUShort(this);
    1382             : 
    1383         757 :     if (eWorkingDataType == GDT_Int16 && eResample == GRA_NearestNeighbour)
    1384          45 :         return GWKNearestShort(this);
    1385             : 
    1386         712 :     if (eWorkingDataType == GDT_UInt16 && eResample == GRA_NearestNeighbour)
    1387          10 :         return GWKNearestUnsignedShort(this);
    1388             : 
    1389         702 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour &&
    1390             :         bNoMasksOrDstDensityOnly)
    1391          11 :         return GWKNearestNoMasksOrDstDensityOnlyFloat(this);
    1392             : 
    1393         691 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour)
    1394          50 :         return GWKNearestFloat(this);
    1395             : 
    1396         641 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_Bilinear &&
    1397             :         bNoMasksOrDstDensityOnly)
    1398           4 :         return GWKBilinearNoMasksOrDstDensityOnlyFloat(this);
    1399             : 
    1400         637 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_Cubic &&
    1401             :         bNoMasksOrDstDensityOnly)
    1402           9 :         return GWKCubicNoMasksOrDstDensityOnlyFloat(this);
    1403             : 
    1404             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    1405             :     if (eWorkingDataType == GDT_Float64 && eResample == GRA_Bilinear &&
    1406             :         bNoMasksOrDstDensityOnly)
    1407             :         return GWKBilinearNoMasksOrDstDensityOnlyDouble(this);
    1408             : 
    1409             :     if (eWorkingDataType == GDT_Float64 && eResample == GRA_Cubic &&
    1410             :         bNoMasksOrDstDensityOnly)
    1411             :         return GWKCubicNoMasksOrDstDensityOnlyDouble(this);
    1412             : #endif
    1413             : 
    1414         628 :     if (eResample == GRA_Average)
    1415          77 :         return GWKAverageOrMode(this);
    1416             : 
    1417         551 :     if (eResample == GRA_RMS)
    1418           9 :         return GWKAverageOrMode(this);
    1419             : 
    1420         542 :     if (eResample == GRA_Mode)
    1421          45 :         return GWKAverageOrMode(this);
    1422             : 
    1423         497 :     if (eResample == GRA_Max)
    1424           6 :         return GWKAverageOrMode(this);
    1425             : 
    1426         491 :     if (eResample == GRA_Min)
    1427           5 :         return GWKAverageOrMode(this);
    1428             : 
    1429         486 :     if (eResample == GRA_Med)
    1430           6 :         return GWKAverageOrMode(this);
    1431             : 
    1432         480 :     if (eResample == GRA_Q1)
    1433          10 :         return GWKAverageOrMode(this);
    1434             : 
    1435         470 :     if (eResample == GRA_Q3)
    1436           5 :         return GWKAverageOrMode(this);
    1437             : 
    1438         465 :     if (eResample == GRA_Sum)
    1439          19 :         return GWKSumPreserving(this);
    1440             : 
    1441         446 :     if (!GDALDataTypeIsComplex(eWorkingDataType))
    1442             :     {
    1443         219 :         return GWKRealCase(this);
    1444             :     }
    1445             : 
    1446         227 :     return GWKGeneralCase(this);
    1447             : }
    1448             : 
    1449             : /************************************************************************/
    1450             : /*                              Validate()                              */
    1451             : /************************************************************************/
    1452             : 
    1453             : /**
    1454             :  * \fn CPLErr GDALWarpKernel::Validate()
    1455             :  *
    1456             :  * Check the settings in the GDALWarpKernel, and issue a CPLError()
    1457             :  * (and return CE_Failure) if the configuration is considered to be
    1458             :  * invalid for some reason.
    1459             :  *
                        * Two checks are made by the body below:
                        *  - eResample must be a valid index into anGWKFilterRadius;
                        *  - when the EXCLUDED_VALUES warp option is set, its number of tokens
                        *    must be a multiple of nBands.
                        * On success, the EXCLUDED_VALUES tokens are converted with CPLAtof()
                        * and appended, nBands at a time, to m_aadfExcludedValues.
                        *
    1460             :  * This method will also do some standard defaulting such as setting
    1461             :  * pfnProgress to GDALDummyProgress() if it is NULL.
                        * NOTE(review): the body below never touches pfnProgress, so the
                        * sentence above looks stale -- confirm and remove it if so.
    1462             :  *
    1463             :  * @return CE_None on success or CE_Failure if an error is detected.
    1464             :  */
    1465             : 
    1466        2959 : CPLErr GDALWarpKernel::Validate()
    1467             : 
    1468             : {
                           // Casting to size_t makes a negative eResample wrap to a huge value,
                           // so this single comparison rejects anything outside the table.
    1469        2959 :     if (static_cast<size_t>(eResample) >=
    1470             :         (sizeof(anGWKFilterRadius) / sizeof(anGWKFilterRadius[0])))
    1471             :     {
    1472           0 :         CPLError(CE_Failure, CPLE_AppDefined,
    1473             :                  "Unsupported resampling method %d.",
    1474           0 :                  static_cast<int>(eResample));
    1475           0 :         return CE_Failure;
    1476             :     }
    1477             : 
    1478             :     // Tuples of values (e.g. "<R>,<G>,<B>" or "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>)") that must
    1479             :     // be ignored as contributing source pixels during resampling. Only taken into account by
    1480             :     // Average currently
    1481             :     const char *pszExcludedValues =
    1482        2959 :         CSLFetchNameValue(papszWarpOptions, "EXCLUDED_VALUES");
    1483        2959 :     if (pszExcludedValues)
    1484             :     {
                               // Presumably splits on any of '(', ',' and ')' so that both
                               // accepted syntaxes yield a flat list of numbers -- confirm
                               // against the CSLTokenizeString2() documentation.
    1485             :         const CPLStringList aosTokens(
    1486          14 :             CSLTokenizeString2(pszExcludedValues, "(,)", 0));
                               // NOTE(review): nBands == 0 would make this modulo undefined;
                               // presumably callers guarantee nBands > 0 -- confirm.
    1487          14 :         if ((aosTokens.size() % nBands) != 0)
    1488             :         {
    1489           1 :             CPLError(CE_Failure, CPLE_AppDefined,
    1490             :                      "EXCLUDED_VALUES should contain one or several tuples of "
    1491             :                      "%d values formatted like <R>,<G>,<B> or "
    1492             :                      "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>) if there are multiple "
    1493             :                      "tuples",
    1494             :                      nBands);
    1495           1 :             return CE_Failure;
    1496             :         }
                               // Regroup the flat token list into tuples of nBands values.
                               // NOTE(review): m_aadfExcludedValues is appended to and never
                               // reset here, so tuples accumulate if Validate() runs more than
                               // once on the same object -- confirm this is intended.
    1497          26 :         std::vector<double> adfTuple;
    1498          52 :         for (int i = 0; i < aosTokens.size(); ++i)
    1499             :         {
    1500          39 :             adfTuple.push_back(CPLAtof(aosTokens[i]));
                                   // A tuple is complete every nBands tokens.
    1501          39 :             if (((i + 1) % nBands) == 0)
    1502             :             {
    1503          13 :                 m_aadfExcludedValues.push_back(adfTuple);
    1504          13 :                 adfTuple.clear();
    1505             :             }
    1506             :         }
    1507             :     }
    1508             : 
    1509        2958 :     return CE_None;
    1510             : }
    1511             : 
    1512             : /************************************************************************/
    1513             : /*                         GWKOverlayDensity()                          */
    1514             : /*                                                                      */
    1515             : /*      Compute the final density for the destination pixel.  This      */
    1516             : /*      is a function of the overlay density (passed in) and the        */
    1517             : /*      original density.                                               */
    1518             : /************************************************************************/
    1519             : 
    1520    10022900 : static void GWKOverlayDensity(const GDALWarpKernel *poWK, GPtrDiff_t iDstOffset,
    1521             :                               double dfDensity)
    1522             : {
                           // Updates poWK->pafDstDensity[iDstOffset] in place from the overlay
                           // density dfDensity. poWK is const, but pafDstDensity is a pointer
                           // member, so the buffer it designates can still be written.
                           //
                           // Nothing to do when the overlay density is below 0.0001 or when no
                           // destination density buffer is attached to the kernel.
    1523    10022900 :     if (dfDensity < 0.0001 || poWK->pafDstDensity == nullptr)
    1524     8062630 :         return;
    1525             : 
                           // new = 1 - (1 - overlay) * (1 - old), evaluated in single precision
                           // (dfDensity is narrowed to float before the subtraction).
    1526     1960260 :     poWK->pafDstDensity[iDstOffset] =
    1527     1960260 :         1.0f -
    1528     1960260 :         (1.0f - float(dfDensity)) * (1.0f - poWK->pafDstDensity[iDstOffset]);
    1529             : }
    1530             : 
    1531             : /************************************************************************/
    1532             : /*                           GWKRoundValueT()                           */
    1533             : /************************************************************************/
    1534             : 
    1535             : template <class T, class U, bool is_signed> struct sGWKRoundValueT
    1536             : {
                           // Rounding helper selected on the signedness of the target type T.
                           // eval() is only declared in this primary template; the two partial
                           // specializations below (is_signed = true / false) define it.
    1537             :     static T eval(U);
    1538             : };
    1539             : 
    1540             : template <class T, class U> struct sGWKRoundValueT<T, U, true> /* signed */
    1541             : {
                           // floor(value + 0.5): nearest integer, with exact halves going
                           // towards +infinity (e.g. -1.5 -> -1). No range check is done before
                           // the cast to T.
    1542      791525 :     static T eval(U value)
    1543             :     {
    1544      791525 :         return static_cast<T>(floor(value + U(0.5)));
    1545             :     }
    1546             : };
    1547             : 
    1548             : template <class T, class U> struct sGWKRoundValueT<T, U, false> /* unsigned */
    1549             : {
                           // Adds 0.5 and lets the cast to T truncate. This equals rounding to
                           // nearest only for non-negative input; presumably callers clamp to
                           // T's range first, as GWKClampValueT() does -- confirm for others.
    1550   124559887 :     static T eval(U value)
    1551             :     {
    1552   124559887 :         return static_cast<T>(value + U(0.5));
    1553             :     }
    1554             : };
    1555             : 
    1556   125351412 : template <class T, class U> static T GWKRoundValueT(U value)
    1557             : {
                           // Rounds value (of type U) to type T by forwarding to the
                           // sGWKRoundValueT specialization that matches the signedness of T.
                           // No clamping to the range of T happens here.
    1558   125351412 :     return sGWKRoundValueT<T, U, cpl::NumericLimits<T>::is_signed>::eval(value);
    1559             : }
    1560             : 
    1561      268974 : template <> float GWKRoundValueT<float, double>(double value)
    1562             : {
                           // Explicit specialization for a float target: no rounding to an
                           // integer, just a narrowing conversion from double to float.
    1563      268974 :     return static_cast<float>(value);
    1564             : }
    1565             : 
                       // The double -> double specialization below is compiled only when the
                       // `notused` macro is defined; it returns its argument unchanged.
    1566             : #ifdef notused
    1567             : template <> double GWKRoundValueT<double, double>(double value)
    1568             : {
    1569             :     return value;
    1570             : }
    1571             : #endif
    1572             : 
    1573             : /************************************************************************/
    1574             : /*                           GWKClampValueT()                           */
    1575             : /************************************************************************/
    1576             : 
    1577   119660582 : template <class T, class U> static CPL_INLINE T GWKClampValueT(U value)
    1578             : {
                           // Converts value (of type U) to T with saturation: values below
                           // cpl::NumericLimits<T>::min() give min(), values above max() give
                           // max(), and anything in between is rounded by GWKRoundValueT().
                           // The bounds are converted to U before comparing.
                           // NOTE(review): a NaN input fails both comparisons and therefore
                           // reaches the rounding branch -- confirm callers never pass NaN when
                           // T is an integer type.
    1579   119660582 :     if (value < static_cast<U>(cpl::NumericLimits<T>::min()))
    1580      545257 :         return cpl::NumericLimits<T>::min();
    1581   119114984 :     else if (value > static_cast<U>(cpl::NumericLimits<T>::max()))
    1582      772808 :         return cpl::NumericLimits<T>::max();
    1583             :     else
    1584   118342044 :         return GWKRoundValueT<T, U>(value);
    1585             : }
    1586             : 
    1587      718914 : template <> float GWKClampValueT<float, double>(double dfValue)
    1588             : {
                           // Explicit specialization for a float target: no saturation and no
                           // rounding, only a narrowing conversion from double to float.
    1589      718914 :     return static_cast<float>(dfValue);
    1590             : }
    1591             : 
                       // The double -> double specialization below is compiled only when the
                       // `notused` macro is defined; it returns its argument unchanged.
    1592             : #ifdef notused
    1593             : template <> double GWKClampValueT<double, double>(double dfValue)
    1594             : {
    1595             :     return dfValue;
    1596             : }
    1597             : #endif
    1598             : 
    1599             : /************************************************************************/
    1600             : /*                            AvoidNoData()                             */
    1601             : /************************************************************************/
    1602             : 
    1603        1283 : template <class T> inline void AvoidNoData(T *pDst, GPtrDiff_t iDstOffset)
    1604             : {
                           // Unconditionally replaces pDst[iDstOffset] by an adjacent
                           // representable value of T. This overload does not test for nodata
                           // itself; the decision to call it is left to the caller.
    1605             :     if constexpr (cpl::NumericLimits<T>::is_integer)
    1606             :     {
                               // Integer types: step down by one, except at the lowest value
                               // where decrementing is impossible, so step up to lowest() + 1.
    1607        1027 :         if (pDst[iDstOffset] == static_cast<T>(cpl::NumericLimits<T>::lowest()))
    1608             :         {
    1609         515 :             pDst[iDstOffset] =
    1610         515 :                 static_cast<T>(cpl::NumericLimits<T>::lowest() + 1);
    1611             :         }
    1612             :         else
    1613         512 :             pDst[iDstOffset]--;
    1614             :     }
    1615             :     else
    1616             :     {
                               // Non-integer types: move to the next representable value in
                               // the direction of max(), except when already at max(), in
                               // which case move one step in the direction of 0.
                               // `using std::nextafter` followed by an unqualified call leaves
                               // overload lookup open to a nextafter() found for T by ADL.
    1617         256 :         if (pDst[iDstOffset] == cpl::NumericLimits<T>::max())
    1618             :         {
    1619             :             using std::nextafter;
    1620           0 :             pDst[iDstOffset] = nextafter(pDst[iDstOffset], static_cast<T>(0));
    1621             :         }
    1622             :         else
    1623             :         {
    1624             :             using std::nextafter;
    1625         256 :             pDst[iDstOffset] =
    1626         256 :                 nextafter(pDst[iDstOffset], cpl::NumericLimits<T>::max());
    1627             :         }
    1628             :     }
    1629        1283 : }
    1630             : 
    1631             : /************************************************************************/
    1632             : /*                            AvoidNoData()                             */
    1633             : /************************************************************************/
    1634             : 
    1635             : template <class T>
    1636    13527030 : inline void AvoidNoData(const GDALWarpKernel *poWK, int iBand,
    1637             :                         GPtrDiff_t iDstOffset)
    1638             : {
                           // If destination nodata values are defined and the pixel at
                           // iDstOffset in destination band iBand equals that band's nodata
                           // value, nudge the pixel to a neighbouring value and emit a warning
                           // the first time this happens for the kernel.
                           //
                           // The band buffer is stored as GByte*; view it as elements of T.
    1639    13527030 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    1640    13527030 :     T *pDst = reinterpret_cast<T *>(pabyDst);
    1641             : 
                           // The comparison is done in double, the type of padfDstNoDataReal.
    1642    13527030 :     if (poWK->padfDstNoDataReal != nullptr &&
    1643     6419188 :         poWK->padfDstNoDataReal[iBand] == static_cast<double>(pDst[iDstOffset]))
    1644             :     {
    1645         640 :         AvoidNoData(pDst, iDstOffset);
    1646             : 
    1647         640 :         if (!poWK->bWarnedAboutDstNoDataReplacement)
    1648             :         {
                                   // poWK is const here, hence the const_cast to record that
                                   // the warning has been issued.
                                   // NOTE(review): this flag is read and written without any
                                   // synchronization visible here -- confirm that is acceptable
                                   // if this function can run on several threads.
    1649          40 :             const_cast<GDALWarpKernel *>(poWK)
    1650             :                 ->bWarnedAboutDstNoDataReplacement = true;
                                   // First %g: the nodata value (equal to the pixel value before
                                   // the change); second %g: the pixel value after the change.
    1651          40 :             CPLError(CE_Warning, CPLE_AppDefined,
    1652             :                      "Value %g in the source dataset has been changed to %g "
    1653             :                      "in the destination dataset to avoid being treated as "
    1654             :                      "NoData. To avoid this, select a different NoData value "
    1655             :                      "for the destination dataset.",
    1656          40 :                      poWK->padfDstNoDataReal[iBand],
    1657          40 :                      static_cast<double>(pDst[iDstOffset]));
    1658             :         }
    1659             :     }
    1660    13527030 : }
    1661             : 
    1662             : /************************************************************************/
    1663             : /*                      GWKAvoidNoDataMultiBand()                       */
    1664             : /************************************************************************/
    1665             : 
    1666             : template <class T>
    1667      524573 : static void GWKAvoidNoDataMultiBand(const GDALWarpKernel *poWK,
    1668             :                                     GPtrDiff_t iDstOffset)
    1669             : {
    1670      524573 :     T **ppDst = reinterpret_cast<T **>(poWK->papabyDstImage);
    1671      524573 :     if (poWK->padfDstNoDataReal != nullptr)
    1672             :     {
    1673      208615 :         for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    1674             :         {
    1675      208294 :             if (poWK->padfDstNoDataReal[iBand] !=
    1676      208294 :                 static_cast<double>(ppDst[iBand][iDstOffset]))
    1677      205830 :                 return;
    1678             :         }
    1679         964 :         for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    1680             :         {
    1681         643 :             AvoidNoData(ppDst[iBand], iDstOffset);
    1682             :         }
    1683             : 
    1684         321 :         if (!poWK->bWarnedAboutDstNoDataReplacement)
    1685             :         {
    1686          21 :             const_cast<GDALWarpKernel *>(poWK)
    1687             :                 ->bWarnedAboutDstNoDataReplacement = true;
    1688          42 :             std::string valueSrc, valueDst;
    1689          64 :             for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    1690             :             {
    1691          43 :                 if (!valueSrc.empty())
    1692             :                 {
    1693          22 :                     valueSrc += ',';
    1694          22 :                     valueDst += ',';
    1695             :                 }
    1696          43 :                 valueSrc += CPLSPrintf("%g", poWK->padfDstNoDataReal[iBand]);
    1697          43 :                 valueDst += CPLSPrintf(
    1698          43 :                     "%g", static_cast<double>(ppDst[iBand][iDstOffset]));
    1699             :             }
    1700          21 :             CPLError(CE_Warning, CPLE_AppDefined,
    1701             :                      "Value %s in the source dataset has been changed to %s "
    1702             :                      "in the destination dataset to avoid being treated as "
    1703             :                      "NoData. To avoid this, select a different NoData value "
    1704             :                      "for the destination dataset.",
    1705             :                      valueSrc.c_str(), valueDst.c_str());
    1706             :         }
    1707             :     }
    1708             : }
    1709             : 
    1710             : /************************************************************************/
    1711             : /*                      GWKAvoidNoDataMultiBand()                       */
    1712             : /************************************************************************/
    1713             : 
    1714      524573 : static void GWKAvoidNoDataMultiBand(const GDALWarpKernel *poWK,
    1715             :                                     GPtrDiff_t iDstOffset)
    1716             : {
    1717      524573 :     switch (poWK->eWorkingDataType)
    1718             :     {
    1719      523997 :         case GDT_UInt8:
    1720      523997 :             GWKAvoidNoDataMultiBand<std::uint8_t>(poWK, iDstOffset);
    1721      523997 :             break;
    1722             : 
    1723           0 :         case GDT_Int8:
    1724           0 :             GWKAvoidNoDataMultiBand<std::int8_t>(poWK, iDstOffset);
    1725           0 :             break;
    1726             : 
    1727         128 :         case GDT_Int16:
    1728         128 :             GWKAvoidNoDataMultiBand<std::int16_t>(poWK, iDstOffset);
    1729         128 :             break;
    1730             : 
    1731          64 :         case GDT_UInt16:
    1732          64 :             GWKAvoidNoDataMultiBand<std::uint16_t>(poWK, iDstOffset);
    1733          64 :             break;
    1734             : 
    1735          64 :         case GDT_Int32:
    1736          64 :             GWKAvoidNoDataMultiBand<std::int32_t>(poWK, iDstOffset);
    1737          64 :             break;
    1738             : 
    1739          64 :         case GDT_UInt32:
    1740          64 :             GWKAvoidNoDataMultiBand<std::uint32_t>(poWK, iDstOffset);
    1741          64 :             break;
    1742             : 
    1743          64 :         case GDT_Int64:
    1744          64 :             GWKAvoidNoDataMultiBand<std::int64_t>(poWK, iDstOffset);
    1745          64 :             break;
    1746             : 
    1747          64 :         case GDT_UInt64:
    1748          64 :             GWKAvoidNoDataMultiBand<std::uint64_t>(poWK, iDstOffset);
    1749          64 :             break;
    1750             : 
    1751           0 :         case GDT_Float16:
    1752           0 :             GWKAvoidNoDataMultiBand<GFloat16>(poWK, iDstOffset);
    1753           0 :             break;
    1754             : 
    1755          64 :         case GDT_Float32:
    1756          64 :             GWKAvoidNoDataMultiBand<float>(poWK, iDstOffset);
    1757          64 :             break;
    1758             : 
    1759          64 :         case GDT_Float64:
    1760          64 :             GWKAvoidNoDataMultiBand<double>(poWK, iDstOffset);
    1761          64 :             break;
    1762             : 
    1763           0 :         case GDT_CInt16:
    1764             :         case GDT_CInt32:
    1765             :         case GDT_CFloat16:
    1766             :         case GDT_CFloat32:
    1767             :         case GDT_CFloat64:
    1768             :         case GDT_Unknown:
    1769             :         case GDT_TypeCount:
    1770           0 :             break;
    1771             :     }
    1772      524573 : }
    1773             : 
    1774             : /************************************************************************/
    1775             : /*                       GWKSetPixelValueRealT()                        */
    1776             : /************************************************************************/
    1777             : 
    1778             : template <class T>
    1779     9992427 : static bool GWKSetPixelValueRealT(const GDALWarpKernel *poWK, int iBand,
    1780             :                                   GPtrDiff_t iDstOffset, double dfDensity,
    1781             :                                   T value, bool bAvoidNoDataSingleBand)
    1782             : {
    1783     9992427 :     T *pDst = reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
    1784             : 
    1785             :     /* -------------------------------------------------------------------- */
    1786             :     /*      If the source density is less than 100% we need to fetch the    */
    1787             :     /*      existing destination value, and mix it with the source to       */
    1788             :     /*      get the new "to apply" value.  Also compute composite           */
    1789             :     /*      density.                                                        */
    1790             :     /*                                                                      */
    1791             :     /*      We avoid mixing if density is very near one or risk mixing      */
    1792             :     /*      in very extreme nodata values and causing odd results (#1610)   */
    1793             :     /* -------------------------------------------------------------------- */
    1794     9992427 :     if (dfDensity < 0.9999)
    1795             :     {
    1796      945508 :         if (dfDensity < 0.0001)
    1797           0 :             return true;
    1798             : 
    1799      945508 :         double dfDstDensity = 1.0;
    1800             : 
    1801      945508 :         if (poWK->pafDstDensity != nullptr)
    1802      944036 :             dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
    1803        1472 :         else if (poWK->panDstValid != nullptr &&
    1804           0 :                  !CPLMaskGet(poWK->panDstValid, iDstOffset))
    1805           0 :             dfDstDensity = 0.0;
    1806             : 
    1807             :         // It seems like we also ought to be testing panDstValid[] here!
    1808             : 
    1809      945508 :         const double dfDstReal = static_cast<double>(pDst[iDstOffset]);
    1810             : 
    1811             :         // The destination density is really only relative to the portion
    1812             :         // not occluded by the overlay.
    1813      945508 :         const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
    1814             : 
    1815      945508 :         const double dfReal =
    1816      945508 :             (double(value) * dfDensity + dfDstReal * dfDstInfluence) /
    1817      945508 :             (dfDensity + dfDstInfluence);
    1818             : 
    1819             :         /* --------------------------------------------------------------------
    1820             :          */
    1821             :         /*      Actually apply the destination value. */
    1822             :         /*                                                                      */
    1823             :         /*      Avoid using the destination nodata value for integer datatypes
    1824             :          */
    1825             :         /*      if by chance it is equal to the computed pixel value. */
    1826             :         /* --------------------------------------------------------------------
    1827             :          */
    1828      945508 :         pDst[iDstOffset] = GWKClampValueT<T>(dfReal);
    1829             :     }
    1830             :     else
    1831             :     {
    1832     9046916 :         pDst[iDstOffset] = value;
    1833             :     }
    1834             : 
    1835     9992427 :     if (bAvoidNoDataSingleBand)
    1836     8719761 :         AvoidNoData<T>(poWK, iBand, iDstOffset);
    1837             : 
    1838     9992427 :     return true;
    1839             : }
    1840             : 
    1841             : /************************************************************************/
    1842             : /*                      ClampRoundAndAvoidNoData()                      */
    1843             : /************************************************************************/
    1844             : 
    1845             : template <class T>
    1846     5107725 : inline void ClampRoundAndAvoidNoData(const GDALWarpKernel *poWK, int iBand,
    1847             :                                      GPtrDiff_t iDstOffset, double dfReal,
    1848             :                                      bool bAvoidNoDataSingleBand)
    1849             : {
    1850     5107725 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    1851     5107725 :     T *pDst = reinterpret_cast<T *>(pabyDst);
    1852             : 
    1853             :     if constexpr (cpl::NumericLimits<T>::is_integer)
    1854             :     {
    1855             :         using std::floor;
    1856     4610595 :         if (dfReal < static_cast<double>(cpl::NumericLimits<T>::lowest()))
    1857        5298 :             pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::lowest());
    1858     4605295 :         else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
    1859       23620 :             pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::max());
    1860             :         else if constexpr (cpl::NumericLimits<T>::is_signed)
    1861       10410 :             pDst[iDstOffset] = static_cast<T>(floor(dfReal + 0.5));
    1862             :         else
    1863     4571265 :             pDst[iDstOffset] = static_cast<T>(dfReal + 0.5);
    1864             :     }
    1865             :     else
    1866             :     {
    1867      497130 :         pDst[iDstOffset] = static_cast<T>(dfReal);
    1868             :     }
    1869             : 
    1870     5107725 :     if (bAvoidNoDataSingleBand)
    1871     4807319 :         AvoidNoData<T>(poWK, iBand, iDstOffset);
    1872     5107725 : }
    1873             : 
    1874             : /************************************************************************/
    1875             : /*                          GWKSetPixelValue()                          */
    1876             : /************************************************************************/
    1877             : 
    1878     4012410 : static bool GWKSetPixelValue(const GDALWarpKernel *poWK, int iBand,
    1879             :                              GPtrDiff_t iDstOffset, double dfDensity,
    1880             :                              double dfReal, double dfImag,
    1881             :                              bool bAvoidNoDataSingleBand)
    1882             : 
    1883             : {
    1884     4012410 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    1885             : 
    1886             :     /* -------------------------------------------------------------------- */
    1887             :     /*      If the source density is less than 100% we need to fetch the    */
    1888             :     /*      existing destination value, and mix it with the source to       */
    1889             :     /*      get the new "to apply" value.  Also compute composite           */
    1890             :     /*      density.                                                        */
    1891             :     /*                                                                      */
    1892             :     /*      We avoid mixing if density is very near one or risk mixing      */
    1893             :     /*      in very extreme nodata values and causing odd results (#1610)   */
    1894             :     /* -------------------------------------------------------------------- */
    1895     4012410 :     if (dfDensity < 0.9999)
    1896             :     {
    1897         800 :         if (dfDensity < 0.0001)
    1898           0 :             return true;
    1899             : 
    1900         800 :         double dfDstDensity = 1.0;
    1901         800 :         if (poWK->pafDstDensity != nullptr)
    1902         800 :             dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
    1903           0 :         else if (poWK->panDstValid != nullptr &&
    1904           0 :                  !CPLMaskGet(poWK->panDstValid, iDstOffset))
    1905           0 :             dfDstDensity = 0.0;
    1906             : 
    1907         800 :         double dfDstReal = 0.0;
    1908         800 :         double dfDstImag = 0.0;
    1909             :         // It seems like we also ought to be testing panDstValid[] here!
    1910             : 
    1911             :         // TODO(schwehr): Factor out this repreated type of set.
    1912         800 :         switch (poWK->eWorkingDataType)
    1913             :         {
    1914           0 :             case GDT_UInt8:
    1915           0 :                 dfDstReal = pabyDst[iDstOffset];
    1916           0 :                 dfDstImag = 0.0;
    1917           0 :                 break;
    1918             : 
    1919           0 :             case GDT_Int8:
    1920           0 :                 dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
    1921           0 :                 dfDstImag = 0.0;
    1922           0 :                 break;
    1923             : 
    1924         400 :             case GDT_Int16:
    1925         400 :                 dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
    1926         400 :                 dfDstImag = 0.0;
    1927         400 :                 break;
    1928             : 
    1929         400 :             case GDT_UInt16:
    1930         400 :                 dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
    1931         400 :                 dfDstImag = 0.0;
    1932         400 :                 break;
    1933             : 
    1934           0 :             case GDT_Int32:
    1935           0 :                 dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
    1936           0 :                 dfDstImag = 0.0;
    1937           0 :                 break;
    1938             : 
    1939           0 :             case GDT_UInt32:
    1940           0 :                 dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
    1941           0 :                 dfDstImag = 0.0;
    1942           0 :                 break;
    1943             : 
    1944           0 :             case GDT_Int64:
    1945           0 :                 dfDstReal = static_cast<double>(
    1946           0 :                     reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
    1947           0 :                 dfDstImag = 0.0;
    1948           0 :                 break;
    1949             : 
    1950           0 :             case GDT_UInt64:
    1951           0 :                 dfDstReal = static_cast<double>(
    1952           0 :                     reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
    1953           0 :                 dfDstImag = 0.0;
    1954           0 :                 break;
    1955             : 
    1956           0 :             case GDT_Float16:
    1957           0 :                 dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
    1958           0 :                 dfDstImag = 0.0;
    1959           0 :                 break;
    1960             : 
    1961           0 :             case GDT_Float32:
    1962           0 :                 dfDstReal =
    1963           0 :                     double(reinterpret_cast<float *>(pabyDst)[iDstOffset]);
    1964           0 :                 dfDstImag = 0.0;
    1965           0 :                 break;
    1966             : 
    1967           0 :             case GDT_Float64:
    1968           0 :                 dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
    1969           0 :                 dfDstImag = 0.0;
    1970           0 :                 break;
    1971             : 
    1972           0 :             case GDT_CInt16:
    1973           0 :                 dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2];
    1974           0 :                 dfDstImag =
    1975           0 :                     reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2 + 1];
    1976           0 :                 break;
    1977             : 
    1978           0 :             case GDT_CInt32:
    1979           0 :                 dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2];
    1980           0 :                 dfDstImag =
    1981           0 :                     reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2 + 1];
    1982           0 :                 break;
    1983             : 
    1984           0 :             case GDT_CFloat16:
    1985             :                 dfDstReal =
    1986           0 :                     reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2];
    1987             :                 dfDstImag =
    1988           0 :                     reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1];
    1989           0 :                 break;
    1990             : 
    1991           0 :             case GDT_CFloat32:
    1992           0 :                 dfDstReal =
    1993           0 :                     double(reinterpret_cast<float *>(pabyDst)[iDstOffset * 2]);
    1994           0 :                 dfDstImag = double(
    1995           0 :                     reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1]);
    1996           0 :                 break;
    1997             : 
    1998           0 :             case GDT_CFloat64:
    1999           0 :                 dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset * 2];
    2000           0 :                 dfDstImag =
    2001           0 :                     reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1];
    2002           0 :                 break;
    2003             : 
    2004           0 :             case GDT_Unknown:
    2005             :             case GDT_TypeCount:
    2006           0 :                 CPLAssert(false);
    2007             :                 return false;
    2008             :         }
    2009             : 
    2010             :         // The destination density is really only relative to the portion
    2011             :         // not occluded by the overlay.
    2012         800 :         const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
    2013             : 
    2014         800 :         dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
    2015         800 :                  (dfDensity + dfDstInfluence);
    2016             : 
    2017         800 :         dfImag = (dfImag * dfDensity + dfDstImag * dfDstInfluence) /
    2018         800 :                  (dfDensity + dfDstInfluence);
    2019             :     }
    2020             : 
    2021             :     /* -------------------------------------------------------------------- */
    2022             :     /*      Actually apply the destination value.                           */
    2023             :     /*                                                                      */
    2024             :     /*      Avoid using the destination nodata value for integer datatypes  */
    2025             :     /*      if by chance it is equal to the computed pixel value.           */
    2026             :     /* -------------------------------------------------------------------- */
    2027             : 
    2028     4012410 :     switch (poWK->eWorkingDataType)
    2029             :     {
    2030     3290010 :         case GDT_UInt8:
    2031     3290010 :             ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal,
    2032             :                                             bAvoidNoDataSingleBand);
    2033     3290010 :             break;
    2034             : 
    2035           0 :         case GDT_Int8:
    2036           0 :             ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal,
    2037             :                                             bAvoidNoDataSingleBand);
    2038           0 :             break;
    2039             : 
    2040        7472 :         case GDT_Int16:
    2041        7472 :             ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal,
    2042             :                                              bAvoidNoDataSingleBand);
    2043        7472 :             break;
    2044             : 
    2045         464 :         case GDT_UInt16:
    2046         464 :             ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal,
    2047             :                                               bAvoidNoDataSingleBand);
    2048         464 :             break;
    2049             : 
    2050          63 :         case GDT_UInt32:
    2051          63 :             ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal,
    2052             :                                               bAvoidNoDataSingleBand);
    2053          63 :             break;
    2054             : 
    2055          63 :         case GDT_Int32:
    2056          63 :             ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal,
    2057             :                                              bAvoidNoDataSingleBand);
    2058          63 :             break;
    2059             : 
    2060           0 :         case GDT_UInt64:
    2061           0 :             ClampRoundAndAvoidNoData<std::uint64_t>(
    2062             :                 poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
    2063           0 :             break;
    2064             : 
    2065           0 :         case GDT_Int64:
    2066           0 :             ClampRoundAndAvoidNoData<std::int64_t>(
    2067             :                 poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
    2068           0 :             break;
    2069             : 
    2070           0 :         case GDT_Float16:
    2071           0 :             ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal,
    2072             :                                                bAvoidNoDataSingleBand);
    2073           0 :             break;
    2074             : 
    2075      478957 :         case GDT_Float32:
    2076      478957 :             ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal,
    2077             :                                             bAvoidNoDataSingleBand);
    2078      478957 :             break;
    2079             : 
    2080         149 :         case GDT_Float64:
    2081         149 :             ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal,
    2082             :                                              bAvoidNoDataSingleBand);
    2083         149 :             break;
    2084             : 
    2085      234079 :         case GDT_CInt16:
    2086             :         {
    2087             :             typedef GInt16 T;
    2088      234079 :             if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
    2089           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    2090           0 :                     cpl::NumericLimits<T>::min();
    2091      234079 :             else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
    2092           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    2093           0 :                     cpl::NumericLimits<T>::max();
    2094             :             else
    2095      234079 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    2096      234079 :                     static_cast<T>(floor(dfReal + 0.5));
    2097      234079 :             if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
    2098           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    2099           0 :                     cpl::NumericLimits<T>::min();
    2100      234079 :             else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
    2101           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    2102           0 :                     cpl::NumericLimits<T>::max();
    2103             :             else
    2104      234079 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    2105      234079 :                     static_cast<T>(floor(dfImag + 0.5));
    2106      234079 :             break;
    2107             :         }
    2108             : 
    2109         379 :         case GDT_CInt32:
    2110             :         {
    2111             :             typedef GInt32 T;
    2112         379 :             if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
    2113           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    2114           0 :                     cpl::NumericLimits<T>::min();
    2115         379 :             else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
    2116           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    2117           0 :                     cpl::NumericLimits<T>::max();
    2118             :             else
    2119         379 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    2120         379 :                     static_cast<T>(floor(dfReal + 0.5));
    2121         379 :             if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
    2122           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    2123           0 :                     cpl::NumericLimits<T>::min();
    2124         379 :             else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
    2125           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    2126           0 :                     cpl::NumericLimits<T>::max();
    2127             :             else
    2128         379 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    2129         379 :                     static_cast<T>(floor(dfImag + 0.5));
    2130         379 :             break;
    2131             :         }
    2132             : 
    2133           0 :         case GDT_CFloat16:
    2134           0 :             reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2] =
    2135           0 :                 static_cast<GFloat16>(dfReal);
    2136           0 :             reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1] =
    2137           0 :                 static_cast<GFloat16>(dfImag);
    2138           0 :             break;
    2139             : 
    2140         394 :         case GDT_CFloat32:
    2141         394 :             reinterpret_cast<float *>(pabyDst)[iDstOffset * 2] =
    2142         394 :                 static_cast<float>(dfReal);
    2143         394 :             reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1] =
    2144         394 :                 static_cast<float>(dfImag);
    2145         394 :             break;
    2146             : 
    2147         380 :         case GDT_CFloat64:
    2148         380 :             reinterpret_cast<double *>(pabyDst)[iDstOffset * 2] = dfReal;
    2149         380 :             reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1] = dfImag;
    2150         380 :             break;
    2151             : 
    2152           0 :         case GDT_Unknown:
    2153             :         case GDT_TypeCount:
    2154           0 :             return false;
    2155             :     }
    2156             : 
    2157     4012410 :     return true;
    2158             : }
    2159             : 
    2160             : /************************************************************************/
    2161             : /*                        GWKSetPixelValueReal()                        */
    2162             : /************************************************************************/
    2163             : 
    2164     1330540 : static bool GWKSetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
    2165             :                                  GPtrDiff_t iDstOffset, double dfDensity,
    2166             :                                  double dfReal, bool bAvoidNoDataSingleBand)
    2167             : 
    2168             : {
                           // Writes the real-valued sample dfReal into destination band iBand
                           // at pixel index iDstOffset of poWK->papabyDstImage[].
                           //
                           // dfDensity is the density of the incoming sample:
                           //   - below 0.0001: nothing is written and true is returned;
                           //   - below 0.9999: the sample is first blended with the value
                           //     already stored in the destination (see below);
                           //   - otherwise: dfReal is passed on unblended.
                           // bAvoidNoDataSingleBand is forwarded unchanged to
                           // ClampRoundAndAvoidNoData<>(), which performs the actual store.
                           //
                           // Returns true when the sample was stored or skipped for low
                           // density, false when eWorkingDataType is a complex or invalid
                           // type. This function body only reads pafDstDensity[] and
                           // panDstValid[]; it does not update them itself.
    2169     1330540 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    2170             : 
    2171             :     /* -------------------------------------------------------------------- */
    2172             :     /*      If the source density is less than 100% we need to fetch the    */
    2173             :     /*      existing destination value, and mix it with the source to       */
    2174             :     /*      get the new "to apply" value.  Also compute composite           */
    2175             :     /*      density.                                                        */
    2176             :     /*                                                                      */
    2177             :     /*      We avoid mixing if density is very near one or risk mixing      */
    2178             :     /*      in very extreme nodata values and causing odd results (#1610)   */
    2179             :     /* -------------------------------------------------------------------- */
    2180     1330540 :     if (dfDensity < 0.9999)
    2181             :     {
                               // Near-zero contribution: leave the destination untouched.
    2182         600 :         if (dfDensity < 0.0001)
    2183           0 :             return true;
    2184             : 
    2185         600 :         double dfDstReal = 0.0;
                               // The destination counts as fully dense unless a density array
                               // or a validity mask says otherwise.
    2186         600 :         double dfDstDensity = 1.0;
    2187             : 
    2188         600 :         if (poWK->pafDstDensity != nullptr)
    2189         600 :             dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
    2190           0 :         else if (poWK->panDstValid != nullptr &&
    2191           0 :                  !CPLMaskGet(poWK->panDstValid, iDstOffset))
    2192           0 :             dfDstDensity = 0.0;
    2193             : 
    2194             :         // panDstValid[] is only tested above when pafDstDensity is null.
    2195             : 
                               // Fetch the value currently stored in the destination, widened
                               // to double, according to the working data type.
    2196         600 :         switch (poWK->eWorkingDataType)
    2197             :         {
    2198           0 :             case GDT_UInt8:
    2199           0 :                 dfDstReal = pabyDst[iDstOffset];
    2200           0 :                 break;
    2201             : 
    2202           0 :             case GDT_Int8:
    2203           0 :                 dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
    2204           0 :                 break;
    2205             : 
    2206         300 :             case GDT_Int16:
    2207         300 :                 dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
    2208         300 :                 break;
    2209             : 
    2210         300 :             case GDT_UInt16:
    2211         300 :                 dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
    2212         300 :                 break;
    2213             : 
    2214           0 :             case GDT_Int32:
    2215           0 :                 dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
    2216           0 :                 break;
    2217             : 
    2218           0 :             case GDT_UInt32:
    2219           0 :                 dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
    2220           0 :                 break;
    2221             : 
    2222           0 :             case GDT_Int64:
    2223           0 :                 dfDstReal = static_cast<double>(
    2224           0 :                     reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
    2225           0 :                 break;
    2226             : 
    2227           0 :             case GDT_UInt64:
    2228           0 :                 dfDstReal = static_cast<double>(
    2229           0 :                     reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
    2230           0 :                 break;
    2231             : 
    2232           0 :             case GDT_Float16:
    2233           0 :                 dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
    2234           0 :                 break;
    2235             : 
    2236           0 :             case GDT_Float32:
    2237           0 :                 dfDstReal =
    2238           0 :                     double(reinterpret_cast<float *>(pabyDst)[iDstOffset]);
    2239           0 :                 break;
    2240             : 
    2241           0 :             case GDT_Float64:
    2242           0 :                 dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
    2243           0 :                 break;
    2244             : 
                                   // Complex and invalid working types are not handled by
                                   // this real-only routine.
    2245           0 :             case GDT_CInt16:
    2246             :             case GDT_CInt32:
    2247             :             case GDT_CFloat16:
    2248             :             case GDT_CFloat32:
    2249             :             case GDT_CFloat64:
    2250             :             case GDT_Unknown:
    2251             :             case GDT_TypeCount:
    2252           0 :                 CPLAssert(false);
    2253             :                 return false;
    2254             :         }
    2255             : 
    2256             :         // The destination density is really only relative to the portion
    2257             :         // not occluded by the overlay.
    2258         600 :         const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
    2259             : 
                               // Weighted average of the new and the existing value.
                               // dfDensity >= 0.0001 on this path, so the divisor is strictly
                               // positive provided dfDstDensity is not negative.
    2260         600 :         dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
    2261         600 :                  (dfDensity + dfDstInfluence);
    2262             :     }
    2263             : 
    2264             :     /* -------------------------------------------------------------------- */
    2265             :     /*      Actually apply the destination value.                           */
    2266             :     /*                                                                      */
    2267             :     /*      Avoid using the destination nodata value for integer datatypes  */
    2268             :     /*      if by chance it is equal to the computed pixel value.           */
    2269             :     /* -------------------------------------------------------------------- */
    2270             : 
                           // Every real type delegates to ClampRoundAndAvoidNoData<T>() with the
                           // C++ type matching eWorkingDataType.
    2271     1330540 :     switch (poWK->eWorkingDataType)
    2272             :     {
    2273     1308410 :         case GDT_UInt8:
    2274     1308410 :             ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal,
    2275             :                                             bAvoidNoDataSingleBand);
    2276     1308410 :             break;
    2277             : 
    2278           0 :         case GDT_Int8:
    2279           0 :             ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal,
    2280             :                                             bAvoidNoDataSingleBand);
    2281           0 :             break;
    2282             : 
    2283        1309 :         case GDT_Int16:
    2284        1309 :             ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal,
    2285             :                                              bAvoidNoDataSingleBand);
    2286        1309 :             break;
    2287             : 
    2288         475 :         case GDT_UInt16:
    2289         475 :             ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal,
    2290             :                                               bAvoidNoDataSingleBand);
    2291         475 :             break;
    2292             : 
    2293         539 :         case GDT_UInt32:
    2294         539 :             ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal,
    2295             :                                               bAvoidNoDataSingleBand);
    2296         539 :             break;
    2297             : 
    2298        1342 :         case GDT_Int32:
    2299        1342 :             ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal,
    2300             :                                              bAvoidNoDataSingleBand);
    2301        1342 :             break;
    2302             : 
    2303         224 :         case GDT_UInt64:
    2304         224 :             ClampRoundAndAvoidNoData<std::uint64_t>(
    2305             :                 poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
    2306         224 :             break;
    2307             : 
    2308         224 :         case GDT_Int64:
    2309         224 :             ClampRoundAndAvoidNoData<std::int64_t>(
    2310             :                 poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
    2311         224 :             break;
    2312             : 
    2313           0 :         case GDT_Float16:
    2314           0 :             ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal,
    2315             :                                                bAvoidNoDataSingleBand);
    2316           0 :             break;
    2317             : 
    2318        3538 :         case GDT_Float32:
    2319        3538 :             ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal,
    2320             :                                             bAvoidNoDataSingleBand);
    2321        3538 :             break;
    2322             : 
    2323       14486 :         case GDT_Float64:
    2324       14486 :             ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal,
    2325             :                                              bAvoidNoDataSingleBand);
    2326       14486 :             break;
    2327             : 
                               // Complex types are rejected here without an assertion, unlike
                               // GDT_Unknown / GDT_TypeCount below.
    2328           0 :         case GDT_CInt16:
    2329             :         case GDT_CInt32:
    2330             :         case GDT_CFloat16:
    2331             :         case GDT_CFloat32:
    2332             :         case GDT_CFloat64:
    2333           0 :             return false;
    2334             : 
    2335           0 :         case GDT_Unknown:
    2336             :         case GDT_TypeCount:
    2337           0 :             CPLAssert(false);
    2338             :             return false;
    2339             :     }
    2340             : 
    2341     1330540 :     return true;
    2342             : }
    2343             : 
    2344             : /************************************************************************/
    2345             : /*                          GWKGetPixelValue()                          */
    2346             : /************************************************************************/
    2347             : 
    2348             : /* It is assumed that panUnifiedSrcValid has been checked before */
    2349             : 
    2350    30268000 : static bool GWKGetPixelValue(const GDALWarpKernel *poWK, int iBand,
    2351             :                              GPtrDiff_t iSrcOffset, double *pdfDensity,
    2352             :                              double *pdfReal, double *pdfImag)
    2353             : 
    2354             : {
                           // Reads the source sample of band iBand at pixel index iSrcOffset as
                           // a (real, imaginary) pair of doubles, together with its density.
                           //
                           // Outputs:
                           //   *pdfReal, *pdfImag - the sample; *pdfImag is 0.0 for non-complex
                           //                        working types.
                           //   *pdfDensity        - pafUnifiedSrcDensity[iSrcOffset] when that
                           //                        array exists, otherwise 1.0; 0.0 on the
                           //                        failure paths.
                           //
                           // Returns true only when the resulting density is non-zero.
    2355    30268000 :     GByte *pabySrc = poWK->papabySrcImage[iBand];
    2356             : 
                           // Pixel rejected by the per-band validity mask: report zero density.
                           // *pdfReal and *pdfImag are left untouched on this path.
    2357    60536000 :     if (poWK->papanBandSrcValid != nullptr &&
    2358    30268000 :         poWK->papanBandSrcValid[iBand] != nullptr &&
    2359           0 :         !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
    2360             :     {
    2361           0 :         *pdfDensity = 0.0;
    2362           0 :         return false;
    2363             :     }
    2364             : 
                           // Default both outputs; every value-producing case below assigns
                           // them again.
    2365    30268000 :     *pdfReal = 0.0;
    2366    30268000 :     *pdfImag = 0.0;
    2367             : 
    2368             :     // TODO(schwehr): Fix casting.
    2369    30268000 :     switch (poWK->eWorkingDataType)
    2370             :     {
    2371    29191100 :         case GDT_UInt8:
    2372    29191100 :             *pdfReal = pabySrc[iSrcOffset];
    2373    29191100 :             *pdfImag = 0.0;
    2374    29191100 :             break;
    2375             : 
    2376           0 :         case GDT_Int8:
    2377           0 :             *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
    2378           0 :             *pdfImag = 0.0;
    2379           0 :             break;
    2380             : 
    2381       28232 :         case GDT_Int16:
    2382       28232 :             *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
    2383       28232 :             *pdfImag = 0.0;
    2384       28232 :             break;
    2385             : 
    2386         166 :         case GDT_UInt16:
    2387         166 :             *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
    2388         166 :             *pdfImag = 0.0;
    2389         166 :             break;
    2390             : 
    2391          63 :         case GDT_Int32:
    2392          63 :             *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
    2393          63 :             *pdfImag = 0.0;
    2394          63 :             break;
    2395             : 
    2396          63 :         case GDT_UInt32:
    2397          63 :             *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
    2398          63 :             *pdfImag = 0.0;
    2399          63 :             break;
    2400             : 
    2401           0 :         case GDT_Int64:
    2402           0 :             *pdfReal = static_cast<double>(
    2403           0 :                 reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
    2404           0 :             *pdfImag = 0.0;
    2405           0 :             break;
    2406             : 
    2407           0 :         case GDT_UInt64:
    2408           0 :             *pdfReal = static_cast<double>(
    2409           0 :                 reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
    2410           0 :             *pdfImag = 0.0;
    2411           0 :             break;
    2412             : 
    2413           0 :         case GDT_Float16:
    2414           0 :             *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
    2415           0 :             *pdfImag = 0.0;
    2416           0 :             break;
    2417             : 
    2418     1047220 :         case GDT_Float32:
    2419     1047220 :             *pdfReal = double(reinterpret_cast<float *>(pabySrc)[iSrcOffset]);
    2420     1047220 :             *pdfImag = 0.0;
    2421     1047220 :             break;
    2422             : 
    2423         587 :         case GDT_Float64:
    2424         587 :             *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
    2425         587 :             *pdfImag = 0.0;
    2426         587 :             break;
    2427             : 
                               // Complex types are stored interleaved: the real part sits at
                               // element 2 * iSrcOffset, the imaginary part right after it.
    2428         133 :         case GDT_CInt16:
    2429         133 :             *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2];
    2430         133 :             *pdfImag = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2 + 1];
    2431         133 :             break;
    2432             : 
    2433         133 :         case GDT_CInt32:
    2434         133 :             *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2];
    2435         133 :             *pdfImag = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2 + 1];
    2436         133 :             break;
    2437             : 
    2438           0 :         case GDT_CFloat16:
    2439           0 :             *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2];
    2440           0 :             *pdfImag =
    2441           0 :                 reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2 + 1];
    2442           0 :             break;
    2443             : 
    2444         194 :         case GDT_CFloat32:
    2445         194 :             *pdfReal =
    2446         194 :                 double(reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2]);
    2447         194 :             *pdfImag =
    2448         194 :                 double(reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2 + 1]);
    2449         194 :             break;
    2450             : 
    2451         138 :         case GDT_CFloat64:
    2452         138 :             *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2];
    2453         138 :             *pdfImag = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2 + 1];
    2454         138 :             break;
    2455             : 
                               // Invalid working type: report zero density and fail.
    2456           0 :         case GDT_Unknown:
    2457             :         case GDT_TypeCount:
    2458           0 :             CPLAssert(false);
    2459             :             *pdfDensity = 0.0;
    2460             :             return false;
    2461             :     }
    2462             : 
                           // Without a unified source density array the pixel is fully dense.
    2463    30268000 :     if (poWK->pafUnifiedSrcDensity != nullptr)
    2464     4194800 :         *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    2465             :     else
    2466    26073200 :         *pdfDensity = 1.0;
    2467             : 
    2468    30268000 :     return *pdfDensity != 0.0;
    2469             : }
    2470             : 
    2471             : /************************************************************************/
    2472             : /*                        GWKGetPixelValueReal()                        */
    2473             : /************************************************************************/
    2474             : 
    2475       15516 : static bool GWKGetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
    2476             :                                  GPtrDiff_t iSrcOffset, double *pdfDensity,
    2477             :                                  double *pdfReal)
    2478             : 
    2479             : {
                           // Real-only counterpart of GWKGetPixelValue(): reads the source
                           // sample of band iBand at pixel index iSrcOffset into *pdfReal and
                           // its density into *pdfDensity.
                           //
                           // *pdfDensity is pafUnifiedSrcDensity[iSrcOffset] when that array
                           // exists, otherwise 1.0, and 0.0 when the per-band validity mask
                           // rejects the pixel. Returns true only when the resulting density
                           // is non-zero.
                           //
                           // *pdfReal is not pre-initialized: it is only written when the
                           // working data type is one of the real types handled below.
    2480       15516 :     GByte *pabySrc = poWK->papabySrcImage[iBand];
    2481             : 
                           // Pixel rejected by the per-band validity mask: report zero density.
    2482       31034 :     if (poWK->papanBandSrcValid != nullptr &&
    2483       15518 :         poWK->papanBandSrcValid[iBand] != nullptr &&
    2484           2 :         !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
    2485             :     {
    2486           0 :         *pdfDensity = 0.0;
    2487           0 :         return false;
    2488             :     }
    2489             : 
                           // Widen the stored sample to double according to the working type.
    2490       15516 :     switch (poWK->eWorkingDataType)
    2491             :     {
    2492           1 :         case GDT_UInt8:
    2493           1 :             *pdfReal = pabySrc[iSrcOffset];
    2494           1 :             break;
    2495             : 
    2496           0 :         case GDT_Int8:
    2497           0 :             *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
    2498           0 :             break;
    2499             : 
    2500           1 :         case GDT_Int16:
    2501           1 :             *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
    2502           1 :             break;
    2503             : 
    2504           1 :         case GDT_UInt16:
    2505           1 :             *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
    2506           1 :             break;
    2507             : 
    2508         982 :         case GDT_Int32:
    2509         982 :             *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
    2510         982 :             break;
    2511             : 
    2512         179 :         case GDT_UInt32:
    2513         179 :             *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
    2514         179 :             break;
    2515             : 
    2516         112 :         case GDT_Int64:
    2517         112 :             *pdfReal = static_cast<double>(
    2518         112 :                 reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
    2519         112 :             break;
    2520             : 
    2521         112 :         case GDT_UInt64:
    2522         112 :             *pdfReal = static_cast<double>(
    2523         112 :                 reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
    2524         112 :             break;
    2525             : 
    2526           0 :         case GDT_Float16:
    2527           0 :             *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
    2528           0 :             break;
    2529             : 
    2530           2 :         case GDT_Float32:
    2531           2 :             *pdfReal = double(reinterpret_cast<float *>(pabySrc)[iSrcOffset]);
    2532           2 :             break;
    2533             : 
    2534       14126 :         case GDT_Float64:
    2535       14126 :             *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
    2536       14126 :             break;
    2537             : 
                               // Complex and invalid working types are not supported here.
                               // Note that this path returns without writing *pdfDensity,
                               // unlike the equivalent failure path of GWKGetPixelValue().
    2538           0 :         case GDT_CInt16:
    2539             :         case GDT_CInt32:
    2540             :         case GDT_CFloat16:
    2541             :         case GDT_CFloat32:
    2542             :         case GDT_CFloat64:
    2543             :         case GDT_Unknown:
    2544             :         case GDT_TypeCount:
    2545           0 :             CPLAssert(false);
    2546             :             return false;
    2547             :     }
    2548             : 
                           // Without a unified source density array the pixel is fully dense.
    2549       15516 :     if (poWK->pafUnifiedSrcDensity != nullptr)
    2550           0 :         *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    2551             :     else
    2552       15516 :         *pdfDensity = 1.0;
    2553             : 
    2554       15516 :     return *pdfDensity != 0.0;
    2555             : }
    2556             : 
    2557             : /************************************************************************/
    2558             : /*                           GWKGetPixelRow()                           */
    2559             : /************************************************************************/
    2560             : 
    2561             : /* It is assumed that adfImag[] is set to 0 by caller code for non-complex */
    2562             : /* data-types. */
    2563             : 
    2564     2369710 : static bool GWKGetPixelRow(const GDALWarpKernel *poWK, int iBand,
    2565             :                            GPtrDiff_t iSrcOffset, int nHalfSrcLen,
    2566             :                            double *padfDensity, double adfReal[],
    2567             :                            double *padfImag)
    2568             : {
    2569             :     // We know that nSrcLen is even, so we can *always* unroll loops 2x.
    2570     2369710 :     const int nSrcLen = nHalfSrcLen * 2;
    2571     2369710 :     bool bHasValid = false;
    2572             : 
    2573     2369710 :     if (padfDensity != nullptr)
    2574             :     {
    2575             :         // Init the density.
    2576     3384030 :         for (int i = 0; i < nSrcLen; i += 2)
    2577             :         {
    2578     2211910 :             padfDensity[i] = 1.0;
    2579     2211910 :             padfDensity[i + 1] = 1.0;
    2580             :         }
    2581             : 
    2582     1172120 :         if (poWK->panUnifiedSrcValid != nullptr)
    2583             :         {
    2584     3281460 :             for (int i = 0; i < nSrcLen; i += 2)
    2585             :             {
    2586     2142070 :                 if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i))
    2587     2067740 :                     bHasValid = true;
    2588             :                 else
    2589       74323 :                     padfDensity[i] = 0.0;
    2590             : 
    2591     2142070 :                 if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i + 1))
    2592     2068400 :                     bHasValid = true;
    2593             :                 else
    2594       73668 :                     padfDensity[i + 1] = 0.0;
    2595             :             }
    2596             : 
    2597             :             // Reset or fail as needed.
    2598     1139400 :             if (bHasValid)
    2599     1116590 :                 bHasValid = false;
    2600             :             else
    2601       22806 :                 return false;
    2602             :         }
    2603             : 
    2604     1149320 :         if (poWK->papanBandSrcValid != nullptr &&
    2605           0 :             poWK->papanBandSrcValid[iBand] != nullptr)
    2606             :         {
    2607           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2608             :             {
    2609           0 :                 if (CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset + i))
    2610           0 :                     bHasValid = true;
    2611             :                 else
    2612           0 :                     padfDensity[i] = 0.0;
    2613             : 
    2614           0 :                 if (CPLMaskGet(poWK->papanBandSrcValid[iBand],
    2615           0 :                                iSrcOffset + i + 1))
    2616           0 :                     bHasValid = true;
    2617             :                 else
    2618           0 :                     padfDensity[i + 1] = 0.0;
    2619             :             }
    2620             : 
    2621             :             // Reset or fail as needed.
    2622           0 :             if (bHasValid)
    2623           0 :                 bHasValid = false;
    2624             :             else
    2625           0 :                 return false;
    2626             :         }
    2627             :     }
    2628             : 
    2629             :     // TODO(schwehr): Fix casting.
    2630             :     // Fetch data.
    2631     2346910 :     switch (poWK->eWorkingDataType)
    2632             :     {
    2633     1136680 :         case GDT_UInt8:
    2634             :         {
    2635     1136680 :             GByte *pSrc =
    2636     1136680 :                 reinterpret_cast<GByte *>(poWK->papabySrcImage[iBand]);
    2637     1136680 :             pSrc += iSrcOffset;
    2638     3281570 :             for (int i = 0; i < nSrcLen; i += 2)
    2639             :             {
    2640     2144890 :                 adfReal[i] = pSrc[i];
    2641     2144890 :                 adfReal[i + 1] = pSrc[i + 1];
    2642             :             }
    2643     1136680 :             break;
    2644             :         }
    2645             : 
    2646           0 :         case GDT_Int8:
    2647             :         {
    2648           0 :             GInt8 *pSrc =
    2649           0 :                 reinterpret_cast<GInt8 *>(poWK->papabySrcImage[iBand]);
    2650           0 :             pSrc += iSrcOffset;
    2651           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2652             :             {
    2653           0 :                 adfReal[i] = pSrc[i];
    2654           0 :                 adfReal[i + 1] = pSrc[i + 1];
    2655             :             }
    2656           0 :             break;
    2657             :         }
    2658             : 
    2659        5950 :         case GDT_Int16:
    2660             :         {
    2661        5950 :             GInt16 *pSrc =
    2662        5950 :                 reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
    2663        5950 :             pSrc += iSrcOffset;
    2664       22164 :             for (int i = 0; i < nSrcLen; i += 2)
    2665             :             {
    2666       16214 :                 adfReal[i] = pSrc[i];
    2667       16214 :                 adfReal[i + 1] = pSrc[i + 1];
    2668             :             }
    2669        5950 :             break;
    2670             :         }
    2671             : 
    2672        4310 :         case GDT_UInt16:
    2673             :         {
    2674        4310 :             GUInt16 *pSrc =
    2675        4310 :                 reinterpret_cast<GUInt16 *>(poWK->papabySrcImage[iBand]);
    2676        4310 :             pSrc += iSrcOffset;
    2677       18884 :             for (int i = 0; i < nSrcLen; i += 2)
    2678             :             {
    2679       14574 :                 adfReal[i] = pSrc[i];
    2680       14574 :                 adfReal[i + 1] = pSrc[i + 1];
    2681             :             }
    2682        4310 :             break;
    2683             :         }
    2684             : 
    2685         946 :         case GDT_Int32:
    2686             :         {
    2687         946 :             GInt32 *pSrc =
    2688         946 :                 reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
    2689         946 :             pSrc += iSrcOffset;
    2690        2624 :             for (int i = 0; i < nSrcLen; i += 2)
    2691             :             {
    2692        1678 :                 adfReal[i] = pSrc[i];
    2693        1678 :                 adfReal[i + 1] = pSrc[i + 1];
    2694             :             }
    2695         946 :             break;
    2696             :         }
    2697             : 
    2698         946 :         case GDT_UInt32:
    2699             :         {
    2700         946 :             GUInt32 *pSrc =
    2701         946 :                 reinterpret_cast<GUInt32 *>(poWK->papabySrcImage[iBand]);
    2702         946 :             pSrc += iSrcOffset;
    2703        2624 :             for (int i = 0; i < nSrcLen; i += 2)
    2704             :             {
    2705        1678 :                 adfReal[i] = pSrc[i];
    2706        1678 :                 adfReal[i + 1] = pSrc[i + 1];
    2707             :             }
    2708         946 :             break;
    2709             :         }
    2710             : 
    2711         196 :         case GDT_Int64:
    2712             :         {
    2713         196 :             auto pSrc =
    2714         196 :                 reinterpret_cast<std::int64_t *>(poWK->papabySrcImage[iBand]);
    2715         196 :             pSrc += iSrcOffset;
    2716         392 :             for (int i = 0; i < nSrcLen; i += 2)
    2717             :             {
    2718         196 :                 adfReal[i] = static_cast<double>(pSrc[i]);
    2719         196 :                 adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
    2720             :             }
    2721         196 :             break;
    2722             :         }
    2723             : 
    2724         196 :         case GDT_UInt64:
    2725             :         {
    2726         196 :             auto pSrc =
    2727         196 :                 reinterpret_cast<std::uint64_t *>(poWK->papabySrcImage[iBand]);
    2728         196 :             pSrc += iSrcOffset;
    2729         392 :             for (int i = 0; i < nSrcLen; i += 2)
    2730             :             {
    2731         196 :                 adfReal[i] = static_cast<double>(pSrc[i]);
    2732         196 :                 adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
    2733             :             }
    2734         196 :             break;
    2735             :         }
    2736             : 
    2737           0 :         case GDT_Float16:
    2738             :         {
    2739           0 :             GFloat16 *pSrc =
    2740           0 :                 reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
    2741           0 :             pSrc += iSrcOffset;
    2742           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2743             :             {
    2744           0 :                 adfReal[i] = pSrc[i];
    2745           0 :                 adfReal[i + 1] = pSrc[i + 1];
    2746             :             }
    2747           0 :             break;
    2748             :         }
    2749             : 
    2750       25270 :         case GDT_Float32:
    2751             :         {
    2752       25270 :             float *pSrc =
    2753       25270 :                 reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
    2754       25270 :             pSrc += iSrcOffset;
    2755      121739 :             for (int i = 0; i < nSrcLen; i += 2)
    2756             :             {
    2757       96469 :                 adfReal[i] = double(pSrc[i]);
    2758       96469 :                 adfReal[i + 1] = double(pSrc[i + 1]);
    2759             :             }
    2760       25270 :             break;
    2761             :         }
    2762             : 
    2763         946 :         case GDT_Float64:
    2764             :         {
    2765         946 :             double *pSrc =
    2766         946 :                 reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
    2767         946 :             pSrc += iSrcOffset;
    2768        2624 :             for (int i = 0; i < nSrcLen; i += 2)
    2769             :             {
    2770        1678 :                 adfReal[i] = pSrc[i];
    2771        1678 :                 adfReal[i + 1] = pSrc[i + 1];
    2772             :             }
    2773         946 :             break;
    2774             :         }
    2775             : 
    2776     1169220 :         case GDT_CInt16:
    2777             :         {
    2778     1169220 :             GInt16 *pSrc =
    2779     1169220 :                 reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
    2780     1169220 :             pSrc += 2 * iSrcOffset;
    2781     4676020 :             for (int i = 0; i < nSrcLen; i += 2)
    2782             :             {
    2783     3506800 :                 adfReal[i] = pSrc[2 * i];
    2784     3506800 :                 padfImag[i] = pSrc[2 * i + 1];
    2785             : 
    2786     3506800 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2787     3506800 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2788             :             }
    2789     1169220 :             break;
    2790             :         }
    2791             : 
    2792         750 :         case GDT_CInt32:
    2793             :         {
    2794         750 :             GInt32 *pSrc =
    2795         750 :                 reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
    2796         750 :             pSrc += 2 * iSrcOffset;
    2797        2232 :             for (int i = 0; i < nSrcLen; i += 2)
    2798             :             {
    2799        1482 :                 adfReal[i] = pSrc[2 * i];
    2800        1482 :                 padfImag[i] = pSrc[2 * i + 1];
    2801             : 
    2802        1482 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2803        1482 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2804             :             }
    2805         750 :             break;
    2806             :         }
    2807             : 
    2808           0 :         case GDT_CFloat16:
    2809             :         {
    2810           0 :             GFloat16 *pSrc =
    2811           0 :                 reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
    2812           0 :             pSrc += 2 * iSrcOffset;
    2813           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2814             :             {
    2815           0 :                 adfReal[i] = pSrc[2 * i];
    2816           0 :                 padfImag[i] = pSrc[2 * i + 1];
    2817             : 
    2818           0 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2819           0 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2820             :             }
    2821           0 :             break;
    2822             :         }
    2823             : 
    2824         750 :         case GDT_CFloat32:
    2825             :         {
    2826         750 :             float *pSrc =
    2827         750 :                 reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
    2828         750 :             pSrc += 2 * iSrcOffset;
    2829        2232 :             for (int i = 0; i < nSrcLen; i += 2)
    2830             :             {
    2831        1482 :                 adfReal[i] = double(pSrc[2 * i]);
    2832        1482 :                 padfImag[i] = double(pSrc[2 * i + 1]);
    2833             : 
    2834        1482 :                 adfReal[i + 1] = double(pSrc[2 * i + 2]);
    2835        1482 :                 padfImag[i + 1] = double(pSrc[2 * i + 3]);
    2836             :             }
    2837         750 :             break;
    2838             :         }
    2839             : 
    2840         750 :         case GDT_CFloat64:
    2841             :         {
    2842         750 :             double *pSrc =
    2843         750 :                 reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
    2844         750 :             pSrc += 2 * iSrcOffset;
    2845        2232 :             for (int i = 0; i < nSrcLen; i += 2)
    2846             :             {
    2847        1482 :                 adfReal[i] = pSrc[2 * i];
    2848        1482 :                 padfImag[i] = pSrc[2 * i + 1];
    2849             : 
    2850        1482 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2851        1482 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2852             :             }
    2853         750 :             break;
    2854             :         }
    2855             : 
    2856           0 :         case GDT_Unknown:
    2857             :         case GDT_TypeCount:
    2858           0 :             CPLAssert(false);
    2859             :             if (padfDensity)
    2860             :                 memset(padfDensity, 0, nSrcLen * sizeof(double));
    2861             :             return false;
    2862             :     }
    2863             : 
    2864     2346910 :     if (padfDensity == nullptr)
    2865     1197590 :         return true;
    2866             : 
    2867     1149320 :     if (poWK->pafUnifiedSrcDensity == nullptr)
    2868             :     {
    2869     3256740 :         for (int i = 0; i < nSrcLen; i += 2)
    2870             :         {
    2871             :             // Take into account earlier calcs.
    2872     2127390 :             if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2873             :             {
    2874     2087480 :                 padfDensity[i] = 1.0;
    2875     2087480 :                 bHasValid = true;
    2876             :             }
    2877             : 
    2878     2127390 :             if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2879             :             {
    2880     2088140 :                 padfDensity[i + 1] = 1.0;
    2881     2088140 :                 bHasValid = true;
    2882             :             }
    2883             :         }
    2884             :     }
    2885             :     else
    2886             :     {
    2887       70068 :         for (int i = 0; i < nSrcLen; i += 2)
    2888             :         {
    2889       50103 :             if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2890       50103 :                 padfDensity[i] =
    2891       50103 :                     double(poWK->pafUnifiedSrcDensity[iSrcOffset + i]);
    2892       50103 :             if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2893       49252 :                 bHasValid = true;
    2894             : 
    2895       50103 :             if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2896       50103 :                 padfDensity[i + 1] =
    2897       50103 :                     double(poWK->pafUnifiedSrcDensity[iSrcOffset + i + 1]);
    2898       50103 :             if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2899       49170 :                 bHasValid = true;
    2900             :         }
    2901             :     }
    2902             : 
    2903     1149320 :     return bHasValid;
    2904             : }
    2905             : 
    2906             : /************************************************************************/
    2907             : /*                            GWKGetPixelT()                            */
    2908             : /************************************************************************/
    2909             : 
                       // Fetch a single source pixel of band iBand, viewing the band buffer
                       // poWK->papabySrcImage[iBand] as an array of T.
                       //
                       // iSrcOffset is used as-is (no bounds check) as the pixel index into the
                       // band buffer, the validity masks and the unified density array.
                       //
                       // If the unified validity mask or the band validity mask (when present)
                       // flags the pixel as invalid, *pdfDensity is set to 0.0, *pValue is left
                       // untouched and false is returned.  Otherwise *pValue receives the pixel,
                       // *pdfDensity receives 1.0 (no unified density array) or the matching
                       // pafUnifiedSrcDensity entry, and the function returns whether that
                       // density is non-zero.
    2910             : template <class T>
    2911    10002719 : static bool GWKGetPixelT(const GDALWarpKernel *poWK, int iBand,
    2912             :                          GPtrDiff_t iSrcOffset, double *pdfDensity, T *pValue)
    2913             : 
    2914             : {
    2915    10002719 :     T *pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
    2916             : 
                           // Reject the pixel if either mask marks it as invalid.
    2917    22733143 :     if ((poWK->panUnifiedSrcValid != nullptr &&
    2918    20005418 :          !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset)) ||
    2919    10002719 :         (poWK->papanBandSrcValid != nullptr &&
    2920      589836 :          poWK->papanBandSrcValid[iBand] != nullptr &&
    2921      589836 :          !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset)))
    2922             :     {
    2923           9 :         *pdfDensity = 0.0;
    2924           9 :         return false;
    2925             :     }
    2926             : 
    2927    10002709 :     *pValue = pSrc[iSrcOffset];
    2928             : 
    2929    10002709 :     if (poWK->pafUnifiedSrcDensity == nullptr)
    2930     8880346 :         *pdfDensity = 1.0;
    2931             :     else
    2932     1122362 :         *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    2933             : 
                           // Exact comparison with zero: SRC_DENSITY_THRESHOLD_DOUBLE is not used
                           // here.
    2934    10002709 :     return *pdfDensity != 0.0;
    2935             : }
    2936             : 
    2937             : /************************************************************************/
    2938             : /*                        GWKBilinearResample()                         */
    2939             : /*     Set of bilinear interpolators                                    */
    2940             : /************************************************************************/
    2941             : 
                       // Bilinear interpolation of band iBand at source position
                       // (dfSrcX, dfSrcY), taking validity/density into account.
                       //
                       // The 2x2 neighbourhood starts at pixel (floor(dfSrcX - 0.5),
                       // floor(dfSrcY - 0.5)); dfRatioX / dfRatioY are the weights of its left
                       // column / upper row.  Each of the two rows is fetched with
                       // GWKGetPixelRow(), and only pixels that lie inside the image and whose
                       // density exceeds SRC_DENSITY_THRESHOLD_DOUBLE contribute.  The weighted
                       // sums of the real part, imaginary part and density are then divided by
                       // the sum of the weights actually used.
                       //
                       // *pdfReal, *pdfImag and *pdfDensity are always written (with zeros when
                       // the weight sum is below 0.00001).
                       //
                       // Returns true only when the result had to be renormalized, i.e. when
                       // the weight sum is neither exactly 1.0 nor below 0.00001; see the
                       // NOTE(review) in the "Return result" section.
    2942       77448 : static bool GWKBilinearResample4Sample(const GDALWarpKernel *poWK, int iBand,
    2943             :                                        double dfSrcX, double dfSrcY,
    2944             :                                        double *pdfDensity, double *pdfReal,
    2945             :                                        double *pdfImag)
    2946             : 
    2947             : {
    2948             :     // Save as local variables to avoid following pointers.
    2949       77448 :     const int nSrcXSize = poWK->nSrcXSize;
    2950       77448 :     const int nSrcYSize = poWK->nSrcYSize;
    2951             : 
    2952       77448 :     int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    2953       77448 :     int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    2954       77448 :     double dfRatioX = 1.5 - (dfSrcX - iSrcX);
    2955       77448 :     double dfRatioY = 1.5 - (dfSrcY - iSrcY);
    2956       77448 :     bool bShifted = false;
    2957             : 
                           // Sample point in the left half of column 0 (resp. upper half of row
                           // 0): use column/row 0 as the first column/row with full weight, so
                           // that the second column/row gets weight 0.
    2958       77448 :     if (iSrcX == -1)
    2959             :     {
    2960        1534 :         iSrcX = 0;
    2961        1534 :         dfRatioX = 1;
    2962             :     }
    2963       77448 :     if (iSrcY == -1)
    2964             :     {
    2965        7734 :         iSrcY = 0;
    2966        7734 :         dfRatioY = 1;
    2967             :     }
    2968       77448 :     GPtrDiff_t iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    2969             : 
    2970             :     // Shift so we don't overrun the array.
                           // The row fetches below fill two slots ([0] and [1]) starting at the
                           // given offset -- presumably two consecutive pixels; GWKGetPixelRow()'s
                           // signature is not visible here.  When the first or the second row
                           // fetch would start on the very last pixel of the buffer, start one
                           // pixel earlier; the wanted pixel then ends up in slot [1] and is
                           // copied back to slot [0] after each fetch.
    2971       77448 :     if (static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize == iSrcOffset + 1 ||
    2972       77330 :         static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize ==
    2973       77330 :             iSrcOffset + nSrcXSize + 1)
    2974             :     {
    2975         230 :         bShifted = true;
    2976         230 :         --iSrcOffset;
    2977             :     }
    2978             : 
    2979       77448 :     double adfDensity[2] = {0.0, 0.0};
    2980       77448 :     double adfReal[2] = {0.0, 0.0};
    2981       77448 :     double adfImag[2] = {0.0, 0.0};
    2982       77448 :     double dfAccumulatorReal = 0.0;
    2983       77448 :     double dfAccumulatorImag = 0.0;
    2984       77448 :     double dfAccumulatorDensity = 0.0;
    2985       77448 :     double dfAccumulatorDivisor = 0.0;
    2986             : 
    2987       77448 :     const GPtrDiff_t nSrcPixels =
    2988       77448 :         static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize;
    2989             :     // Get pixel row.
                           // Upper row: skipped entirely when the row index or the start offset
                           // is outside the image, or when GWKGetPixelRow() returns false.
    2990       77448 :     if (iSrcY >= 0 && iSrcY < nSrcYSize && iSrcOffset >= 0 &&
    2991      154896 :         iSrcOffset < nSrcPixels &&
    2992       77448 :         GWKGetPixelRow(poWK, iBand, iSrcOffset, 1, adfDensity, adfReal,
    2993             :                        adfImag))
    2994             :     {
    2995       71504 :         double dfMult1 = dfRatioX * dfRatioY;
    2996       71504 :         double dfMult2 = (1.0 - dfRatioX) * dfRatioY;
    2997             : 
    2998             :         // Shifting corrected.
    2999       71504 :         if (bShifted)
    3000             :         {
    3001         230 :             adfReal[0] = adfReal[1];
    3002         230 :             adfImag[0] = adfImag[1];
    3003         230 :             adfDensity[0] = adfDensity[1];
    3004             :         }
    3005             : 
    3006             :         // Upper Left Pixel.
    3007       71504 :         if (iSrcX >= 0 && iSrcX < nSrcXSize &&
    3008       71504 :             adfDensity[0] > SRC_DENSITY_THRESHOLD_DOUBLE)
    3009             :         {
    3010       66050 :             dfAccumulatorDivisor += dfMult1;
    3011             : 
    3012       66050 :             dfAccumulatorReal += adfReal[0] * dfMult1;
    3013       66050 :             dfAccumulatorImag += adfImag[0] * dfMult1;
    3014       66050 :             dfAccumulatorDensity += adfDensity[0] * dfMult1;
    3015             :         }
    3016             : 
    3017             :         // Upper Right Pixel.
    3018       71504 :         if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
    3019       70609 :             adfDensity[1] > SRC_DENSITY_THRESHOLD_DOUBLE)
    3020             :         {
    3021       65335 :             dfAccumulatorDivisor += dfMult2;
    3022             : 
    3023       65335 :             dfAccumulatorReal += adfReal[1] * dfMult2;
    3024       65335 :             dfAccumulatorImag += adfImag[1] * dfMult2;
    3025       65335 :             dfAccumulatorDensity += adfDensity[1] * dfMult2;
    3026             :         }
    3027             :     }
    3028             : 
    3029             :     // Get pixel row.
                           // Lower row: same logic one row further down (offset + nSrcXSize),
                           // weighted with (1.0 - dfRatioY).
    3030       77448 :     if (iSrcY + 1 >= 0 && iSrcY + 1 < nSrcYSize &&
    3031      228032 :         iSrcOffset + nSrcXSize >= 0 && iSrcOffset + nSrcXSize < nSrcPixels &&
    3032       73136 :         GWKGetPixelRow(poWK, iBand, iSrcOffset + nSrcXSize, 1, adfDensity,
    3033             :                        adfReal, adfImag))
    3034             :     {
    3035       67577 :         double dfMult1 = dfRatioX * (1.0 - dfRatioY);
    3036       67577 :         double dfMult2 = (1.0 - dfRatioX) * (1.0 - dfRatioY);
    3037             : 
    3038             :         // Shifting corrected
    3039       67577 :         if (bShifted)
    3040             :         {
    3041         112 :             adfReal[0] = adfReal[1];
    3042         112 :             adfImag[0] = adfImag[1];
    3043         112 :             adfDensity[0] = adfDensity[1];
    3044             :         }
    3045             : 
    3046             :         // Lower Left Pixel
    3047       67577 :         if (iSrcX >= 0 && iSrcX < nSrcXSize &&
    3048       67577 :             adfDensity[0] > SRC_DENSITY_THRESHOLD_DOUBLE)
    3049             :         {
    3050       62298 :             dfAccumulatorDivisor += dfMult1;
    3051             : 
    3052       62298 :             dfAccumulatorReal += adfReal[0] * dfMult1;
    3053       62298 :             dfAccumulatorImag += adfImag[0] * dfMult1;
    3054       62298 :             dfAccumulatorDensity += adfDensity[0] * dfMult1;
    3055             :         }
    3056             : 
    3057             :         // Lower Right Pixel.
    3058       67577 :         if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
    3059       66800 :             adfDensity[1] > SRC_DENSITY_THRESHOLD_DOUBLE)
    3060             :         {
    3061       61823 :             dfAccumulatorDivisor += dfMult2;
    3062             : 
    3063       61823 :             dfAccumulatorReal += adfReal[1] * dfMult2;
    3064       61823 :             dfAccumulatorImag += adfImag[1] * dfMult2;
    3065       61823 :             dfAccumulatorDensity += adfDensity[1] * dfMult2;
    3066             :         }
    3067             :     }
    3068             : 
    3069             :     /* -------------------------------------------------------------------- */
    3070             :     /*      Return result.                                                  */
    3071             :     /* -------------------------------------------------------------------- */
                           // NOTE(review): the "divisor == 1.0" branch writes a complete result
                           // but returns false, exactly like the "no usable pixel" branch; only
                           // the renormalizing branch returns true.  GWKCubicResample4Sample()
                           // forwards this return value; other callers are not visible here, so
                           // confirm whether any of them relies on it (rather than on
                           // *pdfDensity) before changing it.
    3072       77448 :     if (dfAccumulatorDivisor == 1.0)
    3073             :     {
    3074       45929 :         *pdfReal = dfAccumulatorReal;
    3075       45929 :         *pdfImag = dfAccumulatorImag;
    3076       45929 :         *pdfDensity = dfAccumulatorDensity;
    3077       45929 :         return false;
    3078             :     }
    3079       31519 :     else if (dfAccumulatorDivisor < 0.00001)
    3080             :     {
    3081           0 :         *pdfReal = 0.0;
    3082           0 :         *pdfImag = 0.0;
    3083           0 :         *pdfDensity = 0.0;
    3084           0 :         return false;
    3085             :     }
    3086             :     else
    3087             :     {
    3088       31519 :         *pdfReal = dfAccumulatorReal / dfAccumulatorDivisor;
    3089       31519 :         *pdfImag = dfAccumulatorImag / dfAccumulatorDivisor;
    3090       31519 :         *pdfDensity = dfAccumulatorDensity / dfAccumulatorDivisor;
    3091       31519 :         return true;
    3092             :     }
    3093             : }
    3094             : 
    3095             : template <class T>
    3096     7278342 : static bool GWKBilinearResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
    3097             :                                                int iBand, double dfSrcX,
    3098             :                                                double dfSrcY, T *pValue)
    3099             : 
    3100             : {
                           // Bilinear interpolation for data of type T that consults no validity
                           // mask and no density array: pixels are read directly from
                           // poWK->papabySrcImage[iBand].
                           //
                           // Fast path: when the whole 2x2 neighbourhood lies inside the source
                           // image, the four pixels are blended directly.  Otherwise each
                           // neighbour that lies inside the image is accumulated with its weight
                           // and the sum is divided by the total weight used.
                           //
                           // Returns false (with *pValue set to 0) only when the total weight is
                           // below 0.00001; otherwise *pValue receives GWKRoundValueT<T>() of the
                           // interpolated value and true is returned.
    3101             : 
    3102     7278342 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    3103     7278342 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
                           // iSrcOffset may designate a position outside the buffer when iSrcX or
                           // iSrcY is out of range; every pSrc[] access below is guarded by a
                           // bounds test on the corresponding column and row.
    3104     7278342 :     GPtrDiff_t iSrcOffset =
    3105     7278342 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
                           // Weights of the left column / upper row of the 2x2 neighbourhood.
    3106     7278342 :     const double dfRatioX = 1.5 - (dfSrcX - iSrcX);
    3107     7278342 :     const double dfRatioY = 1.5 - (dfSrcY - iSrcY);
    3108             : 
    3109     7278342 :     const T *const pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
    3110             : 
                           // Fast path: all four neighbours are inside the image, so the weights
                           // need no renormalization.
    3111     7278342 :     if (iSrcX >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
    3112     4978059 :         iSrcY + 1 < poWK->nSrcYSize)
    3113             :     {
    3114     4931242 :         const double dfAccumulator =
    3115     4931242 :             (double(pSrc[iSrcOffset]) * dfRatioX +
    3116     4931242 :              double(pSrc[iSrcOffset + 1]) * (1.0 - dfRatioX)) *
    3117             :                 dfRatioY +
    3118     4931242 :             (double(pSrc[iSrcOffset + poWK->nSrcXSize]) * dfRatioX +
    3119     4931242 :              double(pSrc[iSrcOffset + 1 + poWK->nSrcXSize]) *
    3120     4931242 :                  (1.0 - dfRatioX)) *
    3121     4931242 :                 (1.0 - dfRatioY);
    3122             : 
    3123     4931242 :         *pValue = GWKRoundValueT<T>(dfAccumulator);
    3124             : 
    3125     4931242 :         return true;
    3126             :     }
    3127             : 
                           // Border case: accumulate only the neighbours that exist, keeping
                           // track of the weight actually used.
    3128     2347100 :     double dfAccumulatorDivisor = 0.0;
    3129     2347100 :     double dfAccumulator = 0.0;
    3130             : 
    3131             :     // Upper Left Pixel.
    3132     2347100 :     if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY >= 0 &&
    3133      278940 :         iSrcY < poWK->nSrcYSize)
    3134             :     {
    3135      278940 :         const double dfMult = dfRatioX * dfRatioY;
    3136             : 
    3137      278940 :         dfAccumulatorDivisor += dfMult;
    3138             : 
    3139      278940 :         dfAccumulator += double(pSrc[iSrcOffset]) * dfMult;
    3140             :     }
    3141             : 
    3142             :     // Upper Right Pixel.
    3143     2347100 :     if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
    3144     1980536 :         iSrcY < poWK->nSrcYSize)
    3145             :     {
    3146     1980536 :         const double dfMult = (1.0 - dfRatioX) * dfRatioY;
    3147             : 
    3148     1980536 :         dfAccumulatorDivisor += dfMult;
    3149             : 
    3150     1980536 :         dfAccumulator += double(pSrc[iSrcOffset + 1]) * dfMult;
    3151             :     }
    3152             : 
    3153             :     // Lower Right Pixel.
    3154     2347100 :     if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
    3155     2081444 :         iSrcY + 1 < poWK->nSrcYSize)
    3156             :     {
    3157     2001333 :         const double dfMult = (1.0 - dfRatioX) * (1.0 - dfRatioY);
    3158             : 
    3159     2001333 :         dfAccumulatorDivisor += dfMult;
    3160             : 
    3161     2001333 :         dfAccumulator +=
    3162     2001333 :             double(pSrc[iSrcOffset + 1 + poWK->nSrcXSize]) * dfMult;
    3163             :     }
    3164             : 
    3165             :     // Lower Left Pixel.
    3166     2347100 :     if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
    3167      379800 :         iSrcY + 1 < poWK->nSrcYSize)
    3168             :     {
    3169      299461 :         const double dfMult = dfRatioX * (1.0 - dfRatioY);
    3170             : 
    3171      299461 :         dfAccumulatorDivisor += dfMult;
    3172             : 
    3173      299461 :         dfAccumulator += double(pSrc[iSrcOffset + poWK->nSrcXSize]) * dfMult;
    3174             :     }
    3175             : 
    3176             :     /* -------------------------------------------------------------------- */
    3177             :     /*      Return result.                                                  */
    3178             :     /* -------------------------------------------------------------------- */
    3179     2347100 :     double dfValue = 0.0;
    3180             : 
                           // No neighbour contributed (or only with negligible weight).
    3181     2347100 :     if (dfAccumulatorDivisor < 0.00001)
    3182             :     {
    3183           0 :         *pValue = 0;
    3184           0 :         return false;
    3185             :     }
    3186     2347100 :     else if (dfAccumulatorDivisor == 1.0)
    3187             :     {
    3188        7320 :         dfValue = dfAccumulator;
    3189             :     }
    3190             :     else
    3191             :     {
    3192     2339778 :         dfValue = dfAccumulator / dfAccumulatorDivisor;
    3193             :     }
    3194             : 
    3195     2347100 :     *pValue = GWKRoundValueT<T>(dfValue);
    3196             : 
    3197     2347100 :     return true;
    3198             : }
    3199             : 
    3200             : /************************************************************************/
    3201             : /*                        GWKCubicResample()                            */
    3202             : /*     Set of bicubic interpolators using cubic convolution.            */
    3203             : /************************************************************************/
    3204             : 
    3205             : // http://verona.fi-p.unam.mx/boris/practicas/CubConvInterp.pdf Formula 18
    3206             : // or http://en.wikipedia.org/wiki/Cubic_Hermite_spline : CINTx(p_1,p0,p1,p2)
    3207             : // http://en.wikipedia.org/wiki/Bicubic_interpolation: matrix notation
                       //
                       // Cubic convolution of the four samples f0..f3:
                       //   f1 + 0.5 * (distance1 * (f2 - f0) +
                       //               distance2 * (2 f0 - 5 f1 + 4 f2 - f3) +
                       //               distance3 * (3 (f1 - f2) + f3 - f0))
                       // With distance1/2/3 = x, x^2, x^3 this is the same polynomial as the
                       // weighted sum built from GWKCubicComputeWeights(x).  Callers are not
                       // visible here, so that calling convention is presumed -- confirm.
    3208             : 
    3209             : template <typename T>
    3210     1810720 : static inline T CubicConvolution(T distance1, T distance2, T distance3, T f0,
    3211             :                                  T f1, T f2, T f3)
    3212             : {
    3213     1810720 :     return (f1 + T(0.5) * (distance1 * (f2 - f0) +
    3214     1810720 :                            distance2 * (2 * f0 - 5 * f1 + 4 * f2 - f3) +
    3215     1810720 :                            distance3 * (3 * (f1 - f2) + f3 - f0)));
    3216             : }
    3217             : 
    3218             : /************************************************************************/
    3219             : /*                       GWKCubicComputeWeights()                       */
    3220             : /************************************************************************/
    3221             : 
    3222             : // adfCoeffs[2] = 1.0 - (adfCoeffs[0] + adfCoeffs[1] + adfCoeffs[3]);
    3223             : 
    3224             : template <typename T>
    3225    80324960 : static inline void GWKCubicComputeWeights(T x, T coeffs[4])
    3226             : {
                           // Fills coeffs[0..3] with the cubic convolution weights of four
                           // consecutive samples for a point at fractional offset x past the
                           // second sample (x == 0 gives {0, 1, 0, 0}, x == 1 gives
                           // {0, 0, 1, 0}; the range of x is not checked here):
                           //   coeffs[0] = 0.5 * (-x + 2 x^2 - x^3)
                           //   coeffs[1] = 1 + 0.5 * (-5 x^2 + 3 x^3)
                           //   coeffs[2] = 0.5 * (x + 4 x^2 - 3 x^3)
                           //   coeffs[3] = 0.5 * (-x^2 + x^3)
                           // The four weights sum to 1 for any x.
                           // halfX, threeX and halfX2 are shared sub-expressions of the Horner
                           // forms below.
    3227    80324960 :     const T halfX = T(0.5) * x;
    3228    80324960 :     const T threeX = T(3.0) * x;
    3229    80324960 :     const T halfX2 = halfX * x;
    3230             : 
    3231    80324960 :     coeffs[0] = halfX * (-1 + x * (2 - x));
    3232    80324960 :     coeffs[1] = 1 + halfX2 * (-5 + threeX);
    3233    80324960 :     coeffs[2] = halfX * (1 + x * (4 - threeX));
    3234    80324960 :     coeffs[3] = halfX2 * (-1 + x);
    3235    80324960 : }
    3236             : 
    3237    14682546 : template <typename T> inline double CONVOL4(const double v1[4], const T v2[4])
    3238             : {
                           // Dot product of the four weights v1 with the four samples v2; each
                           // sample is converted to double before being multiplied.
    3239    14682546 :     return v1[0] * double(v2[0]) + v1[1] * double(v2[1]) +
    3240    14682546 :            v1[2] * double(v2[2]) + v1[3] * double(v2[3]);
    3241             : }
    3242             : 
    3243             : #if 0
                       // Everything down to the matching #endif is compiled out; the two macros
                       // below are kept for reference only.
    3244             : // Optimal (in theory...) for max 2 convolutions: 14 multiplications
    3245             : // instead of 17.
                       // Compared with GWKCubicComputeWeights(), the coefficients computed here
                       // leave out the common factor 0.5 * x and the constant 1 of the second
                       // weight; CONVOL4_Optim2MAX() re-applies them as (v)[1] + dfHalfX * (...).
    3246             : // TODO(schwehr): Use an inline function.
    3247             : #define GWKCubicComputeWeights_Optim2MAX(dfX_, adfCoeffs, dfHalfX)             \
    3248             :     {                                                                          \
    3249             :         const double dfX = dfX_;                                               \
    3250             :         dfHalfX = 0.5 * dfX;                                                   \
    3251             :         const double dfThreeX = 3.0 * dfX;                                     \
    3252             :         const double dfXMinus1 = dfX - 1;                                      \
    3253             :                                                                                \
    3254             :         adfCoeffs[0] = -1 + dfX * (2 - dfX);                                   \
    3255             :         adfCoeffs[1] = dfX * (-5 + dfThreeX);                                  \
    3256             :         /*adfCoeffs[2] = 1 + dfX * (4 - dfThreeX);*/                           \
    3257             :         adfCoeffs[2] = -dfXMinus1 - adfCoeffs[1];                              \
    3258             :         /*adfCoeffs[3] = dfX * (-1 + dfX); */                                  \
    3259             :         adfCoeffs[3] = dfXMinus1 - adfCoeffs[0];                               \
    3260             :     }
    3261             : 
    3262             : // TODO(schwehr): Use an inline function.
    3263             : #define CONVOL4_Optim2MAX(adfCoeffs, v, dfHalfX)                               \
    3264             :     ((v)[1] + (dfHalfX) * ((adfCoeffs)[0] * (v)[0] + (adfCoeffs)[1] * (v)[1] + \
    3265             :                            (adfCoeffs)[2] * (v)[2] + (adfCoeffs)[3] * (v)[3]))
    3266             : #endif
    3267             : 
    3268      302045 : static bool GWKCubicResample4Sample(const GDALWarpKernel *poWK, int iBand,
    3269             :                                     double dfSrcX, double dfSrcY,
    3270             :                                     double *pdfDensity, double *pdfReal,
    3271             :                                     double *pdfImag)
    3272             : 
    3273             : {
                           // Bicubic (cubic convolution) interpolation of band iBand at source
                           // position (dfSrcX, dfSrcY) over the 4x4 window made of columns
                           // iSrcX - 1 .. iSrcX + 2 and rows iSrcY - 1 .. iSrcY + 2.
                           //
                           // Falls back on GWKBilinearResample4Sample() (forwarding its return
                           // value) when the window is not entirely inside the source image, when
                           // a row fetch fails, or when any pixel of the window has a density
                           // below SRC_DENSITY_THRESHOLD_DOUBLE.  Otherwise writes the convolved
                           // density, real part and imaginary part and returns true.
                           //
                           // iSrcX / iSrcY are obtained with a truncating cast rather than
                           // floor().  The two differ only when dfSrcX - 0.5 (resp. dfSrcY - 0.5)
                           // is negative, in which case iSrcX (resp. iSrcY) is <= 0 and the
                           // border test below selects the bilinear fallback.
    3274      302045 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3275      302045 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3276      302045 :     GPtrDiff_t iSrcOffset =
    3277      302045 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3278      302045 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3279      302045 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3280      302045 :     double adfDensity[4] = {};
    3281      302045 :     double adfReal[4] = {};
    3282      302045 :     double adfImag[4] = {};
    3283             : 
    3284             :     // Get the bilinear interpolation at the image borders.
    3285      302045 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3286      286140 :         iSrcY + 2 >= poWK->nSrcYSize)
    3287       24670 :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3288       24670 :                                           pdfDensity, pdfReal, pdfImag);
    3289             : 
                           // Per-row results of the horizontal convolution.
    3290      277375 :     double adfValueDens[4] = {};
    3291      277375 :     double adfValueReal[4] = {};
    3292      277375 :     double adfValueImag[4] = {};
    3293             : 
    3294      277375 :     double adfCoeffsX[4] = {};
    3295      277375 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
    3296             : 
                           // For each of the four rows: fetch the four pixels starting at column
                           // iSrcX - 1, bail out to bilinear if any of them is unusable, else
                           // convolve them horizontally.
    3297     1240570 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3298             :     {
    3299     1009640 :         if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
    3300      998035 :                             2, adfDensity, adfReal, adfImag) ||
    3301      998035 :             adfDensity[0] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3302      980395 :             adfDensity[1] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3303     2979770 :             adfDensity[2] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3304      972094 :             adfDensity[3] < SRC_DENSITY_THRESHOLD_DOUBLE)
    3305             :         {
    3306       46449 :             return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3307       46449 :                                               pdfDensity, pdfReal, pdfImag);
    3308             :         }
    3309             : 
    3310      963196 :         adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
    3311      963196 :         adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
    3312      963196 :         adfValueImag[i + 1] = CONVOL4(adfCoeffsX, adfImag);
    3313             :     }
    3314             : 
    3315             :     /* -------------------------------------------------------------------- */
    3316             :     /*      The loop above falls back on bilinear interpolation as soon     */
    3317             :     /*      as any kernel pixel is missing.  Ideally we should do           */
    3318             :     /*      "weight adjustment" of our results similarly to what is         */
    3319             :     /*      done for the cubic spline and lanc. interpolators.              */
    3320             :     /* -------------------------------------------------------------------- */
    3321             : 
                           // Vertical convolution of the four per-row results.
    3322      230926 :     double adfCoeffsY[4] = {};
    3323      230926 :     GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
    3324             : 
    3325      230926 :     *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
    3326      230926 :     *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
    3327      230926 :     *pdfImag = CONVOL4(adfCoeffsY, adfValueImag);
    3328             : 
    3329      230926 :     return true;
    3330             : }
    3331             : 
    3332             : #ifdef USE_SSE2
    3333             : 
    3334             : /************************************************************************/
    3335             : /*                           XMMLoad4Values()                           */
    3336             : /*                                                                      */
    3337             : /*  Load 4 packed byte or uint16, cast them to float and put them in a  */
    3338             : /*  m128 register.                                                      */
    3339             : /************************************************************************/
    3340             : 
    3341   462280000 : static CPL_INLINE __m128 XMMLoad4Values(const GByte *ptr)
    3342             : {
    3343             :     unsigned int i;
    3344   462280000 :     memcpy(&i, ptr, 4);
    3345   924560000 :     __m128i xmm_i = _mm_cvtsi32_si128(i);
    3346             :     // Zero extend 4 packed unsigned 8-bit integers in a to packed
    3347             :     // 32-bit integers.
    3348             : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
    3349             :     xmm_i = _mm_cvtepu8_epi32(xmm_i);
    3350             : #else
    3351   924560000 :     xmm_i = _mm_unpacklo_epi8(xmm_i, _mm_setzero_si128());
    3352   924560000 :     xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
    3353             : #endif
    3354   924560000 :     return _mm_cvtepi32_ps(xmm_i);
    3355             : }
    3356             : 
    3357     1108340 : static CPL_INLINE __m128 XMMLoad4Values(const GUInt16 *ptr)
    3358             : {
    3359             :     GUInt64 i;
    3360     1108340 :     memcpy(&i, ptr, 8);
    3361     2216690 :     __m128i xmm_i = _mm_cvtsi64_si128(i);
    3362             :     // Zero extend 4 packed unsigned 16-bit integers in a to packed
    3363             :     // 32-bit integers.
    3364             : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
    3365             :     xmm_i = _mm_cvtepu16_epi32(xmm_i);
    3366             : #else
    3367     2216690 :     xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
    3368             : #endif
    3369     2216690 :     return _mm_cvtepi32_ps(xmm_i);
    3370             : }
    3371             : 
    3372             : /************************************************************************/
    3373             : /*                           XMMHorizontalAdd()                         */
    3374             : /*                                                                      */
    3375             : /*  Return the sum of the 4 floating points of the register.            */
    3376             : /************************************************************************/
    3377             : 
    3378             : #if defined(__SSE3__) || defined(USE_NEON_OPTIMIZATIONS)
    3379             : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
    3380             : {
    3381             :     __m128 shuf = _mm_movehdup_ps(v);   // (v3   , v3   , v1   , v1)
    3382             :     __m128 sums = _mm_add_ps(v, shuf);  // (v3+v3, v3+v2, v1+v1, v1+v0)
    3383             :     shuf = _mm_movehl_ps(shuf, sums);   // (v3   , v3   , v3+v3, v3+v2)
    3384             :     sums = _mm_add_ss(sums, shuf);      // (v1+v0)+(v3+v2)
    3385             :     return _mm_cvtss_f32(sums);
    3386             : }
    3387             : #else
    3388   115847000 : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
    3389             : {
    3390   115847000 :     __m128 shuf = _mm_movehl_ps(v, v);     // (v3   , v2   , v3   , v2)
    3391   115847000 :     __m128 sums = _mm_add_ps(v, shuf);     // (v3+v3, v2+v2, v3+v1, v2+v0)
    3392   115847000 :     shuf = _mm_shuffle_ps(sums, sums, 1);  // (v2+v0, v2+v0, v2+v0, v3+v1)
    3393   115847000 :     sums = _mm_add_ss(sums, shuf);         // (v2+v0)+(v3+v1)
    3394   115847000 :     return _mm_cvtss_f32(sums);
    3395             : }
    3396             : #endif
    3397             : 
    3398             : #endif  // define USE_SSE2
    3399             : 
    3400             : /************************************************************************/
    3401             : /*            GWKCubicResampleSrcMaskIsDensity4SampleRealT()            */
    3402             : /************************************************************************/
    3403             : 
    3404             : // Note: if USE_SSE_CUBIC_IMPL, only instantiate that for Byte and UInt16,
    3405             : // because there are a few assumptions above those types.
    3406             : // We do not define USE_SSE_CUBIC_IMPL since in practice, it gives zero
    3407             : // perf benefit.
    3408             : 
    3409             : template <class T>
    3410      389755 : static CPL_INLINE bool GWKCubicResampleSrcMaskIsDensity4SampleRealT(
    3411             :     const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
    3412             :     double *pdfDensity, double *pdfReal)
    3413             : {
    3414      389755 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3415      389755 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3416      389755 :     const GPtrDiff_t iSrcOffset =
    3417      389755 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3418             : 
    3419             :     // Get the bilinear interpolation at the image borders.
    3420      389755 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3421      387271 :         iSrcY + 2 >= poWK->nSrcYSize)
    3422             :     {
    3423        2484 :         double adfImagIgnored[4] = {};
    3424        2484 :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3425        2484 :                                           pdfDensity, pdfReal, adfImagIgnored);
    3426             :     }
    3427             : 
    3428             : #if defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
    3429             :     const float fDeltaX = static_cast<float>(dfSrcX) - 0.5f - iSrcX;
    3430             :     const float fDeltaY = static_cast<float>(dfSrcY) - 0.5f - iSrcY;
    3431             : 
    3432             :     // TODO(schwehr): Explain the magic numbers.
    3433             :     float afTemp[4 + 4 + 4 + 1];
    3434             :     float *pafAligned =
    3435             :         reinterpret_cast<float *>(afTemp + ((size_t)afTemp & 0xf));
    3436             :     float *pafCoeffs = pafAligned;
    3437             :     float *pafDensity = pafAligned + 4;
    3438             :     float *pafValue = pafAligned + 8;
    3439             : 
    3440             :     const float fHalfDeltaX = 0.5f * fDeltaX;
    3441             :     const float fThreeDeltaX = 3.0f * fDeltaX;
    3442             :     const float fHalfDeltaX2 = fHalfDeltaX * fDeltaX;
    3443             : 
    3444             :     pafCoeffs[0] = fHalfDeltaX * (-1 + fDeltaX * (2 - fDeltaX));
    3445             :     pafCoeffs[1] = 1 + fHalfDeltaX2 * (-5 + fThreeDeltaX);
    3446             :     pafCoeffs[2] = fHalfDeltaX * (1 + fDeltaX * (4 - fThreeDeltaX));
    3447             :     pafCoeffs[3] = fHalfDeltaX2 * (-1 + fDeltaX);
    3448             :     __m128 xmmCoeffs = _mm_load_ps(pafCoeffs);
    3449             :     const __m128 xmmThreshold = _mm_load1_ps(&SRC_DENSITY_THRESHOLD_FLOAT);
    3450             : 
    3451             :     __m128 xmmMaskLowDensity = _mm_setzero_ps();
    3452             :     for (GPtrDiff_t i = -1, iOffset = iSrcOffset - poWK->nSrcXSize - 1; i < 3;
    3453             :          i++, iOffset += poWK->nSrcXSize)
    3454             :     {
    3455             :         const __m128 xmmDensity =
    3456             :             _mm_loadu_ps(poWK->pafUnifiedSrcDensity + iOffset);
    3457             :         xmmMaskLowDensity = _mm_or_ps(xmmMaskLowDensity,
    3458             :                                       _mm_cmplt_ps(xmmDensity, xmmThreshold));
    3459             :         pafDensity[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
    3460             : 
    3461             :         const __m128 xmmValues =
    3462             :             XMMLoad4Values(((T *)poWK->papabySrcImage[iBand]) + iOffset);
    3463             :         pafValue[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValues));
    3464             :     }
    3465             :     if (_mm_movemask_ps(xmmMaskLowDensity))
    3466             :     {
    3467             :         double adfImagIgnored[4] = {};
    3468             :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3469             :                                           pdfDensity, pdfReal, adfImagIgnored);
    3470             :     }
    3471             : 
    3472             :     const float fHalfDeltaY = 0.5f * fDeltaY;
    3473             :     const float fThreeDeltaY = 3.0f * fDeltaY;
    3474             :     const float fHalfDeltaY2 = fHalfDeltaY * fDeltaY;
    3475             : 
    3476             :     pafCoeffs[0] = fHalfDeltaY * (-1 + fDeltaY * (2 - fDeltaY));
    3477             :     pafCoeffs[1] = 1 + fHalfDeltaY2 * (-5 + fThreeDeltaY);
    3478             :     pafCoeffs[2] = fHalfDeltaY * (1 + fDeltaY * (4 - fThreeDeltaY));
    3479             :     pafCoeffs[3] = fHalfDeltaY2 * (-1 + fDeltaY);
    3480             : 
    3481             :     xmmCoeffs = _mm_load_ps(pafCoeffs);
    3482             : 
    3483             :     const __m128 xmmDensity = _mm_load_ps(pafDensity);
    3484             :     const __m128 xmmValue = _mm_load_ps(pafValue);
    3485             :     *pdfDensity = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
    3486             :     *pdfReal = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValue));
    3487             : 
    3488             :     // We did all above computations on float32 whereas the general case is
    3489             :     // float64. Not sure if one is fundamentally more correct than the other
    3490             :     // one, but we want our optimization to give the same result as the
    3491             :     // general case as much as possible, so if the resulting value is
    3492             :     // close to some_int_value + 0.5, redo the computation with the general
    3493             :     // case.
    3494             :     // Note: If other types than Byte or UInt16, will need changes.
    3495             :     if (fabs(*pdfReal - static_cast<int>(*pdfReal) - 0.5) > .007)
    3496             :         return true;
    3497             : 
    3498             : #endif  // defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
    3499             : 
    3500      387271 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3501      387271 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3502             : 
    3503      387271 :     double adfValueDens[4] = {};
    3504      387271 :     double adfValueReal[4] = {};
    3505             : 
    3506      387271 :     double adfCoeffsX[4] = {};
    3507      387271 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
    3508             : 
    3509      387271 :     double adfCoeffsY[4] = {};
    3510      387271 :     GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
    3511             : 
    3512     1930200 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3513             :     {
    3514     1544480 :         const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
    3515             : #if !(defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2))
    3516     1544480 :         if (poWK->pafUnifiedSrcDensity[iOffset + 0] <
    3517     1542940 :                 SRC_DENSITY_THRESHOLD_FLOAT ||
    3518     1542940 :             poWK->pafUnifiedSrcDensity[iOffset + 1] <
    3519     1542940 :                 SRC_DENSITY_THRESHOLD_FLOAT ||
    3520     1542940 :             poWK->pafUnifiedSrcDensity[iOffset + 2] <
    3521     1542940 :                 SRC_DENSITY_THRESHOLD_FLOAT ||
    3522     1542940 :             poWK->pafUnifiedSrcDensity[iOffset + 3] <
    3523             :                 SRC_DENSITY_THRESHOLD_FLOAT)
    3524             :         {
    3525        1551 :             double adfImagIgnored[4] = {};
    3526        1551 :             return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3527             :                                               pdfDensity, pdfReal,
    3528        1551 :                                               adfImagIgnored);
    3529             :         }
    3530             : #endif
    3531             : 
    3532     3085860 :         adfValueDens[i + 1] =
    3533     1542930 :             CONVOL4(adfCoeffsX, poWK->pafUnifiedSrcDensity + iOffset);
    3534             : 
    3535     1542930 :         adfValueReal[i + 1] = CONVOL4(
    3536             :             adfCoeffsX,
    3537     1542930 :             reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
    3538             :     }
    3539             : 
    3540      385720 :     *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
    3541      385720 :     *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
    3542             : 
    3543      385720 :     return true;
    3544             : }
    3545             : 
    3546             : /************************************************************************/
    3547             : /*              GWKCubicResampleSrcMaskIsDensity4SampleReal()             */
    3548             : /*     Bi-cubic when source has and only has pafUnifiedSrcDensity.      */
    3549             : /************************************************************************/
    3550             : 
    3551           0 : static bool GWKCubicResampleSrcMaskIsDensity4SampleReal(
    3552             :     const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
    3553             :     double *pdfDensity, double *pdfReal)
    3554             : 
    3555             : {
    3556           0 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3557           0 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3558           0 :     const GPtrDiff_t iSrcOffset =
    3559           0 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3560           0 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3561           0 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3562             : 
    3563             :     // Get the bilinear interpolation at the image borders.
    3564           0 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3565           0 :         iSrcY + 2 >= poWK->nSrcYSize)
    3566             :     {
    3567           0 :         double adfImagIgnored[4] = {};
    3568           0 :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3569           0 :                                           pdfDensity, pdfReal, adfImagIgnored);
    3570             :     }
    3571             : 
    3572           0 :     double adfCoeffsX[4] = {};
    3573           0 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
    3574             : 
    3575           0 :     double adfCoeffsY[4] = {};
    3576           0 :     GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
    3577             : 
    3578           0 :     double adfValueDens[4] = {};
    3579           0 :     double adfValueReal[4] = {};
    3580           0 :     double adfDensity[4] = {};
    3581           0 :     double adfReal[4] = {};
    3582           0 :     double adfImagIgnored[4] = {};
    3583             : 
    3584           0 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3585             :     {
    3586           0 :         if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
    3587           0 :                             2, adfDensity, adfReal, adfImagIgnored) ||
    3588           0 :             adfDensity[0] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3589           0 :             adfDensity[1] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3590           0 :             adfDensity[2] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3591           0 :             adfDensity[3] < SRC_DENSITY_THRESHOLD_DOUBLE)
    3592             :         {
    3593           0 :             return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3594             :                                               pdfDensity, pdfReal,
    3595           0 :                                               adfImagIgnored);
    3596             :         }
    3597             : 
    3598           0 :         adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
    3599           0 :         adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
    3600             :     }
    3601             : 
    3602           0 :     *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
    3603           0 :     *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
    3604             : 
    3605           0 :     return true;
    3606             : }
    3607             : 
    3608             : template <class T>
    3609     2300964 : static bool GWKCubicResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
    3610             :                                             int iBand, double dfSrcX,
    3611             :                                             double dfSrcY, T *pValue)
    3612             : 
    3613             : {
    3614     2300964 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3615     2300964 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3616     2300964 :     const GPtrDiff_t iSrcOffset =
    3617     2300964 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3618     2300964 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3619     2300964 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3620     2300964 :     const double dfDeltaY2 = dfDeltaY * dfDeltaY;
    3621     2300964 :     const double dfDeltaY3 = dfDeltaY2 * dfDeltaY;
    3622             : 
    3623             :     // Get the bilinear interpolation at the image borders.
    3624     2300964 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3625     1883033 :         iSrcY + 2 >= poWK->nSrcYSize)
    3626      490244 :         return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    3627      490244 :                                                   pValue);
    3628             : 
    3629     1810720 :     double adfCoeffs[4] = {};
    3630     1810720 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffs);
    3631             : 
    3632     1810720 :     double adfValue[4] = {};
    3633             : 
    3634     9053590 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3635             :     {
    3636     7242876 :         const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
    3637             : 
    3638     7242876 :         adfValue[i + 1] = CONVOL4(
    3639             :             adfCoeffs,
    3640     7242876 :             reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
    3641             :     }
    3642             : 
    3643             :     const double dfValue =
    3644     1810720 :         CubicConvolution(dfDeltaY, dfDeltaY2, dfDeltaY3, adfValue[0],
    3645             :                          adfValue[1], adfValue[2], adfValue[3]);
    3646             : 
    3647     1810720 :     *pValue = GWKClampValueT<T>(dfValue);
    3648             : 
    3649     1810720 :     return true;
    3650             : }
    3651             : 
    3652             : /************************************************************************/
    3653             : /*                           GWKLanczosSinc()                           */
    3654             : /************************************************************************/
    3655             : 
    3656             : /*
    3657             :  * Lanczos windowed sinc interpolation kernel with radius r.
    3658             :  *        /
    3659             :  *        | sinc(x) * sinc(x/r), if |x| < r
    3660             :  * L(x) = | 1, if x = 0                     ,
    3661             :  *        | 0, otherwise
    3662             :  *        \
    3663             :  *
    3664             :  * where sinc(x) = sin(PI * x) / (PI * x).
    3665             :  */
    3666             : 
    3667        1632 : static double GWKLanczosSinc(double dfX)
    3668             : {
    3669        1632 :     if (dfX == 0.0)
    3670           0 :         return 1.0;
    3671             : 
    3672        1632 :     const double dfPIX = M_PI * dfX;
    3673        1632 :     const double dfPIXoverR = dfPIX / 3;
    3674        1632 :     const double dfPIX2overR = dfPIX * dfPIXoverR;
    3675             :     // Given that sin(3x) = 3 sin(x) - 4 sin^3 (x)
    3676             :     // we can compute sin(dfSinPIX) from sin(dfPIXoverR)
    3677        1632 :     const double dfSinPIXoverR = sin(dfPIXoverR);
    3678        1632 :     const double dfSinPIXoverRSquared = dfSinPIXoverR * dfSinPIXoverR;
    3679        1632 :     const double dfSinPIXMulSinPIXoverR =
    3680        1632 :         (3 - 4 * dfSinPIXoverRSquared) * dfSinPIXoverRSquared;
    3681        1632 :     return dfSinPIXMulSinPIXoverR / dfPIX2overR;
    3682             : }
    3683             : 
    3684      106692 : static double GWKLanczosSinc4Values(double *padfValues)
    3685             : {
    3686      533460 :     for (int i = 0; i < 4; i++)
    3687             :     {
    3688      426768 :         if (padfValues[i] == 0.0)
    3689             :         {
    3690           0 :             padfValues[i] = 1.0;
    3691             :         }
    3692             :         else
    3693             :         {
    3694      426768 :             const double dfPIX = M_PI * padfValues[i];
    3695      426768 :             const double dfPIXoverR = dfPIX / 3;
    3696      426768 :             const double dfPIX2overR = dfPIX * dfPIXoverR;
    3697             :             // Given that sin(3x) = 3 sin(x) - 4 sin^3 (x)
    3698             :             // we can compute sin(dfSinPIX) from sin(dfPIXoverR)
    3699      426768 :             const double dfSinPIXoverR = sin(dfPIXoverR);
    3700      426768 :             const double dfSinPIXoverRSquared = dfSinPIXoverR * dfSinPIXoverR;
    3701      426768 :             const double dfSinPIXMulSinPIXoverR =
    3702      426768 :                 (3 - 4 * dfSinPIXoverRSquared) * dfSinPIXoverRSquared;
    3703      426768 :             padfValues[i] = dfSinPIXMulSinPIXoverR / dfPIX2overR;
    3704             :         }
    3705             :     }
    3706      106692 :     return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
    3707             : }
    3708             : 
    3709             : /************************************************************************/
    3710             : /*                            GWKBilinear()                             */
    3711             : /************************************************************************/
    3712             : 
    3713     6339310 : static double GWKBilinear(double dfX)
    3714             : {
    3715     6339310 :     double dfAbsX = fabs(dfX);
    3716     6339310 :     if (dfAbsX <= 1.0)
    3717     5869990 :         return 1 - dfAbsX;
    3718             :     else
    3719      469322 :         return 0.0;
    3720             : }
    3721             : 
    3722      236458 : static double GWKBilinear4Values(double *padfValues)
    3723             : {
    3724      236458 :     double dfAbsX0 = fabs(padfValues[0]);
    3725      236458 :     double dfAbsX1 = fabs(padfValues[1]);
    3726      236458 :     double dfAbsX2 = fabs(padfValues[2]);
    3727      236458 :     double dfAbsX3 = fabs(padfValues[3]);
    3728      236458 :     if (dfAbsX0 <= 1.0)
    3729      236458 :         padfValues[0] = 1 - dfAbsX0;
    3730             :     else
    3731           0 :         padfValues[0] = 0.0;
    3732      236458 :     if (dfAbsX1 <= 1.0)
    3733      236458 :         padfValues[1] = 1 - dfAbsX1;
    3734             :     else
    3735           0 :         padfValues[1] = 0.0;
    3736      236458 :     if (dfAbsX2 <= 1.0)
    3737      236458 :         padfValues[2] = 1 - dfAbsX2;
    3738             :     else
    3739           0 :         padfValues[2] = 0.0;
    3740      236458 :     if (dfAbsX3 <= 1.0)
    3741      236442 :         padfValues[3] = 1 - dfAbsX3;
    3742             :     else
    3743          16 :         padfValues[3] = 0.0;
    3744      236458 :     return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
    3745             : }
    3746             : 
    3747             : /************************************************************************/
    3748             : /*                              GWKCubic()                              */
    3749             : /************************************************************************/
    3750             : 
    3751       86934 : static double GWKCubic(double dfX)
    3752             : {
    3753       86934 :     return CubicKernel(dfX);
    3754             : }
    3755             : 
    3756     2963710 : static double GWKCubic4Values(double *padfValues)
    3757             : {
    3758     2963710 :     const double dfAbsX_0 = fabs(padfValues[0]);
    3759     2963710 :     const double dfAbsX_1 = fabs(padfValues[1]);
    3760     2963710 :     const double dfAbsX_2 = fabs(padfValues[2]);
    3761     2963710 :     const double dfAbsX_3 = fabs(padfValues[3]);
    3762     2963710 :     const double dfX2_0 = padfValues[0] * padfValues[0];
    3763     2963710 :     const double dfX2_1 = padfValues[1] * padfValues[1];
    3764     2963710 :     const double dfX2_2 = padfValues[2] * padfValues[2];
    3765     2963710 :     const double dfX2_3 = padfValues[3] * padfValues[3];
    3766             : 
    3767     2963710 :     double dfVal0 = 0.0;
    3768     2963710 :     if (dfAbsX_0 <= 1.0)
    3769     1117140 :         dfVal0 = dfX2_0 * (1.5 * dfAbsX_0 - 2.5) + 1.0;
    3770     1846570 :     else if (dfAbsX_0 <= 2.0)
    3771     1846400 :         dfVal0 = dfX2_0 * (-0.5 * dfAbsX_0 + 2.5) - 4.0 * dfAbsX_0 + 2.0;
    3772             : 
    3773     2963710 :     double dfVal1 = 0.0;
    3774     2963710 :     if (dfAbsX_1 <= 1.0)
    3775     1844850 :         dfVal1 = dfX2_1 * (1.5 * dfAbsX_1 - 2.5) + 1.0;
    3776     1118860 :     else if (dfAbsX_1 <= 2.0)
    3777     1118860 :         dfVal1 = dfX2_1 * (-0.5 * dfAbsX_1 + 2.5) - 4.0 * dfAbsX_1 + 2.0;
    3778             : 
    3779     2963710 :     double dfVal2 = 0.0;
    3780     2963710 :     if (dfAbsX_2 <= 1.0)
    3781     1855340 :         dfVal2 = dfX2_2 * (1.5 * dfAbsX_2 - 2.5) + 1.0;
    3782     1108360 :     else if (dfAbsX_2 <= 2.0)
    3783     1108360 :         dfVal2 = dfX2_2 * (-0.5 * dfAbsX_2 + 2.5) - 4.0 * dfAbsX_2 + 2.0;
    3784             : 
    3785     2963710 :     double dfVal3 = 0.0;
    3786     2963710 :     if (dfAbsX_3 <= 1.0)
    3787     1127350 :         dfVal3 = dfX2_3 * (1.5 * dfAbsX_3 - 2.5) + 1.0;
    3788     1836360 :     else if (dfAbsX_3 <= 2.0)
    3789     1836200 :         dfVal3 = dfX2_3 * (-0.5 * dfAbsX_3 + 2.5) - 4.0 * dfAbsX_3 + 2.0;
    3790             : 
    3791     2963710 :     padfValues[0] = dfVal0;
    3792     2963710 :     padfValues[1] = dfVal1;
    3793     2963710 :     padfValues[2] = dfVal2;
    3794     2963710 :     padfValues[3] = dfVal3;
    3795     2963710 :     return dfVal0 + dfVal1 + dfVal2 + dfVal3;
    3796             : }
    3797             : 
    3798             : /************************************************************************/
    3799             : /*                             GWKBSpline()                             */
    3800             : /************************************************************************/
    3801             : 
    // Cubic B-spline kernel: equation 8 with (B,C)=(1,0) of
    // https://www.cs.utexas.edu/~fussell/courses/cs384g-fall2013/lectures/mitchell/Mitchell.pdf
    //
    //   k(x) = 1/6 * ( 3 * |x|^3 - 6 * |x|^2 + 4)            for |x| < 1
    //   k(x) = 1/6 * (  -|x|^3 + 6 * |x|^2 - 12 * |x| + 8)   for 1 <= |x| < 2
    //   k(x) = 0                                             otherwise
    //
    // The code evaluates the equivalent sum of truncated cubes
    //   (x+2)+^3 - 4 * (x+1)+^3 + 6 * (x)+^3 - 4 * (x-1)+^3
    // and does NOT apply the 1/6 factor: the value returned is 6 * k(x)
    // (for instance 4.0 at x == 0).
    // NOTE(review): presumably every caller normalizes by the sum of the
    // weights, which makes the constant factor irrelevant -- confirm.

    /** Return 6 * k(x) for the cubic B-spline kernel k described above.
     *
     * @param x signed distance to the kernel centre, in kernel units.
     * @return the un-normalized weight; 0.0 outside ]-2, 2[ and for NaN.
     */
    static double GWKBSpline(double x)
    {
        // The truncated-cube sum below has no (x-2)+^3 term, so on its own it
        // would yield -(x-2)^3 rather than 0 for x > 2. Reject everything
        // outside the support explicitly. The negated form also sends NaN to
        // 0.0.
        if (!(x > -2.0 && x < 2.0))
            return 0.0;

        const double xp2 = x + 2.0;
        const double xp1 = x + 1.0;
        const double xm1 = x - 1.0;

        // Add the terms from the innermost knot outwards; only the terms whose
        // knot lies to the left of x contribute.
        double dfSum = 0.0;
        if (xp1 > 0.0)
        {
            if (x > 0.0)
            {
                if (xm1 > 0.0)
                    dfSum = -4.0 * xm1 * xm1 * xm1;
                dfSum += 6.0 * x * x * x;
            }
            dfSum += -4.0 * xp1 * xp1 * xp1;
        }
        return dfSum + xp2 * xp2 * xp2;  // * 0.166666666666666666666
    }

    /** Apply GWKBSpline() in place to four consecutive values.
     *
     * @param padfValues array of 4 distances; on return each entry holds the
     *                   corresponding un-normalized B-spline weight.
     * @return the sum of the four weights.
     */
    static double GWKBSpline4Values(double *padfValues)
    {
        for (int i = 0; i < 4; ++i)
        {
            padfValues[i] = GWKBSpline(padfValues[i]);
        }
        return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
    }
    3860             : /************************************************************************/
    3861             : /*                         GWKResampleWrkStruct                         */
    3862             : /************************************************************************/
    3863             : 
    3864             : typedef struct _GWKResampleWrkStruct GWKResampleWrkStruct;
    3865             : 
    3866             : typedef bool (*pfnGWKResampleType)(const GDALWarpKernel *poWK, int iBand,
    3867             :                                    double dfSrcX, double dfSrcY,
    3868             :                                    double *pdfDensity, double *pdfReal,
    3869             :                                    double *pdfImag,
    3870             :                                    GWKResampleWrkStruct *psWrkStruct);
    3871             : 
    3872             : struct _GWKResampleWrkStruct
    3873             : {
    3874             :     pfnGWKResampleType pfnGWKResample;
    3875             : 
    3876             :     // Space for saved X weights.
    3877             :     double *padfWeightsX;
    3878             :     bool *pabCalcX;
    3879             : 
    3880             :     double *padfWeightsY;       // Only used by GWKResampleOptimizedLanczos.
    3881             :     int iLastSrcX;              // Only used by GWKResampleOptimizedLanczos.
    3882             :     int iLastSrcY;              // Only used by GWKResampleOptimizedLanczos.
    3883             :     double dfLastDeltaX;        // Only used by GWKResampleOptimizedLanczos.
    3884             :     double dfLastDeltaY;        // Only used by GWKResampleOptimizedLanczos.
    3885             :     double dfCosPiXScale;       // Only used by GWKResampleOptimizedLanczos.
    3886             :     double dfSinPiXScale;       // Only used by GWKResampleOptimizedLanczos.
    3887             :     double dfCosPiXScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3888             :     double dfSinPiXScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3889             :     double dfCosPiYScale;       // Only used by GWKResampleOptimizedLanczos.
    3890             :     double dfSinPiYScale;       // Only used by GWKResampleOptimizedLanczos.
    3891             :     double dfCosPiYScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3892             :     double dfSinPiYScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3893             : 
    3894             :     // Space for saving a row of pixels.
    3895             :     double *padfRowDensity;
    3896             :     double *padfRowReal;
    3897             :     double *padfRowImag;
    3898             : };
    3899             : 
    3900             : /************************************************************************/
    3901             : /*                     GWKResampleCreateWrkStruct()                     */
    3902             : /************************************************************************/
    3903             : 
    3904             : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
    3905             :                         double dfSrcY, double *pdfDensity, double *pdfReal,
    3906             :                         double *pdfImag, GWKResampleWrkStruct *psWrkStruct);
    3907             : 
    3908             : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
    3909             :                                         double dfSrcX, double dfSrcY,
    3910             :                                         double *pdfDensity, double *pdfReal,
    3911             :                                         double *pdfImag,
    3912             :                                         GWKResampleWrkStruct *psWrkStruct);
    3913             : 
    3914         397 : static GWKResampleWrkStruct *GWKResampleCreateWrkStruct(GDALWarpKernel *poWK)
    3915             : {
    3916         397 :     const int nXDist = (poWK->nXRadius + 1) * 2;
    3917         397 :     const int nYDist = (poWK->nYRadius + 1) * 2;
    3918             : 
    3919             :     GWKResampleWrkStruct *psWrkStruct = static_cast<GWKResampleWrkStruct *>(
    3920         397 :         CPLCalloc(1, sizeof(GWKResampleWrkStruct)));
    3921             : 
    3922             :     // Alloc space for saved X weights.
    3923         397 :     psWrkStruct->padfWeightsX =
    3924         397 :         static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3925         397 :     psWrkStruct->pabCalcX =
    3926         397 :         static_cast<bool *>(CPLMalloc(nXDist * sizeof(bool)));
    3927             : 
    3928         397 :     psWrkStruct->padfWeightsY =
    3929         397 :         static_cast<double *>(CPLCalloc(nYDist, sizeof(double)));
    3930         397 :     psWrkStruct->iLastSrcX = -10;
    3931         397 :     psWrkStruct->iLastSrcY = -10;
    3932         397 :     psWrkStruct->dfLastDeltaX = -10;
    3933         397 :     psWrkStruct->dfLastDeltaY = -10;
    3934             : 
    3935             :     // Alloc space for saving a row of pixels.
    3936         397 :     if (poWK->pafUnifiedSrcDensity == nullptr &&
    3937         363 :         poWK->panUnifiedSrcValid == nullptr &&
    3938         340 :         poWK->papanBandSrcValid == nullptr)
    3939             :     {
    3940         340 :         psWrkStruct->padfRowDensity = nullptr;
    3941             :     }
    3942             :     else
    3943             :     {
    3944          57 :         psWrkStruct->padfRowDensity =
    3945          57 :             static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3946             :     }
    3947         397 :     psWrkStruct->padfRowReal =
    3948         397 :         static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3949         397 :     psWrkStruct->padfRowImag =
    3950         397 :         static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3951             : 
    3952         397 :     if (poWK->eResample == GRA_Lanczos)
    3953             :     {
    3954          63 :         psWrkStruct->pfnGWKResample = GWKResampleOptimizedLanczos;
    3955             : 
    3956          63 :         if (poWK->dfXScale < 1)
    3957             :         {
    3958           4 :             psWrkStruct->dfCosPiXScaleOver3 = cos(M_PI / 3 * poWK->dfXScale);
    3959           4 :             psWrkStruct->dfSinPiXScaleOver3 =
    3960           4 :                 sqrt(1 - psWrkStruct->dfCosPiXScaleOver3 *
    3961           4 :                              psWrkStruct->dfCosPiXScaleOver3);
    3962             :             // "Naive":
    3963             :             // const double dfCosPiXScale = cos(  M_PI * dfXScale );
    3964             :             // const double dfSinPiXScale = sin(  M_PI * dfXScale );
    3965             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
    3966           4 :             psWrkStruct->dfCosPiXScale = (4 * psWrkStruct->dfCosPiXScaleOver3 *
    3967           4 :                                               psWrkStruct->dfCosPiXScaleOver3 -
    3968           4 :                                           3) *
    3969           4 :                                          psWrkStruct->dfCosPiXScaleOver3;
    3970           4 :             psWrkStruct->dfSinPiXScale = sqrt(
    3971           4 :                 1 - psWrkStruct->dfCosPiXScale * psWrkStruct->dfCosPiXScale);
    3972             :         }
    3973             : 
    3974          63 :         if (poWK->dfYScale < 1)
    3975             :         {
    3976          13 :             psWrkStruct->dfCosPiYScaleOver3 = cos(M_PI / 3 * poWK->dfYScale);
    3977          13 :             psWrkStruct->dfSinPiYScaleOver3 =
    3978          13 :                 sqrt(1 - psWrkStruct->dfCosPiYScaleOver3 *
    3979          13 :                              psWrkStruct->dfCosPiYScaleOver3);
    3980             :             // "Naive":
    3981             :             // const double dfCosPiYScale = cos(  M_PI * dfYScale );
    3982             :             // const double dfSinPiYScale = sin(  M_PI * dfYScale );
    3983             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
    3984          13 :             psWrkStruct->dfCosPiYScale = (4 * psWrkStruct->dfCosPiYScaleOver3 *
    3985          13 :                                               psWrkStruct->dfCosPiYScaleOver3 -
    3986          13 :                                           3) *
    3987          13 :                                          psWrkStruct->dfCosPiYScaleOver3;
    3988          13 :             psWrkStruct->dfSinPiYScale = sqrt(
    3989          13 :                 1 - psWrkStruct->dfCosPiYScale * psWrkStruct->dfCosPiYScale);
    3990             :         }
    3991             :     }
    3992             :     else
    3993         334 :         psWrkStruct->pfnGWKResample = GWKResample;
    3994             : 
    3995         397 :     return psWrkStruct;
    3996             : }
    3997             : 
    3998             : /************************************************************************/
    3999             : /*                     GWKResampleDeleteWrkStruct()                     */
    4000             : /************************************************************************/
    4001             : 
    4002         397 : static void GWKResampleDeleteWrkStruct(GWKResampleWrkStruct *psWrkStruct)
    4003             : {
    4004         397 :     CPLFree(psWrkStruct->padfWeightsX);
    4005         397 :     CPLFree(psWrkStruct->padfWeightsY);
    4006         397 :     CPLFree(psWrkStruct->pabCalcX);
    4007         397 :     CPLFree(psWrkStruct->padfRowDensity);
    4008         397 :     CPLFree(psWrkStruct->padfRowReal);
    4009         397 :     CPLFree(psWrkStruct->padfRowImag);
    4010         397 :     CPLFree(psWrkStruct);
    4011         397 : }
    4012             : 
    4013             : /************************************************************************/
    4014             : /*                            GWKResample()                             */
    4015             : /************************************************************************/
    4016             : 
    4017      239383 : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
    4018             :                         double dfSrcY, double *pdfDensity, double *pdfReal,
    4019             :                         double *pdfImag, GWKResampleWrkStruct *psWrkStruct)
    4020             : 
    4021             : {
    4022             :     // Save as local variables to avoid following pointers in loops.
    4023      239383 :     const int nSrcXSize = poWK->nSrcXSize;
    4024      239383 :     const int nSrcYSize = poWK->nSrcYSize;
    4025             : 
    4026      239383 :     double dfAccumulatorReal = 0.0;
    4027      239383 :     double dfAccumulatorImag = 0.0;
    4028      239383 :     double dfAccumulatorDensity = 0.0;
    4029      239383 :     double dfAccumulatorWeight = 0.0;
    4030      239383 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    4031      239383 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    4032      239383 :     const GPtrDiff_t iSrcOffset =
    4033      239383 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    4034      239383 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    4035      239383 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    4036             : 
    4037      239383 :     const double dfXScale = poWK->dfXScale;
    4038      239383 :     const double dfYScale = poWK->dfYScale;
    4039             : 
    4040      239383 :     const int nXDist = (poWK->nXRadius + 1) * 2;
    4041             : 
    4042             :     // Space for saved X weights.
    4043      239383 :     double *padfWeightsX = psWrkStruct->padfWeightsX;
    4044      239383 :     bool *pabCalcX = psWrkStruct->pabCalcX;
    4045             : 
    4046             :     // Space for saving a row of pixels.
    4047      239383 :     double *padfRowDensity = psWrkStruct->padfRowDensity;
    4048      239383 :     double *padfRowReal = psWrkStruct->padfRowReal;
    4049      239383 :     double *padfRowImag = psWrkStruct->padfRowImag;
    4050             : 
    4051             :     // Mark as needing calculation (don't calculate the weights yet,
    4052             :     // because a mask may render it unnecessary).
    4053      239383 :     memset(pabCalcX, false, nXDist * sizeof(bool));
    4054             : 
    4055      239383 :     FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample];
    4056      239383 :     CPLAssert(pfnGetWeight);
    4057             : 
    4058             :     // Skip sampling over edge of image.
    4059      239383 :     int j = poWK->nFiltInitY;
    4060      239383 :     int jMax = poWK->nYRadius;
    4061      239383 :     if (iSrcY + j < 0)
    4062         566 :         j = -iSrcY;
    4063      239383 :     if (iSrcY + jMax >= nSrcYSize)
    4064         662 :         jMax = nSrcYSize - iSrcY - 1;
    4065             : 
    4066      239383 :     int iMin = poWK->nFiltInitX;
    4067      239383 :     int iMax = poWK->nXRadius;
    4068      239383 :     if (iSrcX + iMin < 0)
    4069         566 :         iMin = -iSrcX;
    4070      239383 :     if (iSrcX + iMax >= nSrcXSize)
    4071         659 :         iMax = nSrcXSize - iSrcX - 1;
    4072             : 
    4073      239383 :     const int bXScaleBelow1 = (dfXScale < 1.0);
    4074      239383 :     const int bYScaleBelow1 = (dfYScale < 1.0);
    4075             : 
    4076      239383 :     GPtrDiff_t iRowOffset =
    4077      239383 :         iSrcOffset + static_cast<GPtrDiff_t>(j - 1) * nSrcXSize + iMin;
    4078             : 
    4079             :     // Loop over pixel rows in the kernel.
    4080     1445930 :     for (; j <= jMax; ++j)
    4081             :     {
    4082     1206540 :         iRowOffset += nSrcXSize;
    4083             : 
    4084             :         // Get pixel values.
    4085             :         // We can potentially read extra elements after the "normal" end of the
    4086             :         // source arrays, but the contract of papabySrcImage[iBand],
    4087             :         // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
    4088             :         // is to have WARP_EXTRA_ELTS reserved at their end.
    4089     1206540 :         if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
    4090             :                             padfRowDensity, padfRowReal, padfRowImag))
    4091          72 :             continue;
    4092             : 
    4093             :         // Calculate the Y weight.
    4094             :         double dfWeight1 = (bYScaleBelow1)
    4095     1206470 :                                ? pfnGetWeight((j - dfDeltaY) * dfYScale)
    4096        1600 :                                : pfnGetWeight(j - dfDeltaY);
    4097             : 
    4098             :         // Iterate over pixels in row.
    4099     1206470 :         double dfAccumulatorRealLocal = 0.0;
    4100     1206470 :         double dfAccumulatorImagLocal = 0.0;
    4101     1206470 :         double dfAccumulatorDensityLocal = 0.0;
    4102     1206470 :         double dfAccumulatorWeightLocal = 0.0;
    4103             : 
    4104     7317420 :         for (int i = iMin; i <= iMax; ++i)
    4105             :         {
    4106             :             // Skip sampling if pixel has zero density.
    4107     6110940 :             if (padfRowDensity != nullptr &&
    4108       77277 :                 padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD_DOUBLE)
    4109         546 :                 continue;
    4110             : 
    4111     6110400 :             double dfWeight2 = 0.0;
    4112             : 
    4113             :             // Make or use a cached set of weights for this row.
    4114     6110400 :             if (pabCalcX[i - iMin])
    4115             :             {
    4116             :                 // Use saved weight value instead of recomputing it.
    4117     4903920 :                 dfWeight2 = padfWeightsX[i - iMin];
    4118             :             }
    4119             :             else
    4120             :             {
    4121             :                 // Calculate & save the X weight.
    4122     1206480 :                 padfWeightsX[i - iMin] = dfWeight2 =
    4123     1206480 :                     (bXScaleBelow1) ? pfnGetWeight((i - dfDeltaX) * dfXScale)
    4124        1600 :                                     : pfnGetWeight(i - dfDeltaX);
    4125             : 
    4126     1206480 :                 pabCalcX[i - iMin] = true;
    4127             :             }
    4128             : 
    4129             :             // Accumulate!
    4130     6110400 :             dfAccumulatorRealLocal += padfRowReal[i - iMin] * dfWeight2;
    4131     6110400 :             dfAccumulatorImagLocal += padfRowImag[i - iMin] * dfWeight2;
    4132     6110400 :             if (padfRowDensity != nullptr)
    4133       76731 :                 dfAccumulatorDensityLocal +=
    4134       76731 :                     padfRowDensity[i - iMin] * dfWeight2;
    4135     6110400 :             dfAccumulatorWeightLocal += dfWeight2;
    4136             :         }
    4137             : 
    4138     1206470 :         dfAccumulatorReal += dfAccumulatorRealLocal * dfWeight1;
    4139     1206470 :         dfAccumulatorImag += dfAccumulatorImagLocal * dfWeight1;
    4140     1206470 :         dfAccumulatorDensity += dfAccumulatorDensityLocal * dfWeight1;
    4141     1206470 :         dfAccumulatorWeight += dfAccumulatorWeightLocal * dfWeight1;
    4142             :     }
    4143             : 
    4144      239383 :     if (dfAccumulatorWeight < 0.000001 ||
    4145        1887 :         (padfRowDensity != nullptr && dfAccumulatorDensity < 0.000001))
    4146             :     {
    4147           0 :         *pdfDensity = 0.0;
    4148           0 :         return false;
    4149             :     }
    4150             : 
    4151             :     // Calculate the output taking into account weighting.
    4152      239383 :     if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
    4153             :     {
    4154      239380 :         *pdfReal = dfAccumulatorReal / dfAccumulatorWeight;
    4155      239380 :         *pdfImag = dfAccumulatorImag / dfAccumulatorWeight;
    4156      239380 :         if (padfRowDensity != nullptr)
    4157        1884 :             *pdfDensity = dfAccumulatorDensity / dfAccumulatorWeight;
    4158             :         else
    4159      237496 :             *pdfDensity = 1.0;
    4160             :     }
    4161             :     else
    4162             :     {
    4163           3 :         *pdfReal = dfAccumulatorReal;
    4164           3 :         *pdfImag = dfAccumulatorImag;
    4165           3 :         if (padfRowDensity != nullptr)
    4166           3 :             *pdfDensity = dfAccumulatorDensity;
    4167             :         else
    4168           0 :             *pdfDensity = 1.0;
    4169             :     }
    4170             : 
    4171      239383 :     return true;
    4172             : }
    4173             : 
    4174             : /************************************************************************/
    4175             : /*                    GWKResampleOptimizedLanczos()                     */
    4176             : /************************************************************************/
    4177             : 
    4178      617144 : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
    4179             :                                         double dfSrcX, double dfSrcY,
    4180             :                                         double *pdfDensity, double *pdfReal,
    4181             :                                         double *pdfImag,
    4182             :                                         GWKResampleWrkStruct *psWrkStruct)
    4183             : 
    4184             : {
    4185             :     // Save as local variables to avoid following pointers in loops.
    4186      617144 :     const int nSrcXSize = poWK->nSrcXSize;
    4187      617144 :     const int nSrcYSize = poWK->nSrcYSize;
    4188             : 
    4189      617144 :     double dfAccumulatorReal = 0.0;
    4190      617144 :     double dfAccumulatorImag = 0.0;
    4191      617144 :     double dfAccumulatorDensity = 0.0;
    4192      617144 :     double dfAccumulatorWeight = 0.0;
    4193      617144 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    4194      617144 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    4195      617144 :     const GPtrDiff_t iSrcOffset =
    4196      617144 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    4197      617144 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    4198      617144 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    4199             : 
    4200      617144 :     const double dfXScale = poWK->dfXScale;
    4201      617144 :     const double dfYScale = poWK->dfYScale;
    4202             : 
    4203             :     // Space for saved X weights.
    4204      617144 :     double *const padfWeightsXShifted =
    4205      617144 :         psWrkStruct->padfWeightsX - poWK->nFiltInitX;
    4206      617144 :     double *const padfWeightsYShifted =
    4207      617144 :         psWrkStruct->padfWeightsY - poWK->nFiltInitY;
    4208             : 
    4209             :     // Space for saving a row of pixels.
    4210      617144 :     double *const padfRowDensity = psWrkStruct->padfRowDensity;
    4211      617144 :     double *const padfRowReal = psWrkStruct->padfRowReal;
    4212      617144 :     double *const padfRowImag = psWrkStruct->padfRowImag;
    4213             : 
    4214             :     // Skip sampling over edge of image.
    4215      617144 :     int jMin = poWK->nFiltInitY;
    4216      617144 :     int jMax = poWK->nYRadius;
    4217      617144 :     if (iSrcY + jMin < 0)
    4218       16572 :         jMin = -iSrcY;
    4219      617144 :     if (iSrcY + jMax >= nSrcYSize)
    4220        5782 :         jMax = nSrcYSize - iSrcY - 1;
    4221             : 
    4222      617144 :     int iMin = poWK->nFiltInitX;
    4223      617144 :     int iMax = poWK->nXRadius;
    4224      617144 :     if (iSrcX + iMin < 0)
    4225       15797 :         iMin = -iSrcX;
    4226      617144 :     if (iSrcX + iMax >= nSrcXSize)
    4227        4657 :         iMax = nSrcXSize - iSrcX - 1;
    4228             : 
    4229      617144 :     if (dfXScale < 1.0)
    4230             :     {
    4231      403041 :         while ((iMin - dfDeltaX) * dfXScale < -3.0)
    4232      200179 :             iMin++;
    4233      202862 :         while ((iMax - dfDeltaX) * dfXScale > 3.0)
    4234           0 :             iMax--;
    4235             : 
    4236             :         // clang-format off
    4237             :         /*
    4238             :         Naive version:
    4239             :         for (int i = iMin; i <= iMax; ++i)
    4240             :         {
    4241             :             psWrkStruct->padfWeightsXShifted[i] =
    4242             :                 GWKLanczosSinc((i - dfDeltaX) * dfXScale);
    4243             :         }
    4244             : 
    4245             :         but given that:
    4246             : 
    4247             :         GWKLanczosSinc(x):
    4248             :             if (dfX == 0.0)
    4249             :                 return 1.0;
    4250             : 
    4251             :             const double dfPIX = M_PI * dfX;
    4252             :             const double dfPIXoverR = dfPIX / 3;
    4253             :             const double dfPIX2overR = dfPIX * dfPIXoverR;
    4254             :             return sin(dfPIX) * sin(dfPIXoverR) / dfPIX2overR;
    4255             : 
    4256             :         and
    4257             :             sin (a + b) = sin a cos b + cos a sin b.
    4258             :             cos (a + b) = cos a cos b - sin a sin b.
    4259             : 
    4260             :         we can skip any sin() computation within the loop
    4261             :         */
    4262             :         // clang-format on
    4263             : 
    4264      202862 :         if (iSrcX != psWrkStruct->iLastSrcX ||
    4265      131072 :             dfDeltaX != psWrkStruct->dfLastDeltaX)
    4266             :         {
    4267       71790 :             double dfX = (iMin - dfDeltaX) * dfXScale;
    4268             : 
    4269       71790 :             double dfPIXover3 = M_PI / 3 * dfX;
    4270       71790 :             double dfCosOver3 = cos(dfPIXover3);
    4271       71790 :             double dfSinOver3 = sin(dfPIXover3);
    4272             : 
    4273             :             // "Naive":
    4274             :             // double dfSin = sin( M_PI * dfX );
    4275             :             // double dfCos = cos( M_PI * dfX );
    4276             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
    4277       71790 :             double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
    4278       71790 :             double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
    4279             : 
    4280       71790 :             const double dfCosPiXScaleOver3 = psWrkStruct->dfCosPiXScaleOver3;
    4281       71790 :             const double dfSinPiXScaleOver3 = psWrkStruct->dfSinPiXScaleOver3;
    4282       71790 :             const double dfCosPiXScale = psWrkStruct->dfCosPiXScale;
    4283       71790 :             const double dfSinPiXScale = psWrkStruct->dfSinPiXScale;
    4284       71790 :             constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
    4285       71790 :             padfWeightsXShifted[iMin] =
    4286       71790 :                 dfX == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfX * dfX);
    4287     1636480 :             for (int i = iMin + 1; i <= iMax; ++i)
    4288             :             {
    4289     1564690 :                 dfX += dfXScale;
    4290     1564690 :                 const double dfNewSin =
    4291     1564690 :                     dfSin * dfCosPiXScale + dfCos * dfSinPiXScale;
    4292     1564690 :                 const double dfNewSinOver3 = dfSinOver3 * dfCosPiXScaleOver3 +
    4293     1564690 :                                              dfCosOver3 * dfSinPiXScaleOver3;
    4294     1564690 :                 padfWeightsXShifted[i] =
    4295             :                     dfX == 0
    4296     1564690 :                         ? 1.0
    4297     1564690 :                         : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfX * dfX);
    4298     1564690 :                 const double dfNewCos =
    4299     1564690 :                     dfCos * dfCosPiXScale - dfSin * dfSinPiXScale;
    4300     1564690 :                 const double dfNewCosOver3 = dfCosOver3 * dfCosPiXScaleOver3 -
    4301     1564690 :                                              dfSinOver3 * dfSinPiXScaleOver3;
    4302     1564690 :                 dfSin = dfNewSin;
    4303     1564690 :                 dfCos = dfNewCos;
    4304     1564690 :                 dfSinOver3 = dfNewSinOver3;
    4305     1564690 :                 dfCosOver3 = dfNewCosOver3;
    4306             :             }
    4307             : 
    4308       71790 :             psWrkStruct->iLastSrcX = iSrcX;
    4309       71790 :             psWrkStruct->dfLastDeltaX = dfDeltaX;
    4310             :         }
    4311             :     }
    4312             :     else
    4313             :     {
    4314      757542 :         while (iMin - dfDeltaX < -3.0)
    4315      343260 :             iMin++;
    4316      414282 :         while (iMax - dfDeltaX > 3.0)
    4317           0 :             iMax--;
    4318             : 
    4319      414282 :         if (iSrcX != psWrkStruct->iLastSrcX ||
    4320      209580 :             dfDeltaX != psWrkStruct->dfLastDeltaX)
    4321             :         {
    4322             :             // Optimisation of GWKLanczosSinc(i - dfDeltaX) based on the
    4323             :             // following trigonometric formulas.
    4324             : 
    4325             :             // TODO(schwehr): Move this somewhere where it can be rendered at
    4326             :             // LaTeX.
    4327             :             // clang-format off
    4328             :             // sin(M_PI * (dfBase + k)) = sin(M_PI * dfBase) * cos(M_PI * k) +
    4329             :             //                            cos(M_PI * dfBase) * sin(M_PI * k)
    4330             :             // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k) + dfCosPIBase * sin(M_PI * k)
    4331             :             // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k)
    4332             :             // sin(M_PI * (dfBase + k)) = dfSinPIBase * (((k % 2) == 0) ? 1 : -1)
    4333             : 
    4334             :             // sin(M_PI / dfR * (dfBase + k)) = sin(M_PI / dfR * dfBase) * cos(M_PI / dfR * k) +
    4335             :             //                                  cos(M_PI / dfR * dfBase) * sin(M_PI / dfR * k)
    4336             :             // sin(M_PI / dfR * (dfBase + k)) = dfSinPIBaseOverR * cos(M_PI / dfR * k) + dfCosPIBaseOverR * sin(M_PI / dfR * k)
    4337             :             // clang-format on
    4338             : 
    4339      414282 :             const double dfSinPIDeltaXOver3 = sin((-M_PI / 3.0) * dfDeltaX);
    4340      414282 :             const double dfSin2PIDeltaXOver3 =
    4341             :                 dfSinPIDeltaXOver3 * dfSinPIDeltaXOver3;
    4342             :             // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaX < PI/2.
    4343      414282 :             const double dfCosPIDeltaXOver3 = sqrt(1.0 - dfSin2PIDeltaXOver3);
    4344      414282 :             const double dfSinPIDeltaX =
    4345      414282 :                 (3.0 - 4 * dfSin2PIDeltaXOver3) * dfSinPIDeltaXOver3;
    4346      414282 :             const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
    4347      414282 :             const double dfInvPI2Over3xSinPIDeltaX =
    4348             :                 dfInvPI2Over3 * dfSinPIDeltaX;
    4349      414282 :             const double dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 =
    4350      414282 :                 -0.5 * dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3;
    4351      414282 :             const double dfSinPIOver3 = 0.8660254037844386;
    4352      414282 :             const double dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3 =
    4353      414282 :                 dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaX * dfCosPIDeltaXOver3;
    4354             :             const double padfCst[] = {
    4355      414282 :                 dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3,
    4356      414282 :                 dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 -
    4357             :                     dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3,
    4358      414282 :                 dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 +
    4359      414282 :                     dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3};
    4360             : 
    4361     2936860 :             for (int i = iMin; i <= iMax; ++i)
    4362             :             {
    4363     2522570 :                 const double dfX = i - dfDeltaX;
    4364     2522570 :                 if (dfX == 0.0)
    4365       58282 :                     padfWeightsXShifted[i] = 1.0;
    4366             :                 else
    4367     2464290 :                     padfWeightsXShifted[i] = padfCst[(i + 3) % 3] / (dfX * dfX);
    4368             : #if DEBUG_VERBOSE
    4369             :                     // TODO(schwehr): AlmostEqual.
    4370             :                     // CPLAssert(fabs(padfWeightsX[i-poWK->nFiltInitX] -
    4371             :                     //               GWKLanczosSinc(dfX, 3.0)) < 1e-10);
    4372             : #endif
    4373             :             }
    4374             : 
    4375      414282 :             psWrkStruct->iLastSrcX = iSrcX;
    4376      414282 :             psWrkStruct->dfLastDeltaX = dfDeltaX;
    4377             :         }
    4378             :     }
    4379             : 
    4380      617144 :     if (dfYScale < 1.0)
    4381             :     {
    4382      406666 :         while ((jMin - dfDeltaY) * dfYScale < -3.0)
    4383      203804 :             jMin++;
    4384      206462 :         while ((jMax - dfDeltaY) * dfYScale > 3.0)
    4385        3600 :             jMax--;
    4386             : 
    4387             :         // clang-format off
    4388             :         /*
    4389             :         Naive version:
    4390             :         for (int j = jMin; j <= jMax; ++j)
    4391             :         {
    4392             :             padfWeightsYShifted[j] =
    4393             :                 GWKLanczosSinc((j - dfDeltaY) * dfYScale);
    4394             :         }
    4395             :         */
    4396             :         // clang-format on
    4397             : 
    4398      202862 :         if (iSrcY != psWrkStruct->iLastSrcY ||
    4399      202479 :             dfDeltaY != psWrkStruct->dfLastDeltaY)
    4400             :         {
    4401         383 :             double dfY = (jMin - dfDeltaY) * dfYScale;
    4402             : 
    4403         383 :             double dfPIYover3 = M_PI / 3 * dfY;
    4404         383 :             double dfCosOver3 = cos(dfPIYover3);
    4405         383 :             double dfSinOver3 = sin(dfPIYover3);
    4406             : 
    4407             :             // "Naive":
    4408             :             // double dfSin = sin( M_PI * dfY );
    4409             :             // double dfCos = cos( M_PI * dfY );
    4410             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
    4411         383 :             double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
    4412         383 :             double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
    4413             : 
    4414         383 :             const double dfCosPiYScaleOver3 = psWrkStruct->dfCosPiYScaleOver3;
    4415         383 :             const double dfSinPiYScaleOver3 = psWrkStruct->dfSinPiYScaleOver3;
    4416         383 :             const double dfCosPiYScale = psWrkStruct->dfCosPiYScale;
    4417         383 :             const double dfSinPiYScale = psWrkStruct->dfSinPiYScale;
    4418         383 :             constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
    4419         383 :             padfWeightsYShifted[jMin] =
    4420         383 :                 dfY == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfY * dfY);
    4421        7318 :             for (int j = jMin + 1; j <= jMax; ++j)
    4422             :             {
    4423        6935 :                 dfY += dfYScale;
    4424        6935 :                 const double dfNewSin =
    4425        6935 :                     dfSin * dfCosPiYScale + dfCos * dfSinPiYScale;
    4426        6935 :                 const double dfNewSinOver3 = dfSinOver3 * dfCosPiYScaleOver3 +
    4427        6935 :                                              dfCosOver3 * dfSinPiYScaleOver3;
    4428        6935 :                 padfWeightsYShifted[j] =
    4429             :                     dfY == 0
    4430        6935 :                         ? 1.0
    4431        6935 :                         : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfY * dfY);
    4432        6935 :                 const double dfNewCos =
    4433        6935 :                     dfCos * dfCosPiYScale - dfSin * dfSinPiYScale;
    4434        6935 :                 const double dfNewCosOver3 = dfCosOver3 * dfCosPiYScaleOver3 -
    4435        6935 :                                              dfSinOver3 * dfSinPiYScaleOver3;
    4436        6935 :                 dfSin = dfNewSin;
    4437        6935 :                 dfCos = dfNewCos;
    4438        6935 :                 dfSinOver3 = dfNewSinOver3;
    4439        6935 :                 dfCosOver3 = dfNewCosOver3;
    4440             :             }
    4441             : 
    4442         383 :             psWrkStruct->iLastSrcY = iSrcY;
    4443         383 :             psWrkStruct->dfLastDeltaY = dfDeltaY;
    4444             :         }
    4445             :     }
    4446             :     else
    4447             :     {
    4448      681542 :         while (jMin - dfDeltaY < -3.0)
    4449      267260 :             jMin++;
    4450      414282 :         while (jMax - dfDeltaY > 3.0)
    4451           0 :             jMax--;
    4452             : 
    4453      414282 :         if (iSrcY != psWrkStruct->iLastSrcY ||
    4454      413631 :             dfDeltaY != psWrkStruct->dfLastDeltaY)
    4455             :         {
    4456        1132 :             const double dfSinPIDeltaYOver3 = sin((-M_PI / 3.0) * dfDeltaY);
    4457        1132 :             const double dfSin2PIDeltaYOver3 =
    4458             :                 dfSinPIDeltaYOver3 * dfSinPIDeltaYOver3;
    4459             :             // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaY < PI/2.
    4460        1132 :             const double dfCosPIDeltaYOver3 = sqrt(1.0 - dfSin2PIDeltaYOver3);
    4461        1132 :             const double dfSinPIDeltaY =
    4462        1132 :                 (3.0 - 4.0 * dfSin2PIDeltaYOver3) * dfSinPIDeltaYOver3;
    4463        1132 :             const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
    4464        1132 :             const double dfInvPI2Over3xSinPIDeltaY =
    4465             :                 dfInvPI2Over3 * dfSinPIDeltaY;
    4466        1132 :             const double dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 =
    4467        1132 :                 -0.5 * dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3;
    4468        1132 :             const double dfSinPIOver3 = 0.8660254037844386;
    4469        1132 :             const double dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3 =
    4470        1132 :                 dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaY * dfCosPIDeltaYOver3;
    4471             :             const double padfCst[] = {
    4472        1132 :                 dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3,
    4473        1132 :                 dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 -
    4474             :                     dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3,
    4475        1132 :                 dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 +
    4476        1132 :                     dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3};
    4477             : 
    4478        7925 :             for (int j = jMin; j <= jMax; ++j)
    4479             :             {
    4480        6793 :                 const double dfY = j - dfDeltaY;
    4481        6793 :                 if (dfY == 0.0)
    4482         468 :                     padfWeightsYShifted[j] = 1.0;
    4483             :                 else
    4484        6325 :                     padfWeightsYShifted[j] = padfCst[(j + 3) % 3] / (dfY * dfY);
    4485             : #if DEBUG_VERBOSE
    4486             :                     // TODO(schwehr): AlmostEqual.
    4487             :                     // CPLAssert(fabs(padfWeightsYShifted[j] -
    4488             :                     //               GWKLanczosSinc(dfY, 3.0)) < 1e-10);
    4489             : #endif
    4490             :             }
    4491             : 
    4492        1132 :             psWrkStruct->iLastSrcY = iSrcY;
    4493        1132 :             psWrkStruct->dfLastDeltaY = dfDeltaY;
    4494             :         }
    4495             :     }
    4496             : 
    4497             :     // If we have no density information, we can simply compute the
    4498             :     // accumulated weight.
    4499      617144 :     if (padfRowDensity == nullptr)
    4500             :     {
    4501      617144 :         double dfRowAccWeight = 0.0;
    4502     7903490 :         for (int i = iMin; i <= iMax; ++i)
    4503             :         {
    4504     7286350 :             dfRowAccWeight += padfWeightsXShifted[i];
    4505             :         }
    4506      617144 :         double dfColAccWeight = 0.0;
    4507     7961240 :         for (int j = jMin; j <= jMax; ++j)
    4508             :         {
    4509     7344100 :             dfColAccWeight += padfWeightsYShifted[j];
    4510             :         }
    4511      617144 :         dfAccumulatorWeight = dfRowAccWeight * dfColAccWeight;
    4512             :     }
    4513             : 
    4514             :     // Loop over pixel rows in the kernel.
    4515             : 
    4516      617144 :     if (poWK->eWorkingDataType == GDT_UInt8 && !poWK->panUnifiedSrcValid &&
    4517      616524 :         !poWK->papanBandSrcValid && !poWK->pafUnifiedSrcDensity &&
    4518             :         !padfRowDensity)
    4519             :     {
    4520             :         // Optimization for Byte case without any masking/alpha
    4521             : 
    4522      616524 :         if (dfAccumulatorWeight < 0.000001)
    4523             :         {
    4524           0 :             *pdfDensity = 0.0;
    4525           0 :             return false;
    4526             :         }
    4527             : 
    4528      616524 :         const GByte *pSrc =
    4529      616524 :             reinterpret_cast<const GByte *>(poWK->papabySrcImage[iBand]);
    4530      616524 :         pSrc += iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
    4531             : 
    4532             : #if defined(USE_SSE2)
    4533      616524 :         if (iMax - iMin + 1 == 6)
    4534             :         {
    4535             :             // This is just an optimized version of the general case in
    4536             :             // the else clause.
    4537             : 
    4538      346854 :             pSrc += iMin;
    4539      346854 :             int j = jMin;
    4540             :             const auto fourXWeights =
    4541      346854 :                 XMMReg4Double::Load4Val(padfWeightsXShifted + iMin);
    4542             : 
    4543             :             // Process 2 lines at the same time.
    4544     1375860 :             for (; j < jMax; j += 2)
    4545             :             {
    4546             :                 const XMMReg4Double v_acc =
    4547     1029000 :                     XMMReg4Double::Load4Val(pSrc) * fourXWeights;
    4548             :                 const XMMReg4Double v_acc2 =
    4549     1029000 :                     XMMReg4Double::Load4Val(pSrc + nSrcXSize) * fourXWeights;
    4550     1029000 :                 const double dfRowAcc = v_acc.GetHorizSum();
    4551     1029000 :                 const double dfRowAccEnd =
    4552     1029000 :                     pSrc[4] * padfWeightsXShifted[iMin + 4] +
    4553     1029000 :                     pSrc[5] * padfWeightsXShifted[iMin + 5];
    4554     1029000 :                 dfAccumulatorReal +=
    4555     1029000 :                     (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
    4556     1029000 :                 const double dfRowAcc2 = v_acc2.GetHorizSum();
    4557     1029000 :                 const double dfRowAcc2End =
    4558     1029000 :                     pSrc[nSrcXSize + 4] * padfWeightsXShifted[iMin + 4] +
    4559     1029000 :                     pSrc[nSrcXSize + 5] * padfWeightsXShifted[iMin + 5];
    4560     1029000 :                 dfAccumulatorReal +=
    4561     1029000 :                     (dfRowAcc2 + dfRowAcc2End) * padfWeightsYShifted[j + 1];
    4562     1029000 :                 pSrc += 2 * nSrcXSize;
    4563             :             }
    4564      346854 :             if (j == jMax)
    4565             :             {
    4566             :                 // Process last line if there's an odd number of them.
    4567             : 
    4568             :                 const XMMReg4Double v_acc =
    4569       88077 :                     XMMReg4Double::Load4Val(pSrc) * fourXWeights;
    4570       88077 :                 const double dfRowAcc = v_acc.GetHorizSum();
    4571       88077 :                 const double dfRowAccEnd =
    4572       88077 :                     pSrc[4] * padfWeightsXShifted[iMin + 4] +
    4573       88077 :                     pSrc[5] * padfWeightsXShifted[iMin + 5];
    4574       88077 :                 dfAccumulatorReal +=
    4575       88077 :                     (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
    4576             :             }
    4577             :         }
    4578             :         else
    4579             : #endif
    4580             :         {
    4581     5464740 :             for (int j = jMin; j <= jMax; ++j)
    4582             :             {
    4583     5195070 :                 int i = iMin;
    4584     5195070 :                 double dfRowAcc1 = 0.0;
    4585     5195070 :                 double dfRowAcc2 = 0.0;
    4586             :                 // A bit of loop unrolling
    4587    62755200 :                 for (; i < iMax; i += 2)
    4588             :                 {
    4589    57560100 :                     dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
    4590    57560100 :                     dfRowAcc2 += pSrc[i + 1] * padfWeightsXShifted[i + 1];
    4591             :                 }
    4592     5195070 :                 if (i == iMax)
    4593             :                 {
    4594             :                     // Process last column if there's an odd number of them.
    4595      427335 :                     dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
    4596             :                 }
    4597             : 
    4598     5195070 :                 dfAccumulatorReal +=
    4599     5195070 :                     (dfRowAcc1 + dfRowAcc2) * padfWeightsYShifted[j];
    4600     5195070 :                 pSrc += nSrcXSize;
    4601             :             }
    4602             :         }
    4603             : 
    4604             :         // Calculate the output taking into account weighting.
    4605      616524 :         if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
    4606             :         {
    4607      562318 :             const double dfInvAcc = 1.0 / dfAccumulatorWeight;
    4608      562318 :             *pdfReal = dfAccumulatorReal * dfInvAcc;
    4609      562318 :             *pdfDensity = 1.0;
    4610             :         }
    4611             :         else
    4612             :         {
    4613       54206 :             *pdfReal = dfAccumulatorReal;
    4614       54206 :             *pdfDensity = 1.0;
    4615             :         }
    4616             : 
    4617      616524 :         return true;
    4618             :     }
    4619             : 
    4620         620 :     GPtrDiff_t iRowOffset =
    4621         620 :         iSrcOffset + static_cast<GPtrDiff_t>(jMin - 1) * nSrcXSize + iMin;
    4622             : 
    4623         620 :     int nCountValid = 0;
    4624         620 :     const bool bIsNonComplex = !GDALDataTypeIsComplex(poWK->eWorkingDataType);
    4625             : 
    4626        3560 :     for (int j = jMin; j <= jMax; ++j)
    4627             :     {
    4628        2940 :         iRowOffset += nSrcXSize;
    4629             : 
    4630             :         // Get pixel values.
    4631             :         // We can potentially read extra elements after the "normal" end of the
    4632             :         // source arrays, but the contract of papabySrcImage[iBand],
    4633             :         // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
    4634             :         // is to have WARP_EXTRA_ELTS reserved at their end.
    4635        2940 :         if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
    4636             :                             padfRowDensity, padfRowReal, padfRowImag))
    4637           0 :             continue;
    4638             : 
    4639        2940 :         const double dfWeight1 = padfWeightsYShifted[j];
    4640             : 
    4641             :         // Iterate over pixels in row.
    4642        2940 :         if (padfRowDensity != nullptr)
    4643             :         {
    4644           0 :             for (int i = iMin; i <= iMax; ++i)
    4645             :             {
    4646             :                 // Skip sampling if pixel has zero density.
    4647           0 :                 if (padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD_DOUBLE)
    4648           0 :                     continue;
    4649             : 
    4650           0 :                 nCountValid++;
    4651             : 
    4652             :                 //  Use a cached set of weights for this row.
    4653           0 :                 const double dfWeight2 = dfWeight1 * padfWeightsXShifted[i];
    4654             : 
    4655             :                 // Accumulate!
    4656           0 :                 dfAccumulatorReal += padfRowReal[i - iMin] * dfWeight2;
    4657           0 :                 dfAccumulatorImag += padfRowImag[i - iMin] * dfWeight2;
    4658           0 :                 dfAccumulatorDensity += padfRowDensity[i - iMin] * dfWeight2;
    4659           0 :                 dfAccumulatorWeight += dfWeight2;
    4660             :             }
    4661             :         }
    4662        2940 :         else if (bIsNonComplex)
    4663             :         {
    4664        1764 :             double dfRowAccReal = 0.0;
    4665       10560 :             for (int i = iMin; i <= iMax; ++i)
    4666             :             {
    4667        8796 :                 const double dfWeight2 = padfWeightsXShifted[i];
    4668             : 
    4669             :                 // Accumulate!
    4670        8796 :                 dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
    4671             :             }
    4672             : 
    4673        1764 :             dfAccumulatorReal += dfRowAccReal * dfWeight1;
    4674             :         }
    4675             :         else
    4676             :         {
    4677        1176 :             double dfRowAccReal = 0.0;
    4678        1176 :             double dfRowAccImag = 0.0;
    4679        7040 :             for (int i = iMin; i <= iMax; ++i)
    4680             :             {
    4681        5864 :                 const double dfWeight2 = padfWeightsXShifted[i];
    4682             : 
    4683             :                 // Accumulate!
    4684        5864 :                 dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
    4685        5864 :                 dfRowAccImag += padfRowImag[i - iMin] * dfWeight2;
    4686             :             }
    4687             : 
    4688        1176 :             dfAccumulatorReal += dfRowAccReal * dfWeight1;
    4689        1176 :             dfAccumulatorImag += dfRowAccImag * dfWeight1;
    4690             :         }
    4691             :     }
    4692             : 
    4693         620 :     if (dfAccumulatorWeight < 0.000001 ||
    4694           0 :         (padfRowDensity != nullptr &&
    4695           0 :          (dfAccumulatorDensity < 0.000001 ||
    4696           0 :           nCountValid < (jMax - jMin + 1) * (iMax - iMin + 1) / 2)))
    4697             :     {
    4698           0 :         *pdfDensity = 0.0;
    4699           0 :         return false;
    4700             :     }
    4701             : 
    4702             :     // Calculate the output taking into account weighting.
    4703         620 :     if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
    4704             :     {
    4705           0 :         const double dfInvAcc = 1.0 / dfAccumulatorWeight;
    4706           0 :         *pdfReal = dfAccumulatorReal * dfInvAcc;
    4707           0 :         *pdfImag = dfAccumulatorImag * dfInvAcc;
    4708           0 :         if (padfRowDensity != nullptr)
    4709           0 :             *pdfDensity = dfAccumulatorDensity * dfInvAcc;
    4710             :         else
    4711           0 :             *pdfDensity = 1.0;
    4712             :     }
    4713             :     else
    4714             :     {
    4715         620 :         *pdfReal = dfAccumulatorReal;
    4716         620 :         *pdfImag = dfAccumulatorImag;
    4717         620 :         if (padfRowDensity != nullptr)
    4718           0 :             *pdfDensity = dfAccumulatorDensity;
    4719             :         else
    4720         620 :             *pdfDensity = 1.0;
    4721             :     }
    4722             : 
    4723         620 :     return true;
    4724             : }
    4725             : 
    4726             : /************************************************************************/
    4727             : /*                         GWKComputeWeights()                          */
    4728             : /************************************************************************/
    4729             : 
                       // Fills the horizontal and vertical filter weight tables for one
                       // resampling position and outputs the inverse of their combined sum.
                       //
                       // eResample selects the kernel through the apfGWKFilter and
                       // apfGWKFilter4Values tables; both entries are asserted to be non-null.
                       // The kernel is evaluated at (i - dfDeltaX) * dfXScale for every tap i in
                       // [iMin, iMax] and at (j - dfDeltaY) * dfYScale for every tap j in
                       // [jMin, jMax].
                       //
                       // padfWeightsHorizontal / padfWeightsVertical are written from index 0
                       // (entry k belongs to tap iMin + k, resp. jMin + k), so they must hold at
                       // least iMax - iMin + 1 and jMax - jMin + 1 doubles.
                       // dfInvWeights receives
                       // 1 / (sum of horizontal weights * sum of vertical weights).
                       //
                       // Comment lines without a line number / hit count column are review
                       // annotations added to this listing, not part of the measured source.
    4730     1222150 : static void GWKComputeWeights(GDALResampleAlg eResample, int iMin, int iMax,
    4731             :                               double dfDeltaX, double dfXScale, int jMin,
    4732             :                               int jMax, double dfDeltaY, double dfYScale,
    4733             :                               double *padfWeightsHorizontal,
    4734             :                               double *padfWeightsVertical, double &dfInvWeights)
    4735             : {
    4736             : 
                           // Scalar and 4-values-at-a-time evaluators of the selected kernel.
    4737     1222150 :     const FilterFuncType pfnGetWeight = apfGWKFilter[eResample];
    4738     1222150 :     CPLAssert(pfnGetWeight);
    4739     1222150 :     const FilterFunc4ValuesType pfnGetWeight4Values =
    4740     1222150 :         apfGWKFilter4Values[eResample];
    4741     1222150 :     CPLAssert(pfnGetWeight4Values);
    4742             : 
    4743     1222150 :     int i = iMin;  // Used after for.
    4744     1222150 :     int iC = 0;    // Used after for.
    4745             :     // Not zero, but as close as possible to it, to avoid potential division by
    4746             :     // zero at end of function
    4747     1222150 :     double dfAccumulatorWeightHorizontal = cpl::NumericLimits<double>::min();
                           // Blocks of 4 taps (i .. i + 3, hence the "i + 2 < iMax" bound). The four
                           // kernel abscissas are stored in the output slots, the last three by
                           // repeated addition of dfXScale, then the slots are handed to
                           // pfnGetWeight4Values and its return value is added to the running sum.
                           // NOTE(review): presumably pfnGetWeight4Values overwrites the abscissas
                           // in place with the matching weights and returns their sum; its
                           // definition is not visible here -- confirm.
    4748     2990960 :     for (; i + 2 < iMax; i += 4, iC += 4)
    4749             :     {
    4750     1768820 :         padfWeightsHorizontal[iC] = (i - dfDeltaX) * dfXScale;
    4751     1768820 :         padfWeightsHorizontal[iC + 1] = padfWeightsHorizontal[iC] + dfXScale;
    4752     1768820 :         padfWeightsHorizontal[iC + 2] =
    4753     1768820 :             padfWeightsHorizontal[iC + 1] + dfXScale;
    4754     1768820 :         padfWeightsHorizontal[iC + 3] =
    4755     1768820 :             padfWeightsHorizontal[iC + 2] + dfXScale;
    4756     1768820 :         dfAccumulatorWeightHorizontal +=
    4757     1768820 :             pfnGetWeight4Values(padfWeightsHorizontal + iC);
    4758             :     }
                           // Remaining taps (at most 3), evaluated one at a time with the scalar
                           // kernel; the weight itself is stored directly.
    4759     1280860 :     for (; i <= iMax; ++i, ++iC)
    4760             :     {
    4761       58719 :         const double dfWeight = pfnGetWeight((i - dfDeltaX) * dfXScale);
    4762       58719 :         padfWeightsHorizontal[iC] = dfWeight;
    4763       58719 :         dfAccumulatorWeightHorizontal += dfWeight;
    4764             :     }
    4765             : 
                           // Same scheme for the vertical direction.
    4766     1222150 :     int j = jMin;  // Used after for.
    4767     1222150 :     int jC = 0;    // Used after for.
    4768             :     // Not zero, but as close as possible to it, to avoid potential division by
    4769             :     // zero at end of function
    4770     1222150 :     double dfAccumulatorWeightVertical = cpl::NumericLimits<double>::min();
    4771     2984620 :     for (; j + 2 < jMax; j += 4, jC += 4)
    4772             :     {
    4773     1762470 :         padfWeightsVertical[jC] = (j - dfDeltaY) * dfYScale;
    4774     1762470 :         padfWeightsVertical[jC + 1] = padfWeightsVertical[jC] + dfYScale;
    4775     1762470 :         padfWeightsVertical[jC + 2] = padfWeightsVertical[jC + 1] + dfYScale;
    4776     1762470 :         padfWeightsVertical[jC + 3] = padfWeightsVertical[jC + 2] + dfYScale;
    4777     1762470 :         dfAccumulatorWeightVertical +=
    4778     1762470 :             pfnGetWeight4Values(padfWeightsVertical + jC);
    4779             :     }
    4780     1288930 :     for (; j <= jMax; ++j, ++jC)
    4781             :     {
    4782       66786 :         const double dfWeight = pfnGetWeight((j - dfDeltaY) * dfYScale);
    4783       66786 :         padfWeightsVertical[jC] = dfWeight;
    4784       66786 :         dfAccumulatorWeightVertical += dfWeight;
    4785             :     }
    4786             : 
                           // Single normalisation factor for the separable 2D kernel.
                           // NOTE(review): the tiny seeds guard against one sum being zero. If both
                           // sums stay near the seed value, their product can underflow to 0 and
                           // the division still yields infinity -- assuming
                           // cpl::NumericLimits<double>::min() is the smallest normalized double;
                           // confirm whether that case can occur in practice.
    4787     1222150 :     dfInvWeights =
    4788     1222150 :         1. / (dfAccumulatorWeightHorizontal * dfAccumulatorWeightVertical);
    4789     1222150 : }
    4790             : 
    4791             : /************************************************************************/
    4792             : /*                        GWKResampleNoMasksT()                         */
    4793             : /************************************************************************/
    4794             : 
    4795             : template <class T>
    4796             : static bool
    4797             : GWKResampleNoMasksT(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
    4798             :                     double dfSrcY, T *pValue, double *padfWeightsHorizontal,
    4799             :                     double *padfWeightsVertical, double &dfInvWeights)
    4800             : 
    4801             : {
    4802             :     // Commonly used; save locally.
    4803             :     const int nSrcXSize = poWK->nSrcXSize;
    4804             :     const int nSrcYSize = poWK->nSrcYSize;
    4805             : 
    4806             :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    4807             :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    4808             :     const GPtrDiff_t iSrcOffset =
    4809             :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    4810             : 
    4811             :     const int nXRadius = poWK->nXRadius;
    4812             :     const int nYRadius = poWK->nYRadius;
    4813             : 
    4814             :     // Politely refuse to process invalid coordinates or obscenely small image.
    4815             :     if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
    4816             :         nYRadius > nSrcYSize)
    4817             :         return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    4818             :                                                   pValue);
    4819             : 
    4820             :     T *pSrcBand = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
    4821             :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    4822             :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    4823             : 
    4824             :     const double dfXScale = std::min(poWK->dfXScale, 1.0);
    4825             :     const double dfYScale = std::min(poWK->dfYScale, 1.0);
    4826             : 
    4827             :     int iMin = 1 - nXRadius;
    4828             :     if (iSrcX + iMin < 0)
    4829             :         iMin = -iSrcX;
    4830             :     int iMax = nXRadius;
    4831             :     if (iSrcX + iMax >= nSrcXSize - 1)
    4832             :         iMax = nSrcXSize - 1 - iSrcX;
    4833             : 
    4834             :     int jMin = 1 - nYRadius;
    4835             :     if (iSrcY + jMin < 0)
    4836             :         jMin = -iSrcY;
    4837             :     int jMax = nYRadius;
    4838             :     if (iSrcY + jMax >= nSrcYSize - 1)
    4839             :         jMax = nSrcYSize - 1 - iSrcY;
    4840             : 
    4841             :     if (iBand == 0)
    4842             :     {
    4843             :         GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
    4844             :                           jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
    4845             :                           padfWeightsVertical, dfInvWeights);
    4846             :     }
    4847             : 
    4848             :     // Loop over all rows in the kernel.
    4849             :     double dfAccumulator = 0.0;
    4850             :     for (int jC = 0, j = jMin; j <= jMax; ++j, ++jC)
    4851             :     {
    4852             :         const GPtrDiff_t iSampJ =
    4853             :             iSrcOffset + static_cast<GPtrDiff_t>(j) * nSrcXSize;
    4854             : 
    4855             :         // Loop over all pixels in the row.
    4856             :         double dfAccumulatorLocal = 0.0;
    4857             :         double dfAccumulatorLocal2 = 0.0;
    4858             :         int iC = 0;
    4859             :         int i = iMin;
    4860             :         // Process by chunk of 4 cols.
    4861             :         for (; i + 2 < iMax; i += 4, iC += 4)
    4862             :         {
    4863             :             // Retrieve the pixel & accumulate.
    4864             :             dfAccumulatorLocal +=
    4865             :                 double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
    4866             :             dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
    4867             :                                   padfWeightsHorizontal[iC + 1];
    4868             :             dfAccumulatorLocal2 += double(pSrcBand[i + 2 + iSampJ]) *
    4869             :                                    padfWeightsHorizontal[iC + 2];
    4870             :             dfAccumulatorLocal2 += double(pSrcBand[i + 3 + iSampJ]) *
    4871             :                                    padfWeightsHorizontal[iC + 3];
    4872             :         }
    4873             :         dfAccumulatorLocal += dfAccumulatorLocal2;
    4874             :         if (i < iMax)
    4875             :         {
    4876             :             dfAccumulatorLocal +=
    4877             :                 double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
    4878             :             dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
    4879             :                                   padfWeightsHorizontal[iC + 1];
    4880             :             i += 2;
    4881             :             iC += 2;
    4882             :         }
    4883             :         if (i == iMax)
    4884             :         {
    4885             :             dfAccumulatorLocal +=
    4886             :                 double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
    4887             :         }
    4888             : 
    4889             :         dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
    4890             :     }
    4891             : 
    4892             :     *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
    4893             : 
    4894             :     return true;
    4895             : }
    4896             : 
    4897             : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
    4898             : /* Could possibly be used too on 32bit, but we would need to check at runtime */
    4899             : #if defined(USE_SSE2)
    4900             : 
    4901             : /************************************************************************/
    4902             : /*                     GWKResampleNoMasks_SSE2_T()                      */
    4903             : /************************************************************************/
    4904             : 
    4905             : template <class T>
                       // Resamples band iBand at source position (dfSrcX, dfSrcY) as a
                       // separable weighted sum over a window of source pixels, accumulating
                       // each row with XMMReg4Double / XMMReg2Double registers.
                       //
                       // poWK:   kernel supplying nSrcXSize/nSrcYSize, nXRadius/nYRadius,
                       //         dfXScale/dfYScale, eResample and papabySrcImage.
                       // iBand:  index into poWK->papabySrcImage. The weights are computed only
                       //         when iBand == 0; for any other band the weight buffers and
                       //         dfInvWeights are read as-is (presumably filled by a preceding
                       //         band 0 call for the same pixel -- TODO confirm against callers).
                       // dfSrcX, dfSrcY: source position; the window is anchored at
                       //         floor(coordinate - 0.5).
                       // pValue: receives GWKClampValueT<T>(weighted sum * dfInvWeights).
                       // padfWeightsHorizontal, padfWeightsVertical, dfInvWeights: passed to
                       //         GWKComputeWeights() and read back during the accumulation.
                       //
                       // Returns true after writing *pValue, or the result of
                       // GWKBilinearResampleNoMasks4SampleT() when iSrcX/iSrcY is not below the
                       // image size or a kernel radius exceeds the image size.
    4906     1775366 : static bool GWKResampleNoMasks_SSE2_T(const GDALWarpKernel *poWK, int iBand,
    4907             :                                       double dfSrcX, double dfSrcY, T *pValue,
    4908             :                                       double *padfWeightsHorizontal,
    4909             :                                       double *padfWeightsVertical,
    4910             :                                       double &dfInvWeights)
    4911             : {
    4912             :     // Commonly used; save locally.
    4913     1775366 :     const int nSrcXSize = poWK->nSrcXSize;
    4914     1775366 :     const int nSrcYSize = poWK->nSrcYSize;
    4915             : 
                           // Anchor pixel of the window and its linear offset in the band.
    4916     1775366 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    4917     1775366 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    4918     1775366 :     const GPtrDiff_t iSrcOffset =
    4919     1775366 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    4920     1775366 :     const int nXRadius = poWK->nXRadius;
    4921     1775366 :     const int nYRadius = poWK->nYRadius;
    4922             : 
    4923             :     // Politely refuse to process invalid coordinates or obscenely small image.
    4924     1775366 :     if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
    4925             :         nYRadius > nSrcYSize)
    4926           3 :         return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    4927           3 :                                                   pValue);
    4928             : 
    4929     1775364 :     const T *pSrcBand =
    4930     1775364 :         reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
    4931             : 
                           // Fractional position inside the anchor pixel, and scales capped at 1.
    4932     1775364 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    4933     1775364 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    4934     1775364 :     const double dfXScale = std::min(poWK->dfXScale, 1.0);
    4935     1775364 :     const double dfYScale = std::min(poWK->dfYScale, 1.0);
    4936             : 
                           // Column window [iMin, iMax], relative to iSrcX, clipped so that
                           // iSrcX + iMin >= 0 and iSrcX + iMax <= nSrcXSize - 1.
    4937     1775364 :     int iMin = 1 - nXRadius;
    4938     1775364 :     if (iSrcX + iMin < 0)
    4939       22616 :         iMin = -iSrcX;
    4940     1775364 :     int iMax = nXRadius;
    4941     1775364 :     if (iSrcX + iMax >= nSrcXSize - 1)
    4942        9506 :         iMax = nSrcXSize - 1 - iSrcX;
    4943             : 
                           // Row window [jMin, jMax], relative to iSrcY, clipped the same way.
    4944     1775364 :     int jMin = 1 - nYRadius;
    4945     1775364 :     if (iSrcY + jMin < 0)
    4946       26049 :         jMin = -iSrcY;
    4947     1775364 :     int jMax = nYRadius;
    4948     1775364 :     if (iSrcY + jMax >= nSrcYSize - 1)
    4949       13135 :         jMax = nSrcYSize - 1 - iSrcY;
    4950             : 
                           // Weights are only recomputed for band 0; other bands reuse the
                           // contents of the weight buffers and dfInvWeights.
    4951     1775364 :     if (iBand == 0)
    4952             :     {
    4953     1222146 :         GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
    4954             :                           jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
    4955             :                           padfWeightsVertical, dfInvWeights);
    4956             :     }
    4957             : 
                           // iSampJ: offset of window row j at column iSrcX; it starts at row jMin
                           // and advances by whole rows together with j.
    4958     1775364 :     GPtrDiff_t iSampJ = iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
    4959             :     // Process by chunk of 4 rows.
    4960     1775364 :     int jC = 0;
    4961     1775364 :     int j = jMin;
    4962     1775364 :     double dfAccumulator = 0.0;
                           // j + 2 < jMax guarantees rows j .. j + 3 are all inside the window.
    4963     5023910 :     for (; j + 2 < jMax; j += 4, iSampJ += 4 * nSrcXSize, jC += 4)
    4964             :     {
    4965             :         // Loop over all pixels in the row.
    4966     3248546 :         int iC = 0;
    4967     3248546 :         int i = iMin;
    4968             :         // Process by chunk of 4 cols.
                               // One accumulator per row of the 4-row chunk.
    4969     3248546 :         XMMReg4Double v_acc_1 = XMMReg4Double::Zero();
    4970     3248546 :         XMMReg4Double v_acc_2 = XMMReg4Double::Zero();
    4971     3248546 :         XMMReg4Double v_acc_3 = XMMReg4Double::Zero();
    4972     3248546 :         XMMReg4Double v_acc_4 = XMMReg4Double::Zero();
                               // i + 2 < iMax guarantees columns i .. i + 3 are inside the window.
    4973    11835082 :         for (; i + 2 < iMax; i += 4, iC += 4)
    4974             :         {
    4975             :             // Retrieve the pixel & accumulate.
    4976     8586546 :             XMMReg4Double v_pixels_1 =
    4977     8586546 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
    4978     8586546 :             XMMReg4Double v_pixels_2 =
    4979     8586546 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + nSrcXSize);
    4980     8586546 :             XMMReg4Double v_pixels_3 =
    4981     8586546 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
    4982     8586546 :             XMMReg4Double v_pixels_4 =
    4983     8586546 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
    4984             : 
                                   // The same four horizontal weights apply to all four rows.
    4985     8586546 :             XMMReg4Double v_padfWeight =
    4986     8586546 :                 XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
    4987             : 
    4988     8586546 :             v_acc_1 += v_pixels_1 * v_padfWeight;
    4989     8586546 :             v_acc_2 += v_pixels_2 * v_padfWeight;
    4990     8586546 :             v_acc_3 += v_pixels_3 * v_padfWeight;
    4991     8586546 :             v_acc_4 += v_pixels_4 * v_padfWeight;
    4992             :         }
    4993             : 
                               // At least two columns (i and i + 1) remain: handle them as a pair.
    4994     3248546 :         if (i < iMax)
    4995             :         {
    4996       49932 :             XMMReg2Double v_pixels_1 =
    4997       49932 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ);
    4998       49932 :             XMMReg2Double v_pixels_2 =
    4999       49932 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + nSrcXSize);
    5000       49932 :             XMMReg2Double v_pixels_3 =
    5001       49932 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
    5002       49932 :             XMMReg2Double v_pixels_4 =
    5003       49932 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
    5004             : 
    5005       49932 :             XMMReg2Double v_padfWeight =
    5006       49932 :                 XMMReg2Double::Load2Val(padfWeightsHorizontal + iC);
    5007             : 
    5008       49932 :             v_acc_1.AddToLow(v_pixels_1 * v_padfWeight);
    5009       49932 :             v_acc_2.AddToLow(v_pixels_2 * v_padfWeight);
    5010       49932 :             v_acc_3.AddToLow(v_pixels_3 * v_padfWeight);
    5011       49932 :             v_acc_4.AddToLow(v_pixels_4 * v_padfWeight);
    5012             : 
    5013       49932 :             i += 2;
    5014       49932 :             iC += 2;
    5015             :         }
    5016             : 
                               // Collapse each row accumulator to a scalar partial sum.
    5017     3248546 :         double dfAccumulatorLocal_1 = v_acc_1.GetHorizSum();
    5018     3248546 :         double dfAccumulatorLocal_2 = v_acc_2.GetHorizSum();
    5019     3248546 :         double dfAccumulatorLocal_3 = v_acc_3.GetHorizSum();
    5020     3248546 :         double dfAccumulatorLocal_4 = v_acc_4.GetHorizSum();
    5021             : 
                               // A single trailing column remains: add it in scalar code.
    5022     3248546 :         if (i == iMax)
    5023             :         {
    5024       27545 :             dfAccumulatorLocal_1 += static_cast<double>(pSrcBand[i + iSampJ]) *
    5025       27545 :                                     padfWeightsHorizontal[iC];
    5026       27545 :             dfAccumulatorLocal_2 +=
    5027       27545 :                 static_cast<double>(pSrcBand[i + iSampJ + nSrcXSize]) *
    5028       27545 :                 padfWeightsHorizontal[iC];
    5029       27545 :             dfAccumulatorLocal_3 +=
    5030       27545 :                 static_cast<double>(pSrcBand[i + iSampJ + 2 * nSrcXSize]) *
    5031       27545 :                 padfWeightsHorizontal[iC];
    5032       27545 :             dfAccumulatorLocal_4 +=
    5033       27545 :                 static_cast<double>(pSrcBand[i + iSampJ + 3 * nSrcXSize]) *
    5034       27545 :                 padfWeightsHorizontal[iC];
    5035             :         }
    5036             : 
                               // Apply the vertical weight of each of the four rows.
    5037     3248546 :         dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal_1;
    5038     3248546 :         dfAccumulator += padfWeightsVertical[jC + 1] * dfAccumulatorLocal_2;
    5039     3248546 :         dfAccumulator += padfWeightsVertical[jC + 2] * dfAccumulatorLocal_3;
    5040     3248546 :         dfAccumulator += padfWeightsVertical[jC + 3] * dfAccumulatorLocal_4;
    5041             :     }
                           // Rows left over after the 4-row chunks, one row at a time.
    5042     1866210 :     for (; j <= jMax; ++j, iSampJ += nSrcXSize, ++jC)
    5043             :     {
    5044             :         // Loop over all pixels in the row.
    5045       90850 :         int iC = 0;
    5046       90850 :         int i = iMin;
    5047             :         // Process by chunk of 4 cols.
    5048       90850 :         XMMReg4Double v_acc = XMMReg4Double::Zero();
    5049      243258 :         for (; i + 2 < iMax; i += 4, iC += 4)
    5050             :         {
    5051             :             // Retrieve the pixel & accumulate.
    5052      152408 :             XMMReg4Double v_pixels =
    5053      152408 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
    5054      152408 :             XMMReg4Double v_padfWeight =
    5055      152408 :                 XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
    5056             : 
    5057      152408 :             v_acc += v_pixels * v_padfWeight;
    5058             :         }
    5059             : 
    5060       90850 :         double dfAccumulatorLocal = v_acc.GetHorizSum();
    5061             : 
                               // Trailing pair of columns, handled in scalar code here.
    5062       90850 :         if (i < iMax)
    5063             :         {
    5064        2090 :             dfAccumulatorLocal +=
    5065        2090 :                 double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
    5066        2090 :             dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
    5067        2090 :                                   padfWeightsHorizontal[iC + 1];
    5068        2090 :             i += 2;
    5069        2090 :             iC += 2;
    5070             :         }
                               // Single trailing column.
    5071       90850 :         if (i == iMax)
    5072             :         {
    5073        1839 :             dfAccumulatorLocal += static_cast<double>(pSrcBand[i + iSampJ]) *
    5074        1839 :                                   padfWeightsHorizontal[iC];
    5075             :         }
    5076             : 
    5077       90850 :         dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
    5078             :     }
    5079             : 
                           // Normalise by dfInvWeights and clamp to the range of T.
    5080     1775364 :     *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
    5081             : 
    5082     1775364 :     return true;
    5083             : }
    5084             : 
    5085             : /************************************************************************/
    5086             : /*                     GWKResampleNoMasksT<GByte>()                     */
    5087             : /************************************************************************/
    5088             : 
    5089             : template <>
                       // GByte specialization: forwards every argument unchanged to
                       // GWKResampleNoMasks_SSE2_T() and returns its result. Only compiled when
                       // USE_SSE2 is defined.
    5090     1270240 : bool GWKResampleNoMasksT<GByte>(const GDALWarpKernel *poWK, int iBand,
    5091             :                                 double dfSrcX, double dfSrcY, GByte *pValue,
    5092             :                                 double *padfWeightsHorizontal,
    5093             :                                 double *padfWeightsVertical,
    5094             :                                 double &dfInvWeights)
    5095             : {
    5096     1270240 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    5097             :                                      padfWeightsHorizontal, padfWeightsVertical,
    5098     1270240 :                                      dfInvWeights);
    5099             : }
    5100             : 
    5101             : /************************************************************************/
    5102             : /*                    GWKResampleNoMasksT<GInt16>()                     */
    5103             : /************************************************************************/
    5104             : 
    5105             : template <>
                       // GInt16 specialization: forwards every argument unchanged to
                       // GWKResampleNoMasks_SSE2_T() and returns its result. Only compiled when
                       // USE_SSE2 is defined.
    5106      252563 : bool GWKResampleNoMasksT<GInt16>(const GDALWarpKernel *poWK, int iBand,
    5107             :                                  double dfSrcX, double dfSrcY, GInt16 *pValue,
    5108             :                                  double *padfWeightsHorizontal,
    5109             :                                  double *padfWeightsVertical,
    5110             :                                  double &dfInvWeights)
    5111             : {
    5112      252563 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    5113             :                                      padfWeightsHorizontal, padfWeightsVertical,
    5114      252563 :                                      dfInvWeights);
    5115             : }
    5116             : 
    5117             : /************************************************************************/
    5118             : /*                    GWKResampleNoMasksT<GUInt16>()                    */
    5119             : /************************************************************************/
    5120             : 
    5121             : template <>
                       // GUInt16 specialization: forwards every argument unchanged to
                       // GWKResampleNoMasks_SSE2_T() and returns its result. Only compiled when
                       // USE_SSE2 is defined.
    5122      250063 : bool GWKResampleNoMasksT<GUInt16>(const GDALWarpKernel *poWK, int iBand,
    5123             :                                   double dfSrcX, double dfSrcY, GUInt16 *pValue,
    5124             :                                   double *padfWeightsHorizontal,
    5125             :                                   double *padfWeightsVertical,
    5126             :                                   double &dfInvWeights)
    5127             : {
    5128      250063 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    5129             :                                      padfWeightsHorizontal, padfWeightsVertical,
    5130      250063 :                                      dfInvWeights);
    5131             : }
    5132             : 
    5133             : /************************************************************************/
    5134             : /*                     GWKResampleNoMasksT<float>()                     */
    5135             : /************************************************************************/
    5136             : 
    5137             : template <>
                       // float specialization: forwards every argument unchanged to
                       // GWKResampleNoMasks_SSE2_T() and returns its result. Only compiled when
                       // USE_SSE2 is defined.
    5138        2500 : bool GWKResampleNoMasksT<float>(const GDALWarpKernel *poWK, int iBand,
    5139             :                                 double dfSrcX, double dfSrcY, float *pValue,
    5140             :                                 double *padfWeightsHorizontal,
    5141             :                                 double *padfWeightsVertical,
    5142             :                                 double &dfInvWeights)
    5143             : {
    5144        2500 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    5145             :                                      padfWeightsHorizontal, padfWeightsVertical,
    5146        2500 :                                      dfInvWeights);
    5147             : }
    5148             : 
    5149             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    5150             : 
    5151             : /************************************************************************/
    5152             : /*                    GWKResampleNoMasksT<double>()                     */
    5153             : /************************************************************************/
    5154             : 
    5155             : template <>
                       // double specialization: forwards every argument unchanged to
                       // GWKResampleNoMasks_SSE2_T() and returns its result. Only compiled when
                       // both USE_SSE2 and INSTANTIATE_FLOAT64_SSE2_IMPL are defined.
    5156             : bool GWKResampleNoMasksT<double>(const GDALWarpKernel *poWK, int iBand,
    5157             :                                  double dfSrcX, double dfSrcY, double *pValue,
    5158             :                                  double *padfWeightsHorizontal,
    5159             :                                  double *padfWeightsVertical,
    5160             :                                  double &dfInvWeights)
    5161             : {
    5162             :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    5163             :                                      padfWeightsHorizontal, padfWeightsVertical,
    5164             :                                      dfInvWeights);
    5165             : }
    5166             : 
    5167             : #endif /* INSTANTIATE_FLOAT64_SSE2_IMPL */
    5168             : 
    5169             : #endif /* defined(USE_SSE2) */
    5170             : 
    5171             : /************************************************************************/
    5172             : /*                     GWKRoundSourceCoordinates()                      */
    5173             : /************************************************************************/
    5174             : 
    5175        1000 : static void GWKRoundSourceCoordinates(
    5176             :     int nDstXSize, double *padfX, double *padfY, double *padfZ, int *pabSuccess,
    5177             :     double dfSrcCoordPrecision, double dfErrorThreshold,
    5178             :     GDALTransformerFunc pfnTransformer, void *pTransformerArg, double dfDstXOff,
    5179             :     double dfDstY)
    5180             : {
                           // Rounds padfX[i] / padfY[i], for i in [0, nDstXSize), to the nearest
                           // multiple of dfSrcCoordPrecision. When rounding moves a coordinate by
                           // more than dfExactTransformThreshold, the point is recomputed by
                           // calling pfnTransformer on (i + dfDstXOff, dfDstY, 0) and the result is
                           // rounded again.
                           //
                           // padfX, padfY: in/out coordinates, rounded in place.
                           // padfZ, pabSuccess: only written for re-transformed points;
                           //     pabSuccess is not consulted before rounding.
                           // dfDstXOff, dfDstY: destination position used for the re-transform.
                           //
                           // dfPct is 0.8 unless dfSrcCoordPrecision is at least 10 times
                           // dfErrorThreshold, in which case it is 1 - 2 / ratio (0.8 at ratio 10).
    5181        1000 :     double dfPct = 0.8;
    5182        1000 :     if (dfErrorThreshold > 0 && dfSrcCoordPrecision / dfErrorThreshold >= 10.0)
    5183             :     {
    5184        1000 :         dfPct = 1.0 - 2 * 1.0 / (dfSrcCoordPrecision / dfErrorThreshold);
    5185             :     }
                           // Largest rounding displacement accepted without a re-transform.
    5186        1000 :     const double dfExactTransformThreshold = 0.5 * dfPct * dfSrcCoordPrecision;
    5187             : 
    5188      501000 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5189             :     {
    5190      500000 :         const double dfXBefore = padfX[iDstX];
    5191      500000 :         const double dfYBefore = padfY[iDstX];
                               // floor(v / p + 0.5) * p: round to the nearest multiple of p.
    5192      500000 :         padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
    5193             :                        dfSrcCoordPrecision;
    5194      500000 :         padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
    5195             :                        dfSrcCoordPrecision;
    5196             : 
    5197             :         // If we are in an uncertainty zone, go to non-approximated
    5198             :         // transformation.
    5199             :         // Due to the 80% of half-precision threshold, dfSrcCoordPrecision must
    5200             :         // be at least 10 times greater than the approximation error.
    5201      500000 :         if (fabs(dfXBefore - padfX[iDstX]) > dfExactTransformThreshold ||
    5202      399914 :             fabs(dfYBefore - padfY[iDstX]) > dfExactTransformThreshold)
    5203             :         {
                                   // Reset the point to its destination position, transform it
                                   // on its own, then round the fresh result.
    5204      180090 :             padfX[iDstX] = iDstX + dfDstXOff;
    5205      180090 :             padfY[iDstX] = dfDstY;
    5206      180090 :             padfZ[iDstX] = 0.0;
    5207      180090 :             pfnTransformer(pTransformerArg, TRUE, 1, padfX + iDstX,
    5208      180090 :                            padfY + iDstX, padfZ + iDstX, pabSuccess + iDstX);
    5209      180090 :             padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
    5210             :                            dfSrcCoordPrecision;
    5211      180090 :             padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
    5212             :                            dfSrcCoordPrecision;
    5213             :         }
    5214             :     }
    5215        1000 : }
    5216             : 
    5217             : /************************************************************************/
    5218             : /*                    GWKCheckAndComputeSrcOffsets()                    */
    5219             : /************************************************************************/
    5220             : static CPL_INLINE bool
    5221   152627000 : GWKCheckAndComputeSrcOffsets(GWKJobStruct *psJob, int *_pabSuccess, int _iDstX,
    5222             :                              int _iDstY, double *_padfX, double *_padfY,
    5223             :                              int _nSrcXSize, int _nSrcYSize,
    5224             :                              GPtrDiff_t &iSrcOffset)
    5225             : {
                           // Validates the source coordinate stored in _padfX/_padfY[_iDstX] and,
                           // when it falls inside the source window, writes the linear pixel offset
                           // iSrcX + iSrcY * _nSrcXSize to iSrcOffset.
                           //
                           // Returns false (iSrcOffset untouched) when _pabSuccess[_iDstX] is
                           // false, a coordinate is NaN, or the coordinate lies outside
                           // [nSrcXOff, nSrcXOff + _nSrcXSize] x [nSrcYOff, nSrcYOff + _nSrcYSize].
                           // A coordinate less than one pixel outside that range is re-transformed
                           // once on its own (second loop pass) before being rejected; that pass
                           // overwrites _padfX/_padfY/_pabSuccess at index _iDstX.
    5226   152627000 :     const GDALWarpKernel *_poWK = psJob->poWK;
                           // At most two passes: pass 0 checks the incoming coordinate, pass 1
                           // checks the individually re-transformed one.
    5227   152832000 :     for (int iTry = 0; iTry < 2; ++iTry)
    5228             :     {
    5229   152832000 :         if (iTry == 1)
    5230             :         {
    5231             :             // If the source coordinate is slightly outside of the source raster
    5232             :             // retry to transform it alone, so that the exact coordinate
    5233             :             // transformer is used.
    5234             : 
                                   // The retry starts from the destination pixel centre (+ 0.5).
    5235      205552 :             _padfX[_iDstX] = _iDstX + 0.5 + _poWK->nDstXOff;
    5236      205552 :             _padfY[_iDstX] = _iDstY + 0.5 + _poWK->nDstYOff;
    5237      205552 :             double dfZ = 0;
    5238      205552 :             _poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1,
    5239      205552 :                                   _padfX + _iDstX, _padfY + _iDstX, &dfZ,
    5240      205552 :                                   _pabSuccess + _iDstX);
    5241             :         }
    5242   152832000 :         if (!_pabSuccess[_iDstX])
    5243     3614790 :             return false;
    5244             : 
    5245             :         // If this happens this is likely the symptom of a bug somewhere.
    5246   149218000 :         if (std::isnan(_padfX[_iDstX]) || std::isnan(_padfY[_iDstX]))
    5247             :         {
                                   // Function-local flag so the debug message is emitted once.
                                   // NOTE(review): plain static bool with no synchronization --
                                   // confirm this is acceptable if jobs run concurrently.
    5248             :             static bool bNanCoordFound = false;
    5249           0 :             if (!bNanCoordFound)
    5250             :             {
    5251           0 :                 CPLDebug("WARP",
    5252             :                          "GWKCheckAndComputeSrcOffsets(): "
    5253             :                          "NaN coordinate found on point %d.",
    5254             :                          _iDstX);
    5255           0 :                 bNanCoordFound = true;
    5256             :             }
    5257           0 :             return false;
    5258             :         }
    5259             : 
    5260             :         /* --------------------------------------------------------------------
    5261             :          */
    5262             :         /*      Figure out what pixel we want in our source raster, and skip */
    5263             :         /*      further processing if it is well off the source image. */
    5264             :         /* --------------------------------------------------------------------
    5265             :          */
    5266             :         /* We test against the value before casting to avoid the */
    5267             :         /* problem of asymmetric truncation effects around zero.  That is */
    5268             :         /* -0.5 will be 0 when cast to an int. */
    5269   149218000 :         if (_padfX[_iDstX] < _poWK->nSrcXOff)
    5270             :         {
    5271             :             // If the source coordinate is slightly outside of the source raster
    5272             :             // retry to transform it alone, so that the exact coordinate
    5273             :             // transformer is used.
    5274     6006520 :             if (iTry == 0 && _padfX[_iDstX] > _poWK->nSrcXOff - 1)
    5275       42277 :                 continue;
    5276     5964240 :             return false;
    5277             :         }
    5278             : 
    5279   143211000 :         if (_padfY[_iDstX] < _poWK->nSrcYOff)
    5280             :         {
    5281             :             // If the source coordinate is slightly outside of the source raster
    5282             :             // retry to transform it alone, so that the exact coordinate
    5283             :             // transformer is used.
    5284     6203470 :             if (iTry == 0 && _padfY[_iDstX] > _poWK->nSrcYOff - 1)
    5285       64466 :                 continue;
    5286     6139010 :             return false;
    5287             :         }
    5288             : 
    5289             :         // Check for potential overflow when casting from float to int, (if
    5290             :         // operating outside natural projection area, padfX/Y can be a very huge
    5291             :         // positive number before doing the actual conversion), as such cast is
    5292             :         // undefined behavior that can trigger exception with some compilers
    5293             :         // (see #6753)
    5294   137008000 :         if (_padfX[_iDstX] + 1e-10 > _nSrcXSize + _poWK->nSrcXOff)
    5295             :         {
    5296             :             // If the source coordinate is slightly outside of the source raster
    5297             :             // retry to transform it alone, so that the exact coordinate
    5298             :             // transformer is used.
    5299     3932310 :             if (iTry == 0 && _padfX[_iDstX] < _nSrcXSize + _poWK->nSrcXOff + 1)
    5300       47544 :                 continue;
    5301     3884760 :             return false;
    5302             :         }
    5303   133075000 :         if (_padfY[_iDstX] + 1e-10 > _nSrcYSize + _poWK->nSrcYOff)
    5304             :         {
    5305             :             // If the source coordinate is slightly outside of the source raster
    5306             :             // retry to transform it alone, so that the exact coordinate
    5307             :             // transformer is used.
    5308     4488370 :             if (iTry == 0 && _padfY[_iDstX] < _nSrcYSize + _poWK->nSrcYOff + 1)
    5309       51265 :                 continue;
    5310     4437110 :             return false;
    5311             :         }
    5312             : 
                               // All range checks passed for this pass.
    5313   128587000 :         break;
    5314             :     }
    5315             : 
                           // Truncate (with the same 1e-10 nudge as the range checks) to a pixel
                           // index relative to the source window origin.
    5316   128587000 :     int iSrcX = static_cast<int>(_padfX[_iDstX] + 1.0e-10) - _poWK->nSrcXOff;
    5317   128587000 :     int iSrcY = static_cast<int>(_padfY[_iDstX] + 1.0e-10) - _poWK->nSrcYOff;
                           // The upper range check accepts a coordinate on the far edge, which
                           // truncates to index == size; pull it back to the last pixel.
    5318   128587000 :     if (iSrcX == _nSrcXSize)
    5319           0 :         iSrcX--;
    5320   128587000 :     if (iSrcY == _nSrcYSize)
    5321           0 :         iSrcY--;
    5322             : 
    5323             :     // Those checks should normally be OK given the previous ones.
    5324   128587000 :     CPLAssert(iSrcX >= 0);
    5325   128587000 :     CPLAssert(iSrcY >= 0);
    5326   128587000 :     CPLAssert(iSrcX < _nSrcXSize);
    5327   128587000 :     CPLAssert(iSrcY < _nSrcYSize);
    5328             : 
    5329   128587000 :     iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * _nSrcXSize;
    5330             : 
    5331   128587000 :     return true;
    5332             : }
    5333             : 
    5334             : /************************************************************************/
    5335             : /*                 GWKOneSourceCornerFailsToReproject()                 */
    5336             : /************************************************************************/
    5337             : 
    5338         917 : static bool GWKOneSourceCornerFailsToReproject(GWKJobStruct *psJob)
    5339             : {
                           // Runs poWK->pfnTransformer on each of the four corners of the source
                           // window, (nSrcXOff + iX * nSrcXSize, nSrcYOff + iY * nSrcYSize) for
                           // iX, iY in {0, 1}. Returns true as soon as one corner reports failure,
                           // false when all four succeed.
                           // NOTE(review): the FALSE flag presumably selects the
                           // source-to-destination direction -- confirm against GDALTransformerFunc.
    5340         917 :     GDALWarpKernel *poWK = psJob->poWK;
    5341        2741 :     for (int iY = 0; iY <= 1; ++iY)
    5342             :     {
    5343        5478 :         for (int iX = 0; iX <= 1; ++iX)
    5344             :         {
    5345        3654 :             double dfXTmp = poWK->nSrcXOff + iX * poWK->nSrcXSize;
    5346        3654 :             double dfYTmp = poWK->nSrcYOff + iY * poWK->nSrcYSize;
    5347        3654 :             double dfZTmp = 0;
    5348        3654 :             int nSuccess = FALSE;
    5349        3654 :             poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp,
    5350             :                                  &dfYTmp, &dfZTmp, &nSuccess);
    5351        3654 :             if (!nSuccess)
    5352           6 :                 return true;
    5353             :         }
    5354             :     }
    5355         911 :     return false;
    5356             : }
    5357             : 
    5358             : /************************************************************************/
    5359             : /*                      GWKAdjustSrcOffsetOnEdge()                      */
    5360             : /************************************************************************/
    5361             : 
    5362        9714 : static bool GWKAdjustSrcOffsetOnEdge(GWKJobStruct *psJob,
    5363             :                                      GPtrDiff_t &iSrcOffset)
    5364             : {
    5365        9714 :     GDALWarpKernel *poWK = psJob->poWK;
    5366        9714 :     const int nSrcXSize = poWK->nSrcXSize;
    5367        9714 :     const int nSrcYSize = poWK->nSrcYSize;
    5368             : 
    5369             :     // Check if the computed source position slightly altered
    5370             :     // fails to reproject. If so, then we are at the edge of
    5371             :     // the validity area, and it is worth checking neighbour
    5372             :     // source pixels for validity.
    5373        9714 :     int nSuccess = FALSE;
    5374             :     {
    5375        9714 :         double dfXTmp =
    5376        9714 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5377        9714 :         double dfYTmp =
    5378        9714 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5379        9714 :         double dfZTmp = 0;
    5380        9714 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5381             :                              &dfZTmp, &nSuccess);
    5382             :     }
    5383        9714 :     if (nSuccess)
    5384             :     {
    5385        6996 :         double dfXTmp =
    5386        6996 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5387        6996 :         double dfYTmp =
    5388        6996 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
    5389        6996 :         double dfZTmp = 0;
    5390        6996 :         nSuccess = FALSE;
    5391        6996 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5392             :                              &dfZTmp, &nSuccess);
    5393             :     }
    5394        9714 :     if (nSuccess)
    5395             :     {
    5396        5624 :         double dfXTmp =
    5397        5624 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
    5398        5624 :         double dfYTmp =
    5399        5624 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5400        5624 :         double dfZTmp = 0;
    5401        5624 :         nSuccess = FALSE;
    5402        5624 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5403             :                              &dfZTmp, &nSuccess);
    5404             :     }
    5405             : 
    5406       14166 :     if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
    5407        4452 :         CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + 1))
    5408             :     {
    5409        1860 :         iSrcOffset++;
    5410        1860 :         return true;
    5411             :     }
    5412       10290 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
    5413        2436 :              CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + nSrcXSize))
    5414             :     {
    5415        1334 :         iSrcOffset += nSrcXSize;
    5416        1334 :         return true;
    5417             :     }
    5418        7838 :     else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
    5419        1318 :              CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - 1))
    5420             :     {
    5421         956 :         iSrcOffset--;
    5422         956 :         return true;
    5423             :     }
    5424        5924 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
    5425         360 :              CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - nSrcXSize))
    5426             :     {
    5427         340 :         iSrcOffset -= nSrcXSize;
    5428         340 :         return true;
    5429             :     }
    5430             : 
    5431        5224 :     return false;
    5432             : }
    5433             : 
    5434             : /************************************************************************/
    5435             : /*             GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity()              */
    5436             : /************************************************************************/
    5437             : 
    5438           0 : static bool GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(GWKJobStruct *psJob,
    5439             :                                                       GPtrDiff_t &iSrcOffset)
    5440             : {
    5441           0 :     GDALWarpKernel *poWK = psJob->poWK;
    5442           0 :     const int nSrcXSize = poWK->nSrcXSize;
    5443           0 :     const int nSrcYSize = poWK->nSrcYSize;
    5444             : 
    5445             :     // Check if the computed source position slightly altered
    5446             :     // fails to reproject. If so, then we are at the edge of
    5447             :     // the validity area, and it is worth checking neighbour
    5448             :     // source pixels for validity.
    5449           0 :     int nSuccess = FALSE;
    5450             :     {
    5451           0 :         double dfXTmp =
    5452           0 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5453           0 :         double dfYTmp =
    5454           0 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5455           0 :         double dfZTmp = 0;
    5456           0 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5457             :                              &dfZTmp, &nSuccess);
    5458             :     }
    5459           0 :     if (nSuccess)
    5460             :     {
    5461           0 :         double dfXTmp =
    5462           0 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5463           0 :         double dfYTmp =
    5464           0 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
    5465           0 :         double dfZTmp = 0;
    5466           0 :         nSuccess = FALSE;
    5467           0 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5468             :                              &dfZTmp, &nSuccess);
    5469             :     }
    5470           0 :     if (nSuccess)
    5471             :     {
    5472           0 :         double dfXTmp =
    5473           0 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
    5474           0 :         double dfYTmp =
    5475           0 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5476           0 :         double dfZTmp = 0;
    5477           0 :         nSuccess = FALSE;
    5478           0 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5479             :                              &dfZTmp, &nSuccess);
    5480             :     }
    5481             : 
    5482           0 :     if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
    5483           0 :         poWK->pafUnifiedSrcDensity[iSrcOffset + 1] >=
    5484             :             SRC_DENSITY_THRESHOLD_FLOAT)
    5485             :     {
    5486           0 :         iSrcOffset++;
    5487           0 :         return true;
    5488             :     }
    5489           0 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
    5490           0 :              poWK->pafUnifiedSrcDensity[iSrcOffset + nSrcXSize] >=
    5491             :                  SRC_DENSITY_THRESHOLD_FLOAT)
    5492             :     {
    5493           0 :         iSrcOffset += nSrcXSize;
    5494           0 :         return true;
    5495             :     }
    5496           0 :     else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
    5497           0 :              poWK->pafUnifiedSrcDensity[iSrcOffset - 1] >=
    5498             :                  SRC_DENSITY_THRESHOLD_FLOAT)
    5499             :     {
    5500           0 :         iSrcOffset--;
    5501           0 :         return true;
    5502             :     }
    5503           0 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
    5504           0 :              poWK->pafUnifiedSrcDensity[iSrcOffset - nSrcXSize] >=
    5505             :                  SRC_DENSITY_THRESHOLD_FLOAT)
    5506             :     {
    5507           0 :         iSrcOffset -= nSrcXSize;
    5508           0 :         return true;
    5509             :     }
    5510             : 
    5511           0 :     return false;
    5512             : }
    5513             : 
    5514             : /************************************************************************/
    5515             : /*                           GWKGeneralCase()                           */
    5516             : /*                                                                      */
    5517             : /*      This is the most general case.  It attempts to handle all       */
    5518             : /*      possible features with relatively little concern for            */
    5519             : /*      efficiency.                                                     */
    5520             : /************************************************************************/
    5521             : 
    5522         239 : static void GWKGeneralCaseThread(void *pData)
    5523             : {
    5524         239 :     GWKJobStruct *psJob = reinterpret_cast<GWKJobStruct *>(pData);
    5525         239 :     GDALWarpKernel *poWK = psJob->poWK;
    5526         239 :     const int iYMin = psJob->iYMin;
    5527         239 :     const int iYMax = psJob->iYMax;
    5528             :     const double dfMultFactorVerticalShiftPipeline =
    5529         239 :         poWK->bApplyVerticalShift
    5530         239 :             ? CPLAtof(CSLFetchNameValueDef(
    5531           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    5532             :                   "1.0"))
    5533         239 :             : 0.0;
    5534             :     const bool bAvoidNoDataSingleBand =
    5535         239 :         poWK->nBands == 1 ||
    5536           0 :         !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
    5537         239 :                                           "UNIFIED_SRC_NODATA", "FALSE"));
    5538             : 
    5539         239 :     int nDstXSize = poWK->nDstXSize;
    5540         239 :     int nSrcXSize = poWK->nSrcXSize;
    5541         239 :     int nSrcYSize = poWK->nSrcYSize;
    5542             : 
    5543             :     /* -------------------------------------------------------------------- */
    5544             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    5545             :     /*      scanlines worth of positions.                                   */
    5546             :     /* -------------------------------------------------------------------- */
    5547             :     // For x, 2 *, because we cache the precomputed values at the end.
    5548             :     double *padfX =
    5549         239 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    5550             :     double *padfY =
    5551         239 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5552             :     double *padfZ =
    5553         239 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5554         239 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    5555             : 
    5556         239 :     const bool bUse4SamplesFormula = CanUse4SamplesFormula(poWK);
    5557             : 
    5558         239 :     GWKResampleWrkStruct *psWrkStruct = nullptr;
    5559         239 :     if (poWK->eResample != GRA_NearestNeighbour)
    5560             :     {
    5561         220 :         psWrkStruct = GWKResampleCreateWrkStruct(poWK);
    5562             :     }
    5563         239 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    5564         239 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    5565         239 :     const double dfErrorThreshold = CPLAtof(
    5566         239 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    5567             : 
    5568             :     const bool bOneSourceCornerFailsToReproject =
    5569         239 :         GWKOneSourceCornerFailsToReproject(psJob);
    5570             : 
    5571             :     // Precompute values.
    5572        6469 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5573        6230 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    5574             : 
    5575             :     /* ==================================================================== */
    5576             :     /*      Loop over output lines.                                         */
    5577             :     /* ==================================================================== */
    5578        6469 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    5579             :     {
    5580             :         /* --------------------------------------------------------------------
    5581             :          */
    5582             :         /*      Setup points to transform to source image space. */
    5583             :         /* --------------------------------------------------------------------
    5584             :          */
    5585        6230 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    5586        6230 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    5587      242390 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5588      236160 :             padfY[iDstX] = dfY;
    5589        6230 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    5590             : 
    5591             :         /* --------------------------------------------------------------------
    5592             :          */
    5593             :         /*      Transform the points from destination pixel/line coordinates */
    5594             :         /*      to source pixel/line coordinates. */
    5595             :         /* --------------------------------------------------------------------
    5596             :          */
    5597        6230 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    5598             :                              padfY, padfZ, pabSuccess);
    5599        6230 :         if (dfSrcCoordPrecision > 0.0)
    5600             :         {
    5601           0 :             GWKRoundSourceCoordinates(
    5602             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    5603             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    5604           0 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    5605             :         }
    5606             : 
    5607             :         /* ====================================================================
    5608             :          */
    5609             :         /*      Loop over pixels in output scanline. */
    5610             :         /* ====================================================================
    5611             :          */
    5612      242390 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5613             :         {
    5614      236160 :             GPtrDiff_t iSrcOffset = 0;
    5615      236160 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    5616             :                                               padfX, padfY, nSrcXSize,
    5617             :                                               nSrcYSize, iSrcOffset))
    5618           0 :                 continue;
    5619             : 
    5620             :             /* --------------------------------------------------------------------
    5621             :              */
    5622             :             /*      Do not try to apply transparent/invalid source pixels to the
    5623             :              */
    5624             :             /*      destination.  This currently ignores the multi-pixel input
    5625             :              */
    5626             :             /*      of bilinear and cubic resamples. */
    5627             :             /* --------------------------------------------------------------------
    5628             :              */
    5629      236160 :             double dfDensity = 1.0;
    5630             : 
    5631      236160 :             if (poWK->pafUnifiedSrcDensity != nullptr)
    5632             :             {
    5633        1200 :                 dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    5634        1200 :                 if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
    5635             :                 {
    5636           0 :                     if (!bOneSourceCornerFailsToReproject)
    5637             :                     {
    5638           0 :                         continue;
    5639             :                     }
    5640           0 :                     else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
    5641             :                                  psJob, iSrcOffset))
    5642             :                     {
    5643           0 :                         dfDensity =
    5644           0 :                             double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    5645             :                     }
    5646             :                     else
    5647             :                     {
    5648           0 :                         continue;
    5649             :                     }
    5650             :                 }
    5651             :             }
    5652             : 
    5653      236160 :             if (poWK->panUnifiedSrcValid != nullptr &&
    5654           0 :                 !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    5655             :             {
    5656           0 :                 if (!bOneSourceCornerFailsToReproject)
    5657             :                 {
    5658           0 :                     continue;
    5659             :                 }
    5660           0 :                 else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
    5661             :                 {
    5662           0 :                     continue;
    5663             :                 }
    5664             :             }
    5665             : 
    5666             :             /* ====================================================================
    5667             :              */
    5668             :             /*      Loop processing each band. */
    5669             :             /* ====================================================================
    5670             :              */
    5671      236160 :             bool bHasFoundDensity = false;
    5672             : 
    5673      236160 :             const GPtrDiff_t iDstOffset =
    5674      236160 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    5675      472320 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    5676             :             {
    5677      236160 :                 double dfBandDensity = 0.0;
    5678      236160 :                 double dfValueReal = 0.0;
    5679      236160 :                 double dfValueImag = 0.0;
    5680             : 
    5681             :                 /* --------------------------------------------------------------------
    5682             :                  */
    5683             :                 /*      Collect the source value. */
    5684             :                 /* --------------------------------------------------------------------
    5685             :                  */
    5686      236160 :                 if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
    5687             :                     nSrcYSize == 1)
    5688             :                 {
    5689             :                     // FALSE is returned if dfBandDensity == 0, which is
    5690             :                     // checked below.
    5691         568 :                     CPL_IGNORE_RET_VAL(GWKGetPixelValue(
    5692             :                         poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal,
    5693             :                         &dfValueImag));
    5694             :                 }
    5695      235592 :                 else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
    5696             :                 {
    5697         248 :                     GWKBilinearResample4Sample(
    5698         248 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5699         248 :                         padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5700             :                         &dfValueReal, &dfValueImag);
    5701             :                 }
    5702      235344 :                 else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
    5703             :                 {
    5704         248 :                     GWKCubicResample4Sample(
    5705         248 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5706         248 :                         padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5707             :                         &dfValueReal, &dfValueImag);
    5708             :                 }
    5709             :                 else
    5710             : #ifdef DEBUG
    5711             :                     // Only useful for clang static analyzer.
    5712      235096 :                     if (psWrkStruct != nullptr)
    5713             : #endif
    5714             :                     {
    5715      235096 :                         psWrkStruct->pfnGWKResample(
    5716      235096 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5717      235096 :                             padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5718             :                             &dfValueReal, &dfValueImag, psWrkStruct);
    5719             :                     }
    5720             : 
    5721             :                 // If we didn't find any valid inputs skip to next band.
    5722      236160 :                 if (dfBandDensity < BAND_DENSITY_THRESHOLD)
    5723           0 :                     continue;
    5724             : 
    5725      236160 :                 if (poWK->bApplyVerticalShift)
    5726             :                 {
    5727           0 :                     if (!std::isfinite(padfZ[iDstX]))
    5728           0 :                         continue;
    5729             :                     // Subtract padfZ[] since the coordinate transformation is
    5730             :                     // from target to source
    5731           0 :                     dfValueReal =
    5732           0 :                         dfValueReal * poWK->dfMultFactorVerticalShift -
    5733           0 :                         padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
    5734             :                 }
    5735             : 
    5736      236160 :                 bHasFoundDensity = true;
    5737             : 
    5738             :                 /* --------------------------------------------------------------------
    5739             :                  */
    5740             :                 /*      We have a computed value from the source.  Now apply it
    5741             :                  * to      */
    5742             :                 /*      the destination pixel. */
    5743             :                 /* --------------------------------------------------------------------
    5744             :                  */
    5745      236160 :                 GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    5746             :                                  dfValueReal, dfValueImag,
    5747             :                                  bAvoidNoDataSingleBand);
    5748             :             }
    5749             : 
    5750      236160 :             if (!bHasFoundDensity)
    5751           0 :                 continue;
    5752             : 
    5753      236160 :             if (!bAvoidNoDataSingleBand)
    5754             :             {
    5755           0 :                 GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    5756             :             }
    5757             : 
    5758             :             /* --------------------------------------------------------------------
    5759             :              */
    5760             :             /*      Update destination density/validity masks. */
    5761             :             /* --------------------------------------------------------------------
    5762             :              */
    5763      236160 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    5764             : 
    5765      236160 :             if (poWK->panDstValid != nullptr)
    5766             :             {
    5767           0 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    5768             :             }
    5769             :         } /* Next iDstX */
    5770             : 
    5771             :         /* --------------------------------------------------------------------
    5772             :          */
    5773             :         /*      Report progress to the user, and optionally cancel out. */
    5774             :         /* --------------------------------------------------------------------
    5775             :          */
    5776        6230 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    5777           0 :             break;
    5778             :     }
    5779             : 
    5780             :     /* -------------------------------------------------------------------- */
    5781             :     /*      Cleanup and return.                                             */
    5782             :     /* -------------------------------------------------------------------- */
    5783         239 :     CPLFree(padfX);
    5784         239 :     CPLFree(padfY);
    5785         239 :     CPLFree(padfZ);
    5786         239 :     CPLFree(pabSuccess);
    5787         239 :     if (psWrkStruct)
    5788         220 :         GWKResampleDeleteWrkStruct(psWrkStruct);
    5789         239 : }
    5790             : 
    5791         239 : static CPLErr GWKGeneralCase(GDALWarpKernel *poWK)
    5792             : {
    5793         239 :     return GWKRun(poWK, "GWKGeneralCase", GWKGeneralCaseThread);
    5794             : }
    5795             : 
    5796             : /************************************************************************/
    5797             : /*                            GWKRealCase()                             */
    5798             : /*                                                                      */
    5799             : /*      General case for non-complex data types.                        */
    5800             : /************************************************************************/
    5801             : 
    5802         219 : static void GWKRealCaseThread(void *pData)
    5803             : 
    5804             : {
    5805         219 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    5806         219 :     GDALWarpKernel *poWK = psJob->poWK;
    5807         219 :     const int iYMin = psJob->iYMin;
    5808         219 :     const int iYMax = psJob->iYMax;
    5809             : 
    5810         219 :     const int nDstXSize = poWK->nDstXSize;
    5811         219 :     const int nSrcXSize = poWK->nSrcXSize;
    5812         219 :     const int nSrcYSize = poWK->nSrcYSize;
    5813             :     const double dfMultFactorVerticalShiftPipeline =
    5814         219 :         poWK->bApplyVerticalShift
    5815         219 :             ? CPLAtof(CSLFetchNameValueDef(
    5816           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    5817             :                   "1.0"))
    5818         219 :             : 0.0;
    5819             :     const bool bAvoidNoDataSingleBand =
    5820         297 :         poWK->nBands == 1 ||
    5821          78 :         !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
    5822         219 :                                           "UNIFIED_SRC_NODATA", "FALSE"));
    5823             : 
    5824             :     /* -------------------------------------------------------------------- */
    5825             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    5826             :     /*      scanlines worth of positions.                                   */
    5827             :     /* -------------------------------------------------------------------- */
    5828             : 
    5829             :     // For x, 2 *, because we cache the precomputed values at the end.
    5830             :     double *padfX =
    5831         219 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    5832             :     double *padfY =
    5833         219 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5834             :     double *padfZ =
    5835         219 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5836         219 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    5837             : 
    5838         219 :     const bool bUse4SamplesFormula = CanUse4SamplesFormula(poWK);
    5839             : 
    5840         219 :     GWKResampleWrkStruct *psWrkStruct = nullptr;
    5841         219 :     if (poWK->eResample != GRA_NearestNeighbour)
    5842             :     {
    5843         177 :         psWrkStruct = GWKResampleCreateWrkStruct(poWK);
    5844             :     }
    5845         219 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    5846         219 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    5847         219 :     const double dfErrorThreshold = CPLAtof(
    5848         219 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    5849             : 
    5850         626 :     const bool bSrcMaskIsDensity = poWK->panUnifiedSrcValid == nullptr &&
    5851         407 :                                    poWK->papanBandSrcValid == nullptr &&
    5852         188 :                                    poWK->pafUnifiedSrcDensity != nullptr;
    5853             : 
    5854             :     const bool bOneSourceCornerFailsToReproject =
    5855         219 :         GWKOneSourceCornerFailsToReproject(psJob);
    5856             : 
    5857             :     // Precompute values.
    5858       22605 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5859       22386 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    5860             : 
    5861             :     /* ==================================================================== */
    5862             :     /*      Loop over output lines.                                         */
    5863             :     /* ==================================================================== */
    5864       25393 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    5865             :     {
    5866             :         /* --------------------------------------------------------------------
    5867             :          */
    5868             :         /*      Setup points to transform to source image space. */
    5869             :         /* --------------------------------------------------------------------
    5870             :          */
    5871       25174 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    5872       25174 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    5873    44331500 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5874    44306300 :             padfY[iDstX] = dfY;
    5875       25174 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    5876             : 
    5877             :         /* --------------------------------------------------------------------
    5878             :          */
    5879             :         /*      Transform the points from destination pixel/line coordinates */
    5880             :         /*      to source pixel/line coordinates. */
    5881             :         /* --------------------------------------------------------------------
    5882             :          */
    5883       25174 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    5884             :                              padfY, padfZ, pabSuccess);
    5885       25174 :         if (dfSrcCoordPrecision > 0.0)
    5886             :         {
    5887           0 :             GWKRoundSourceCoordinates(
    5888             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    5889             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    5890           0 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    5891             :         }
    5892             : 
    5893             :         /* ====================================================================
    5894             :          */
    5895             :         /*      Loop over pixels in output scanline. */
    5896             :         /* ====================================================================
    5897             :          */
    5898    44331500 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5899             :         {
    5900    44306300 :             GPtrDiff_t iSrcOffset = 0;
    5901    44306300 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    5902             :                                               padfX, padfY, nSrcXSize,
    5903             :                                               nSrcYSize, iSrcOffset))
    5904    43567600 :                 continue;
    5905             : 
    5906             :             /* --------------------------------------------------------------------
    5907             :              */
    5908             :             /*      Do not try to apply transparent/invalid source pixels to the
    5909             :              */
    5910             :             /*      destination.  This currently ignores the multi-pixel input
    5911             :              */
    5912             :             /*      of bilinear and cubic resamples. */
    5913             :             /* --------------------------------------------------------------------
    5914             :              */
    5915    31793100 :             double dfDensity = 1.0;
    5916             : 
    5917    31793100 :             if (poWK->pafUnifiedSrcDensity != nullptr)
    5918             :             {
    5919     1656100 :                 dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    5920     1656100 :                 if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
    5921             :                 {
    5922     1525010 :                     if (!bOneSourceCornerFailsToReproject)
    5923             :                     {
    5924     1525010 :                         continue;
    5925             :                     }
    5926           0 :                     else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
    5927             :                                  psJob, iSrcOffset))
    5928             :                     {
    5929           0 :                         dfDensity =
    5930           0 :                             double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    5931             :                     }
    5932             :                     else
    5933             :                     {
    5934           0 :                         continue;
    5935             :                     }
    5936             :                 }
    5937             :             }
    5938             : 
    5939    59897300 :             if (poWK->panUnifiedSrcValid != nullptr &&
    5940    29629200 :                 !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    5941             :             {
    5942    29531600 :                 if (!bOneSourceCornerFailsToReproject)
    5943             :                 {
    5944    29529300 :                     continue;
    5945             :                 }
    5946        2229 :                 else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
    5947             :                 {
    5948           0 :                     continue;
    5949             :                 }
    5950             :             }
    5951             : 
    5952             :             /* ====================================================================
    5953             :              */
    5954             :             /*      Loop processing each band. */
    5955             :             /* ====================================================================
    5956             :              */
    5957      738768 :             bool bHasFoundDensity = false;
    5958             : 
    5959      738768 :             const GPtrDiff_t iDstOffset =
    5960      738768 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    5961     2069310 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    5962             :             {
    5963     1330540 :                 double dfBandDensity = 0.0;
    5964     1330540 :                 double dfValueReal = 0.0;
    5965             : 
    5966             :                 /* --------------------------------------------------------------------
    5967             :                  */
    5968             :                 /*      Collect the source value. */
    5969             :                 /* --------------------------------------------------------------------
    5970             :                  */
    5971     1330540 :                 if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
    5972             :                     nSrcYSize == 1)
    5973             :                 {
    5974             :                     // FALSE is returned if dfBandDensity == 0, which is
    5975             :                     // checked below.
    5976       15516 :                     CPL_IGNORE_RET_VAL(GWKGetPixelValueReal(
    5977             :                         poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal));
    5978             :                 }
    5979     1315030 :                 else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
    5980             :                 {
    5981        2046 :                     double dfValueImagIgnored = 0.0;
    5982        2046 :                     GWKBilinearResample4Sample(
    5983        2046 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5984        2046 :                         padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5985        2046 :                         &dfValueReal, &dfValueImagIgnored);
    5986             :                 }
    5987     1312980 :                 else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
    5988             :                 {
    5989      691552 :                     if (bSrcMaskIsDensity)
    5990             :                     {
    5991      389755 :                         if (poWK->eWorkingDataType == GDT_UInt8)
    5992             :                         {
    5993      389755 :                             GWKCubicResampleSrcMaskIsDensity4SampleRealT<GByte>(
    5994      389755 :                                 poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5995      389755 :                                 padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5996             :                                 &dfValueReal);
    5997             :                         }
    5998           0 :                         else if (poWK->eWorkingDataType == GDT_UInt16)
    5999             :                         {
    6000             :                             GWKCubicResampleSrcMaskIsDensity4SampleRealT<
    6001           0 :                                 GUInt16>(poWK, iBand,
    6002           0 :                                          padfX[iDstX] - poWK->nSrcXOff,
    6003           0 :                                          padfY[iDstX] - poWK->nSrcYOff,
    6004             :                                          &dfBandDensity, &dfValueReal);
    6005             :                         }
    6006             :                         else
    6007             :                         {
    6008           0 :                             GWKCubicResampleSrcMaskIsDensity4SampleReal(
    6009           0 :                                 poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6010           0 :                                 padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    6011             :                                 &dfValueReal);
    6012             :                         }
    6013             :                     }
    6014             :                     else
    6015             :                     {
    6016      301797 :                         double dfValueImagIgnored = 0.0;
    6017      301797 :                         GWKCubicResample4Sample(
    6018      301797 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6019      301797 :                             padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    6020             :                             &dfValueReal, &dfValueImagIgnored);
    6021      691552 :                     }
    6022             :                 }
    6023             :                 else
    6024             : #ifdef DEBUG
    6025             :                     // Only useful for clang static analyzer.
    6026      621431 :                     if (psWrkStruct != nullptr)
    6027             : #endif
    6028             :                     {
    6029      621431 :                         double dfValueImagIgnored = 0.0;
    6030      621431 :                         psWrkStruct->pfnGWKResample(
    6031      621431 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6032      621431 :                             padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    6033             :                             &dfValueReal, &dfValueImagIgnored, psWrkStruct);
    6034             :                     }
    6035             : 
    6036             :                 // If we didn't find any valid inputs skip to next band.
    6037     1330540 :                 if (dfBandDensity < BAND_DENSITY_THRESHOLD)
    6038           0 :                     continue;
    6039             : 
    6040     1330540 :                 if (poWK->bApplyVerticalShift)
    6041             :                 {
    6042           0 :                     if (!std::isfinite(padfZ[iDstX]))
    6043           0 :                         continue;
    6044             :                     // Subtract padfZ[] since the coordinate transformation is
    6045             :                     // from target to source
    6046           0 :                     dfValueReal =
    6047           0 :                         dfValueReal * poWK->dfMultFactorVerticalShift -
    6048           0 :                         padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
    6049             :                 }
    6050             : 
    6051     1330540 :                 bHasFoundDensity = true;
    6052             : 
    6053             :                 /* --------------------------------------------------------------------
    6054             :                  */
    6055             :                 /*      We have a computed value from the source.  Now apply it
    6056             :                  * to      */
    6057             :                 /*      the destination pixel. */
    6058             :                 /* --------------------------------------------------------------------
    6059             :                  */
    6060     1330540 :                 GWKSetPixelValueReal(poWK, iBand, iDstOffset, dfBandDensity,
    6061             :                                      dfValueReal, bAvoidNoDataSingleBand);
    6062             :             }
    6063             : 
    6064      738768 :             if (!bHasFoundDensity)
    6065           0 :                 continue;
    6066             : 
    6067      738768 :             if (!bAvoidNoDataSingleBand)
    6068             :             {
    6069      100295 :                 GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    6070             :             }
    6071             : 
    6072             :             /* --------------------------------------------------------------------
    6073             :              */
    6074             :             /*      Update destination density/validity masks. */
    6075             :             /* --------------------------------------------------------------------
    6076             :              */
    6077      738768 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    6078             : 
    6079      738768 :             if (poWK->panDstValid != nullptr)
    6080             :             {
    6081      104586 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    6082             :             }
    6083             :         }  // Next iDstX.
    6084             : 
    6085             :         /* --------------------------------------------------------------------
    6086             :          */
    6087             :         /*      Report progress to the user, and optionally cancel out. */
    6088             :         /* --------------------------------------------------------------------
    6089             :          */
    6090       25174 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    6091           0 :             break;
    6092             :     }
    6093             : 
    6094             :     /* -------------------------------------------------------------------- */
    6095             :     /*      Cleanup and return.                                             */
    6096             :     /* -------------------------------------------------------------------- */
    6097         219 :     CPLFree(padfX);
    6098         219 :     CPLFree(padfY);
    6099         219 :     CPLFree(padfZ);
    6100         219 :     CPLFree(pabSuccess);
    6101         219 :     if (psWrkStruct)
    6102         177 :         GWKResampleDeleteWrkStruct(psWrkStruct);
    6103         219 : }
    6104             : 
    6105         219 : static CPLErr GWKRealCase(GDALWarpKernel *poWK)
    6106             : {
    6107         219 :     return GWKRun(poWK, "GWKRealCase", GWKRealCaseThread);
    6108             : }
    6109             : 
    6110             : /************************************************************************/
    6111             : /*                 GWKCubicResampleNoMasks4MultiBandT()                 */
    6112             : /************************************************************************/
    6113             : 
    6114             : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
    6115             : /* and enough SSE registers */
    6116             : #if defined(USE_SSE2)
    6117             : 
    6118   115847000 : static inline float Convolute4x4(const __m128 row0, const __m128 row1,
    6119             :                                  const __m128 row2, const __m128 row3,
    6120             :                                  const __m128 weightsXY0,
    6121             :                                  const __m128 weightsXY1,
    6122             :                                  const __m128 weightsXY2,
    6123             :                                  const __m128 weightsXY3)
    6124             : {
    6125   810929000 :     return XMMHorizontalAdd(_mm_add_ps(
    6126             :         _mm_add_ps(_mm_mul_ps(row0, weightsXY0), _mm_mul_ps(row1, weightsXY1)),
    6127             :         _mm_add_ps(_mm_mul_ps(row2, weightsXY2),
    6128   115847000 :                    _mm_mul_ps(row3, weightsXY3))));
    6129             : }
    6130             : 
    6131             : template <class T>
    6132    39602542 : static void GWKCubicResampleNoMasks4MultiBandT(const GDALWarpKernel *poWK,
    6133             :                                                double dfSrcX, double dfSrcY,
    6134             :                                                const GPtrDiff_t iDstOffset)
    6135             : {
    6136    39602542 :     const double dfSrcXShifted = dfSrcX - 0.5;
    6137    39602542 :     const int iSrcX = static_cast<int>(dfSrcXShifted);
    6138    39602542 :     const double dfSrcYShifted = dfSrcY - 0.5;
    6139    39602542 :     const int iSrcY = static_cast<int>(dfSrcYShifted);
    6140    39602542 :     const GPtrDiff_t iSrcOffset =
    6141    39602542 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    6142             : 
    6143             :     // Get the bilinear interpolation at the image borders.
    6144    39602542 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    6145    38630062 :         iSrcY + 2 >= poWK->nSrcYSize)
    6146             :     {
    6147     3947270 :         for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6148             :         {
    6149             :             T value;
    6150     2960450 :             GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    6151             :                                                &value);
    6152     2960450 :             reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
    6153             :                 value;
    6154      986817 :         }
    6155             :     }
    6156             :     else
    6157             :     {
    6158    38615662 :         const float fDeltaX = static_cast<float>(dfSrcXShifted) - iSrcX;
    6159    38615662 :         const float fDeltaY = static_cast<float>(dfSrcYShifted) - iSrcY;
    6160             : 
    6161             :         float afCoeffsX[4];
    6162             :         float afCoeffsY[4];
    6163    38615662 :         GWKCubicComputeWeights(fDeltaX, afCoeffsX);
    6164    38615662 :         GWKCubicComputeWeights(fDeltaY, afCoeffsY);
    6165    38615662 :         const auto weightsX = _mm_loadu_ps(afCoeffsX);
    6166             :         const auto weightsXY0 =
    6167    77231324 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[0]), weightsX);
    6168             :         const auto weightsXY1 =
    6169    77231324 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[1]), weightsX);
    6170             :         const auto weightsXY2 =
    6171    77231324 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[2]), weightsX);
    6172             :         const auto weightsXY3 =
    6173    38615662 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[3]), weightsX);
    6174             : 
    6175    38615662 :         const GPtrDiff_t iOffset = iSrcOffset - poWK->nSrcXSize - 1;
    6176             : 
    6177    38615662 :         int iBand = 0;
    6178             :         // Process 2 bands at a time
    6179    77231324 :         for (; iBand + 1 < poWK->nBands; iBand += 2)
    6180             :         {
    6181    38615662 :             const T *CPL_RESTRICT pBand0 =
    6182    38615662 :                 reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
    6183    38615662 :             const auto row0_0 = XMMLoad4Values(pBand0 + iOffset);
    6184             :             const auto row1_0 =
    6185    38615662 :                 XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
    6186             :             const auto row2_0 =
    6187    38615662 :                 XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
    6188             :             const auto row3_0 =
    6189    38615662 :                 XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
    6190             : 
    6191    38615662 :             const T *CPL_RESTRICT pBand1 =
    6192    38615662 :                 reinterpret_cast<const T *>(poWK->papabySrcImage[iBand + 1]);
    6193    38615662 :             const auto row0_1 = XMMLoad4Values(pBand1 + iOffset);
    6194             :             const auto row1_1 =
    6195    38615662 :                 XMMLoad4Values(pBand1 + iOffset + poWK->nSrcXSize);
    6196             :             const auto row2_1 =
    6197    38615662 :                 XMMLoad4Values(pBand1 + iOffset + 2 * poWK->nSrcXSize);
    6198             :             const auto row3_1 =
    6199    38615662 :                 XMMLoad4Values(pBand1 + iOffset + 3 * poWK->nSrcXSize);
    6200             : 
    6201             :             const float fValue_0 =
    6202    38615662 :                 Convolute4x4(row0_0, row1_0, row2_0, row3_0, weightsXY0,
    6203             :                              weightsXY1, weightsXY2, weightsXY3);
    6204             : 
    6205             :             const float fValue_1 =
    6206    38615662 :                 Convolute4x4(row0_1, row1_1, row2_1, row3_1, weightsXY0,
    6207             :                              weightsXY1, weightsXY2, weightsXY3);
    6208             : 
    6209    38615662 :             T *CPL_RESTRICT pDstBand0 =
    6210    38615662 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
    6211    38615662 :             pDstBand0[iDstOffset] = GWKClampValueT<T>(fValue_0);
    6212             : 
    6213    38615662 :             T *CPL_RESTRICT pDstBand1 =
    6214    38615662 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand + 1]);
    6215    38615662 :             pDstBand1[iDstOffset] = GWKClampValueT<T>(fValue_1);
    6216             :         }
    6217    38615662 :         if (iBand < poWK->nBands)
    6218             :         {
    6219    38615662 :             const T *CPL_RESTRICT pBand0 =
    6220    38615662 :                 reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
    6221    38615662 :             const auto row0 = XMMLoad4Values(pBand0 + iOffset);
    6222             :             const auto row1 =
    6223    38615662 :                 XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
    6224             :             const auto row2 =
    6225    38615662 :                 XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
    6226             :             const auto row3 =
    6227    38615662 :                 XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
    6228             : 
    6229             :             const float fValue =
    6230    38615662 :                 Convolute4x4(row0, row1, row2, row3, weightsXY0, weightsXY1,
    6231             :                              weightsXY2, weightsXY3);
    6232             : 
    6233    38615662 :             T *CPL_RESTRICT pDstBand =
    6234    38615662 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
    6235    38615662 :             pDstBand[iDstOffset] = GWKClampValueT<T>(fValue);
    6236             :         }
    6237             :     }
    6238             : 
    6239    39602542 :     if (poWK->pafDstDensity)
    6240    37448501 :         poWK->pafDstDensity[iDstOffset] = 1.0f;
    6241    39602542 : }
    6242             : 
    6243             : #endif  // defined(USE_SSE2)
    6244             : 
    6245             : /************************************************************************/
    6246             : /*          GWKResampleNoMasksOrDstDensityOnlyThreadInternal()          */
    6247             : /************************************************************************/
    6248             : 
    6249             : template <class T, GDALResampleAlg eResample, int bUse4SamplesFormula>
    6250        1844 : static void GWKResampleNoMasksOrDstDensityOnlyThreadInternal(void *pData)
    6251             : 
    6252             : {
    6253        1844 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    6254        1844 :     GDALWarpKernel *poWK = psJob->poWK;
    6255        1844 :     const int iYMin = psJob->iYMin;
    6256        1844 :     const int iYMax = psJob->iYMax;
    6257        1826 :     const double dfMultFactorVerticalShiftPipeline =
    6258        1844 :         poWK->bApplyVerticalShift
    6259          18 :             ? CPLAtof(CSLFetchNameValueDef(
    6260          18 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    6261             :                   "1.0"))
    6262             :             : 0.0;
    6263             : 
    6264        1844 :     const int nDstXSize = poWK->nDstXSize;
    6265        1844 :     const int nSrcXSize = poWK->nSrcXSize;
    6266        1844 :     const int nSrcYSize = poWK->nSrcYSize;
    6267             : 
    6268             :     /* -------------------------------------------------------------------- */
    6269             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    6270             :     /*      scanlines worth of positions.                                   */
    6271             :     /* -------------------------------------------------------------------- */
    6272             : 
    6273             :     // For x, 2 *, because we cache the precomputed values at the end.
    6274             :     double *padfX =
    6275        1844 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    6276             :     double *padfY =
    6277        1844 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6278             :     double *padfZ =
    6279        1844 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6280        1844 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    6281             : 
    6282        1844 :     const int nXRadius = poWK->nXRadius;
    6283             :     double *padfWeightsX =
    6284        1844 :         static_cast<double *>(CPLCalloc(1 + nXRadius * 2, sizeof(double)));
    6285             :     double *padfWeightsY = static_cast<double *>(
    6286        1844 :         CPLCalloc(1 + poWK->nYRadius * 2, sizeof(double)));
    6287        1844 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    6288        1844 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    6289        1844 :     const double dfErrorThreshold = CPLAtof(
    6290        1844 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    6291             : 
    6292             :     // Precompute values.
    6293      418917 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6294      417073 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    6295             : 
    6296             :     /* ==================================================================== */
    6297             :     /*      Loop over output lines.                                         */
    6298             :     /* ==================================================================== */
    6299      293464 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    6300             :     {
    6301             :         /* --------------------------------------------------------------------
    6302             :          */
    6303             :         /*      Setup points to transform to source image space. */
    6304             :         /* --------------------------------------------------------------------
    6305             :          */
    6306      291621 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    6307      291621 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    6308    98590159 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6309    98298535 :             padfY[iDstX] = dfY;
    6310      291621 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    6311             : 
    6312             :         /* --------------------------------------------------------------------
    6313             :          */
    6314             :         /*      Transform the points from destination pixel/line coordinates */
    6315             :         /*      to source pixel/line coordinates. */
    6316             :         /* --------------------------------------------------------------------
    6317             :          */
    6318      291621 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    6319             :                              padfY, padfZ, pabSuccess);
    6320      291621 :         if (dfSrcCoordPrecision > 0.0)
    6321             :         {
    6322        1000 :             GWKRoundSourceCoordinates(
    6323             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    6324             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    6325        1000 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    6326             :         }
    6327             : 
    6328             :         /* ====================================================================
    6329             :          */
    6330             :         /*      Loop over pixels in output scanline. */
    6331             :         /* ====================================================================
    6332             :          */
    6333    98590159 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6334             :         {
    6335    98298535 :             GPtrDiff_t iSrcOffset = 0;
    6336    98298535 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    6337             :                                               padfX, padfY, nSrcXSize,
    6338             :                                               nSrcYSize, iSrcOffset))
    6339    49822126 :                 continue;
    6340             : 
    6341             :             /* ====================================================================
    6342             :              */
    6343             :             /*      Loop processing each band. */
    6344             :             /* ====================================================================
    6345             :              */
    6346    88079019 :             const GPtrDiff_t iDstOffset =
    6347    88079019 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    6348             : 
    6349             : #if defined(USE_SSE2)
    6350             :             if constexpr (bUse4SamplesFormula && eResample == GRA_Cubic &&
    6351             :                           (std::is_same<T, GByte>::value ||
    6352             :                            std::is_same<T, GUInt16>::value))
    6353             :             {
    6354    40668141 :                 if (poWK->nBands > 1 && !poWK->bApplyVerticalShift)
    6355             :                 {
    6356    39602542 :                     GWKCubicResampleNoMasks4MultiBandT<T>(
    6357    39602542 :                         poWK, padfX[iDstX] - poWK->nSrcXOff,
    6358    39602542 :                         padfY[iDstX] - poWK->nSrcYOff, iDstOffset);
    6359             : 
    6360    39602542 :                     continue;
    6361             :                 }
    6362             :             }
    6363             : #endif  // defined(USE_SSE2)
    6364             : 
    6365    48476490 :             [[maybe_unused]] double dfInvWeights = 0;
    6366   134855396 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6367             :             {
    6368    86378910 :                 T value = 0;
    6369             :                 if constexpr (eResample == GRA_NearestNeighbour)
    6370             :                 {
    6371    78474930 :                     value = reinterpret_cast<T *>(
    6372    78474930 :                         poWK->papabySrcImage[iBand])[iSrcOffset];
    6373             :                 }
    6374             :                 else if constexpr (bUse4SamplesFormula)
    6375             :                 {
    6376             :                     if constexpr (eResample == GRA_Bilinear)
    6377     3827651 :                         GWKBilinearResampleNoMasks4SampleT(
    6378     3827651 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6379     3827651 :                             padfY[iDstX] - poWK->nSrcYOff, &value);
    6380             :                     else
    6381     2300964 :                         GWKCubicResampleNoMasks4SampleT(
    6382     2300964 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6383     2300964 :                             padfY[iDstX] - poWK->nSrcYOff, &value);
    6384             :                 }
    6385             :                 else
    6386             :                 {
    6387     1775365 :                     GWKResampleNoMasksT(
    6388     1775365 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6389     1775365 :                         padfY[iDstX] - poWK->nSrcYOff, &value, padfWeightsX,
    6390             :                         padfWeightsY, dfInvWeights);
    6391             :                 }
    6392             : 
    6393    86378910 :                 if (poWK->bApplyVerticalShift)
    6394             :                 {
    6395         818 :                     if (!std::isfinite(padfZ[iDstX]))
    6396           0 :                         continue;
    6397             :                     // Subtract padfZ[] since the coordinate transformation is
    6398             :                     // from target to source
    6399         818 :                     value = GWKClampValueT<T>(
    6400         818 :                         double(value) * poWK->dfMultFactorVerticalShift -
    6401         818 :                         padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
    6402             :                 }
    6403             : 
    6404    86378910 :                 if (poWK->pafDstDensity)
    6405    12985339 :                     poWK->pafDstDensity[iDstOffset] = 1.0f;
    6406             : 
    6407    86378910 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
    6408             :                     value;
    6409             :             }
    6410             :         }
    6411             : 
    6412             :         /* --------------------------------------------------------------------
    6413             :          */
    6414             :         /*      Report progress to the user, and optionally cancel out. */
    6415             :         /* --------------------------------------------------------------------
    6416             :          */
    6417      291621 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    6418           1 :             break;
    6419             :     }
    6420             : 
    6421             :     /* -------------------------------------------------------------------- */
    6422             :     /*      Cleanup and return.                                             */
    6423             :     /* -------------------------------------------------------------------- */
    6424        1844 :     CPLFree(padfX);
    6425        1844 :     CPLFree(padfY);
    6426        1844 :     CPLFree(padfZ);
    6427        1844 :     CPLFree(pabSuccess);
    6428        1844 :     CPLFree(padfWeightsX);
    6429        1844 :     CPLFree(padfWeightsY);
    6430        1844 : }
    6431             : 
    6432             : template <class T, GDALResampleAlg eResample>
    6433         995 : static void GWKResampleNoMasksOrDstDensityOnlyThread(void *pData)
    6434             : {
    6435         995 :     GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
    6436             :         pData);
    6437         995 : }
    6438             : 
    6439             : template <class T, GDALResampleAlg eResample>
    6440         849 : static void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread(void *pData)
    6441             : 
    6442             : {
    6443         849 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    6444         849 :     GDALWarpKernel *poWK = psJob->poWK;
    6445             :     static_assert(eResample == GRA_Bilinear || eResample == GRA_Cubic);
    6446         849 :     const bool bUse4SamplesFormula = CanUse4SamplesFormula(poWK);
    6447         849 :     if (bUse4SamplesFormula)
    6448         792 :         GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, TRUE>(
    6449             :             pData);
    6450             :     else
    6451          57 :         GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
    6452             :             pData);
    6453         849 : }
    6454             : 
    6455         944 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6456             : {
    6457         944 :     return GWKRun(
    6458             :         poWK, "GWKNearestNoMasksOrDstDensityOnlyByte",
    6459         944 :         GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_NearestNeighbour>);
    6460             : }
    6461             : 
    6462         126 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6463             : {
    6464         126 :     return GWKRun(
    6465             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyByte",
    6466             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte,
    6467         126 :                                                            GRA_Bilinear>);
    6468             : }
    6469             : 
    6470         677 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6471             : {
    6472         677 :     return GWKRun(
    6473             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyByte",
    6474         677 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte, GRA_Cubic>);
    6475             : }
    6476             : 
    6477           9 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
    6478             : {
    6479           9 :     return GWKRun(
    6480             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyFloat",
    6481           9 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, GRA_Cubic>);
    6482             : }
    6483             : 
    6484             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    6485             : 
    6486             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
    6487             : {
    6488             :     return GWKRun(
    6489             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyDouble",
    6490             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double, GRA_Cubic>);
    6491             : }
    6492             : #endif
    6493             : 
    6494          12 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6495             : {
    6496          12 :     return GWKRun(
    6497             :         poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyByte",
    6498          12 :         GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_CubicSpline>);
    6499             : }
    6500             : 
    6501             : /************************************************************************/
    6502             : /*                          GWKNearestByte()                            */
    6503             : /*                                                                      */
    6504             : /*      Case for 8bit input data with nearest neighbour resampling      */
    6505             : /*      using valid flags. Should be as fast as possible for this       */
    6506             : /*      particular transformation type.                                 */
    6507             : /************************************************************************/
    6508             : 
    6509         459 : template <class T> static void GWKNearestThread(void *pData)
    6510             : 
    6511             : {
    6512         459 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    6513         459 :     GDALWarpKernel *poWK = psJob->poWK;
    6514         459 :     const int iYMin = psJob->iYMin;
    6515         459 :     const int iYMax = psJob->iYMax;
    6516         459 :     const double dfMultFactorVerticalShiftPipeline =
    6517         459 :         poWK->bApplyVerticalShift
    6518           0 :             ? CPLAtof(CSLFetchNameValueDef(
    6519           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    6520             :                   "1.0"))
    6521             :             : 0.0;
    6522         459 :     const bool bAvoidNoDataSingleBand =
    6523         525 :         poWK->nBands == 1 ||
    6524          66 :         !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
    6525             :                                           "UNIFIED_SRC_NODATA", "FALSE"));
    6526             : 
    6527         459 :     const int nDstXSize = poWK->nDstXSize;
    6528         459 :     const int nSrcXSize = poWK->nSrcXSize;
    6529         459 :     const int nSrcYSize = poWK->nSrcYSize;
    6530             : 
    6531             :     /* -------------------------------------------------------------------- */
    6532             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    6533             :     /*      scanlines worth of positions.                                   */
    6534             :     /* -------------------------------------------------------------------- */
    6535             : 
    6536             :     // For x, 2 *, because we cache the precomputed values at the end.
    6537             :     double *padfX =
    6538         459 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    6539             :     double *padfY =
    6540         459 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6541             :     double *padfZ =
    6542         459 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6543         459 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    6544             : 
    6545         459 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    6546         459 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    6547         459 :     const double dfErrorThreshold = CPLAtof(
    6548         459 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    6549             : 
    6550             :     const bool bOneSourceCornerFailsToReproject =
    6551         459 :         GWKOneSourceCornerFailsToReproject(psJob);
    6552             : 
    6553             :     // Precompute values.
    6554       62854 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6555       62395 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    6556             : 
    6557             :     /* ==================================================================== */
    6558             :     /*      Loop over output lines.                                         */
    6559             :     /* ==================================================================== */
    6560       48162 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    6561             :     {
    6562             : 
    6563             :         /* --------------------------------------------------------------------
    6564             :          */
    6565             :         /*      Setup points to transform to source image space. */
    6566             :         /* --------------------------------------------------------------------
    6567             :          */
    6568       47703 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    6569       47703 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    6570     9833535 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6571     9785833 :             padfY[iDstX] = dfY;
    6572       47703 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    6573             : 
    6574             :         /* --------------------------------------------------------------------
    6575             :          */
    6576             :         /*      Transform the points from destination pixel/line coordinates */
    6577             :         /*      to source pixel/line coordinates. */
    6578             :         /* --------------------------------------------------------------------
    6579             :          */
    6580       47703 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    6581             :                              padfY, padfZ, pabSuccess);
    6582       47703 :         if (dfSrcCoordPrecision > 0.0)
    6583             :         {
    6584           0 :             GWKRoundSourceCoordinates(
    6585             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    6586             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    6587           0 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    6588             :         }
    6589             :         /* ====================================================================
    6590             :          */
    6591             :         /*      Loop over pixels in output scanline. */
    6592             :         /* ====================================================================
    6593             :          */
    6594     9833535 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6595             :         {
    6596     9785833 :             GPtrDiff_t iSrcOffset = 0;
    6597     9785833 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    6598             :                                               padfX, padfY, nSrcXSize,
    6599             :                                               nSrcYSize, iSrcOffset))
    6600     2358945 :                 continue;
    6601             : 
    6602             :             /* --------------------------------------------------------------------
    6603             :              */
    6604             :             /*      Do not try to apply invalid source pixels to the dest. */
    6605             :             /* --------------------------------------------------------------------
    6606             :              */
    6607     9606143 :             if (poWK->panUnifiedSrcValid != nullptr &&
    6608     1127399 :                 !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    6609             :             {
    6610       49043 :                 if (!bOneSourceCornerFailsToReproject)
    6611             :                 {
    6612       41558 :                     continue;
    6613             :                 }
    6614        7485 :                 else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
    6615             :                 {
    6616        5224 :                     continue;
    6617             :                 }
    6618             :             }
    6619             : 
    6620             :             /* --------------------------------------------------------------------
    6621             :              */
    6622             :             /*      Do not try to apply transparent source pixels to the
    6623             :              * destination.*/
    6624             :             /* --------------------------------------------------------------------
    6625             :              */
    6626     8431960 :             double dfDensity = 1.0;
    6627             : 
    6628     8431960 :             if (poWK->pafUnifiedSrcDensity != nullptr)
    6629             :             {
    6630     1557335 :                 dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    6631     1557335 :                 if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
    6632     1005075 :                     continue;
    6633             :             }
    6634             : 
    6635             :             /* ====================================================================
    6636             :              */
    6637             :             /*      Loop processing each band. */
    6638             :             /* ====================================================================
    6639             :              */
    6640             : 
    6641     7426888 :             const GPtrDiff_t iDstOffset =
    6642     7426888 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    6643             : 
    6644    17415958 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6645             :             {
    6646     9989020 :                 T value = 0;
    6647     9989020 :                 double dfBandDensity = 0.0;
    6648             : 
    6649             :                 /* --------------------------------------------------------------------
    6650             :                  */
    6651             :                 /*      Collect the source value. */
    6652             :                 /* --------------------------------------------------------------------
    6653             :                  */
    6654     9989020 :                 if (GWKGetPixelT(poWK, iBand, iSrcOffset, &dfBandDensity,
    6655             :                                  &value))
    6656             :                 {
    6657             : 
    6658     9989010 :                     if (poWK->bApplyVerticalShift)
    6659             :                     {
    6660           0 :                         if (!std::isfinite(padfZ[iDstX]))
    6661           0 :                             continue;
    6662             :                         // Subtract padfZ[] since the coordinate transformation
    6663             :                         // is from target to source
    6664           0 :                         value = GWKClampValueT<T>(
    6665           0 :                             double(value) * poWK->dfMultFactorVerticalShift -
    6666           0 :                             padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
    6667             :                     }
    6668             : 
    6669     9989010 :                     GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
    6670             :                                           dfBandDensity, value,
    6671             :                                           bAvoidNoDataSingleBand);
    6672             :                 }
    6673             :             }
    6674             : 
    6675             :             /* --------------------------------------------------------------------
    6676             :              */
    6677             :             /*      Mark this pixel valid/opaque in the output. */
    6678             :             /* --------------------------------------------------------------------
    6679             :              */
    6680             : 
    6681     7426888 :             if (!bAvoidNoDataSingleBand)
    6682             :             {
    6683      424278 :                 GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    6684             :             }
    6685             : 
    6686     7426888 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    6687             : 
    6688     7426888 :             if (poWK->panDstValid != nullptr)
    6689             :             {
    6690     6156885 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    6691             :             }
    6692             :         } /* Next iDstX */
    6693             : 
    6694             :         /* --------------------------------------------------------------------
    6695             :          */
    6696             :         /*      Report progress to the user, and optionally cancel out. */
    6697             :         /* --------------------------------------------------------------------
    6698             :          */
    6699       47703 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    6700           0 :             break;
    6701             :     }
    6702             : 
    6703             :     /* -------------------------------------------------------------------- */
    6704             :     /*      Cleanup and return.                                             */
    6705             :     /* -------------------------------------------------------------------- */
    6706         459 :     CPLFree(padfX);
    6707         459 :     CPLFree(padfY);
    6708         459 :     CPLFree(padfZ);
    6709         459 :     CPLFree(pabSuccess);
    6710         459 : }
    6711             : 
    6712         350 : static CPLErr GWKNearestByte(GDALWarpKernel *poWK)
    6713             : {
    6714         350 :     return GWKRun(poWK, "GWKNearestByte", GWKNearestThread<GByte>);
    6715             : }
    6716             : 
    6717          14 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6718             : {
    6719          14 :     return GWKRun(
    6720             :         poWK, "GWKNearestNoMasksOrDstDensityOnlyShort",
    6721          14 :         GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_NearestNeighbour>);
    6722             : }
    6723             : 
    6724           5 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6725             : {
    6726           5 :     return GWKRun(
    6727             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyShort",
    6728             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16,
    6729           5 :                                                            GRA_Bilinear>);
    6730             : }
    6731             : 
    6732           6 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
    6733             : {
    6734           6 :     return GWKRun(
    6735             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyUShort",
    6736             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16,
    6737           6 :                                                            GRA_Bilinear>);
    6738             : }
    6739             : 
    6740           4 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
    6741             : {
    6742           4 :     return GWKRun(
    6743             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyFloat",
    6744             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float,
    6745           4 :                                                            GRA_Bilinear>);
    6746             : }
    6747             : 
    6748             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    6749             : 
    6750             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
    6751             : {
    6752             :     return GWKRun(
    6753             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyDouble",
    6754             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double,
    6755             :                                                            GRA_Bilinear>);
    6756             : }
    6757             : #endif
    6758             : 
    6759           5 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6760             : {
    6761           5 :     return GWKRun(
    6762             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyShort",
    6763           5 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16, GRA_Cubic>);
    6764             : }
    6765             : 
    6766          14 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
    6767             : {
    6768          14 :     return GWKRun(
    6769             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyUShort",
    6770          14 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16, GRA_Cubic>);
    6771             : }
    6772             : 
    6773           6 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6774             : {
    6775           6 :     return GWKRun(
    6776             :         poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyShort",
    6777           6 :         GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_CubicSpline>);
    6778             : }
    6779             : 
    6780           5 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
    6781             : {
    6782           5 :     return GWKRun(
    6783             :         poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyUShort",
    6784           5 :         GWKResampleNoMasksOrDstDensityOnlyThread<GUInt16, GRA_CubicSpline>);
    6785             : }
    6786             : 
    6787          45 : static CPLErr GWKNearestShort(GDALWarpKernel *poWK)
    6788             : {
    6789          45 :     return GWKRun(poWK, "GWKNearestShort", GWKNearestThread<GInt16>);
    6790             : }
    6791             : 
    6792          10 : static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK)
    6793             : {
    6794          10 :     return GWKRun(poWK, "GWKNearestUnsignedShort", GWKNearestThread<GUInt16>);
    6795             : }
    6796             : 
    6797          11 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
    6798             : {
    6799          11 :     return GWKRun(
    6800             :         poWK, "GWKNearestNoMasksOrDstDensityOnlyFloat",
    6801          11 :         GWKResampleNoMasksOrDstDensityOnlyThread<float, GRA_NearestNeighbour>);
    6802             : }
    6803             : 
    6804          50 : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK)
    6805             : {
    6806          50 :     return GWKRun(poWK, "GWKNearestFloat", GWKNearestThread<float>);
    6807             : }
    6808             : 
    6809             : /************************************************************************/
    6810             : /*                           GWKAverageOrMode()                         */
    6811             : /*                                                                      */
    6812             : /************************************************************************/
    6813             : 
    6814             : #define COMPUTE_WEIGHT_Y(iSrcY)                                                \
    6815             :     ((iSrcY == iSrcYMin)                                                       \
    6816             :          ? ((iSrcYMin + 1 == iSrcYMax) ? 1.0 : 1 - (dfYMin - iSrcYMin))        \
    6817             :      : (iSrcY + 1 == iSrcYMax) ? 1 - (iSrcYMax - dfYMax)                       \
    6818             :                                : 1.0)
    6819             : 
    6820             : #define COMPUTE_WEIGHT(iSrcX, dfWeightY)                                       \
    6821             :     ((iSrcX == iSrcXMin)       ? ((iSrcXMin + 1 == iSrcXMax)                   \
    6822             :                                       ? dfWeightY                              \
    6823             :                                       : dfWeightY * (1 - (dfXMin - iSrcXMin))) \
    6824             :      : (iSrcX + 1 == iSrcXMax) ? dfWeightY * (1 - (iSrcXMax - dfXMax))         \
    6825             :                                : dfWeightY)
    6826             : 
    6827             : static void GWKAverageOrModeThread(void *pData);
    6828             : 
    6829         163 : static CPLErr GWKAverageOrMode(GDALWarpKernel *poWK)
    6830             : {
    6831         163 :     return GWKRun(poWK, "GWKAverageOrMode", GWKAverageOrModeThread);
    6832             : }
    6833             : 
    6834             : /************************************************************************/
    6835             : /*                 GWKAverageOrModeComputeLineCoords()                  */
    6836             : /************************************************************************/
    6837             : 
    6838        8183 : static void GWKAverageOrModeComputeLineCoords(
    6839             :     const GWKJobStruct *psJob, double *padfX, double *padfX2, double *padfY,
    6840             :     double *padfY2, double *padfZ, double *padfZ2, int *pabSuccess,
    6841             :     int *pabSuccess2, int iDstY, double dfSrcCoordPrecision,
    6842             :     double dfErrorThreshold)
    6843             : {
    6844        8183 :     const GDALWarpKernel *poWK = psJob->poWK;
    6845        8183 :     const int nDstXSize = poWK->nDstXSize;
    6846             : 
    6847             :     // Setup points to transform to source image space.
    6848     2097530 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6849             :     {
    6850     2089340 :         padfX[iDstX] = iDstX + poWK->nDstXOff;
    6851     2089340 :         padfY[iDstX] = iDstY + poWK->nDstYOff;
    6852     2089340 :         padfZ[iDstX] = 0.0;
    6853     2089340 :         padfX2[iDstX] = iDstX + 1.0 + poWK->nDstXOff;
    6854     2089340 :         padfY2[iDstX] = iDstY + 1.0 + poWK->nDstYOff;
    6855     2089340 :         padfZ2[iDstX] = 0.0;
    6856             :     }
    6857             : 
    6858             :     /* ----------------------------------------------------------------- */
    6859             :     /*      Transform the points from destination pixel/line coordinates */
    6860             :     /*      to source pixel/line coordinates.                            */
    6861             :     /* ----------------------------------------------------------------- */
    6862        8183 :     poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX, padfY,
    6863             :                          padfZ, pabSuccess);
    6864        8183 :     poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX2,
    6865             :                          padfY2, padfZ2, pabSuccess2);
    6866             : 
    6867        8183 :     if (dfSrcCoordPrecision > 0.0)
    6868             :     {
    6869           0 :         GWKRoundSourceCoordinates(nDstXSize, padfX, padfY, padfZ, pabSuccess,
    6870             :                                   dfSrcCoordPrecision, dfErrorThreshold,
    6871           0 :                                   poWK->pfnTransformer, psJob->pTransformerArg,
    6872           0 :                                   poWK->nDstXOff, iDstY + poWK->nDstYOff);
    6873           0 :         GWKRoundSourceCoordinates(
    6874             :             nDstXSize, padfX2, padfY2, padfZ2, pabSuccess2, dfSrcCoordPrecision,
    6875           0 :             dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    6876           0 :             1.0 + poWK->nDstXOff, iDstY + 1.0 + poWK->nDstYOff);
    6877             :     }
    6878        8183 : }
    6879             : 
    6880             : /************************************************************************/
    6881             : /*                GWKAverageOrModeComputeSourceCoords()                 */
    6882             : /************************************************************************/
    6883             : 
    6884     2089340 : static bool GWKAverageOrModeComputeSourceCoords(
    6885             :     const GWKJobStruct *psJob, double *padfX, double *padfX2, double *padfY,
    6886             :     double *padfY2, int iDstX, int iDstY, int nXMargin, int nYMargin,
    6887             :     // Output:
    6888             :     bool &bWrapOverX, double &dfXMin, double &dfYMin, double &dfXMax,
    6889             :     double &dfYMax, int &iSrcXMin, int &iSrcYMin, int &iSrcXMax, int &iSrcYMax)
    6890             : {
    6891     2089340 :     const GDALWarpKernel *poWK = psJob->poWK;
    6892     2089340 :     const int nSrcXSize = poWK->nSrcXSize;
    6893     2089340 :     const int nSrcYSize = poWK->nSrcYSize;
    6894             : 
    6895             :     // Add some checks so that padfX[iDstX] - poWK->nSrcXOff is in
    6896             :     // reasonable range (https://github.com/OSGeo/gdal/issues/2365)
    6897     2089340 :     if (!(padfX[iDstX] - poWK->nSrcXOff >= -nXMargin &&
    6898     1992640 :           padfX2[iDstX] - poWK->nSrcXOff >= -nXMargin &&
    6899     1992640 :           padfY[iDstX] - poWK->nSrcYOff >= -nYMargin &&
    6900     1965720 :           padfY2[iDstX] - poWK->nSrcYOff >= -nYMargin &&
    6901     1965720 :           padfX[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
    6902     1912820 :           padfX2[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
    6903     1912310 :           padfY[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin &&
    6904     1910810 :           padfY2[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin))
    6905             :     {
    6906      178602 :         return false;
    6907             :     }
    6908             : 
    6909             :     // Compute corners in source crs.
    6910             : 
    6911             :     // The transformation might not have preserved ordering of
    6912             :     // coordinates so do the necessary swapping (#5433).
    6913             :     // NOTE: this is really an approximative fix. To do something
    6914             :     // more precise we would for example need to compute the
    6915             :     // transformation of coordinates in the
    6916             :     // [iDstX,iDstY]x[iDstX+1,iDstY+1] square back to source
    6917             :     // coordinates, and take the bounding box of the got source
    6918             :     // coordinates.
    6919             : 
    6920     1910740 :     if (padfX[iDstX] > padfX2[iDstX])
    6921      268744 :         std::swap(padfX[iDstX], padfX2[iDstX]);
    6922             : 
    6923             :     // Detect situations where the target pixel is close to the
    6924             :     // antimeridian and when padfX[iDstX] and padfX2[iDstX] are very
    6925             :     // close to the left-most and right-most columns of the source
    6926             :     // raster. The 2 value below was experimentally determined to
    6927             :     // avoid false-positives and false-negatives.
    6928             :     // Addresses https://github.com/OSGeo/gdal/issues/6478
    6929     1910740 :     bWrapOverX = false;
    6930     1910740 :     const int nThresholdWrapOverX = std::min(2, nSrcXSize / 10);
    6931     1910740 :     if (poWK->nSrcXOff == 0 && iDstX + 1 < poWK->nDstXSize &&
    6932     1903470 :         2 * padfX[iDstX] - padfX[iDstX + 1] < nThresholdWrapOverX &&
    6933       17795 :         nSrcXSize - padfX2[iDstX] < nThresholdWrapOverX)
    6934             :     {
    6935             :         // Check there is a discontinuity by checking at mid-pixel.
    6936             :         // NOTE: all this remains fragile. To confidently
    6937             :         // detect antimeridian warping we should probably try to access
    6938             :         // georeferenced coordinates, and not rely only on tests on
    6939             :         // image space coordinates. But accessing georeferenced
    6940             :         // coordinates from here is not trivial, and we would for example
    6941             :         // have to handle both geographic, Mercator, etc.
    6942             :         // Let's hope this heuristics is good enough for now.
    6943        1200 :         double x = iDstX + 0.5 + poWK->nDstXOff;
    6944        1200 :         double y = iDstY + poWK->nDstYOff;
    6945        1200 :         double z = 0;
    6946        1200 :         int bSuccess = FALSE;
    6947        1200 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1, &x, &y, &z,
    6948             :                              &bSuccess);
    6949        1200 :         if (bSuccess && x < padfX[iDstX])
    6950             :         {
    6951        1192 :             bWrapOverX = true;
    6952        1192 :             std::swap(padfX[iDstX], padfX2[iDstX]);
    6953        1192 :             padfX2[iDstX] += nSrcXSize;
    6954             :         }
    6955             :     }
    6956             : 
    6957     1910740 :     dfXMin = padfX[iDstX] - poWK->nSrcXOff;
    6958     1910740 :     dfXMax = padfX2[iDstX] - poWK->nSrcXOff;
    6959     1910740 :     constexpr double EPSILON = 1e-10;
    6960             :     // Check that [dfXMin, dfXMax] intersect with [0,nSrcXSize] with a tolerance
    6961     1910740 :     if (!(dfXMax > -EPSILON && dfXMin < nSrcXSize + EPSILON))
    6962         372 :         return false;
    6963     1910370 :     iSrcXMin = static_cast<int>(std::max(floor(dfXMin + EPSILON), 0.0));
    6964     1910370 :     iSrcXMax = static_cast<int>(
    6965     1910370 :         std::min(ceil(dfXMax - EPSILON), static_cast<double>(INT_MAX)));
    6966     1910370 :     if (!bWrapOverX)
    6967     1909180 :         iSrcXMax = std::min(iSrcXMax, nSrcXSize);
    6968     1910370 :     if (iSrcXMin == iSrcXMax && iSrcXMax < nSrcXSize)
    6969         472 :         iSrcXMax++;
    6970             : 
    6971     1910370 :     if (padfY[iDstX] > padfY2[iDstX])
    6972      270117 :         std::swap(padfY[iDstX], padfY2[iDstX]);
    6973     1910370 :     dfYMin = padfY[iDstX] - poWK->nSrcYOff;
    6974     1910370 :     dfYMax = padfY2[iDstX] - poWK->nSrcYOff;
    6975             :     // Check that [dfYMin, dfYMax] intersect with [0,nSrcYSize] with a tolerance
    6976     1910370 :     if (!(dfYMax > -EPSILON && dfYMin < nSrcYSize + EPSILON))
    6977         238 :         return false;
    6978     1910130 :     iSrcYMin = static_cast<int>(std::max(floor(dfYMin + EPSILON), 0.0));
    6979     1910130 :     iSrcYMax = std::min(static_cast<int>(ceil(dfYMax - EPSILON)), nSrcYSize);
    6980     1910130 :     if (iSrcYMin == iSrcYMax && iSrcYMax < nSrcYSize)
    6981           0 :         iSrcYMax++;
    6982             : 
    6983     1910130 :     return true;
    6984             : }
    6985             : 
    6986             : /************************************************************************/
    6987             : /*                          GWKModeRealType()                           */
    6988             : /************************************************************************/
    6989             : 
    6990       17780 : template <class T> static inline bool IsSame(T a, T b)
    6991             : {
    6992       17780 :     return a == b;
    6993             : }
    6994             : 
    6995           0 : template <> bool IsSame<GFloat16>(GFloat16 a, GFloat16 b)
    6996             : {
    6997           0 :     return a == b || (CPLIsNan(a) && CPLIsNan(b));
    6998             : }
    6999             : 
    7000          18 : template <> bool IsSame<float>(float a, float b)
    7001             : {
    7002          18 :     return a == b || (std::isnan(a) && std::isnan(b));
    7003             : }
    7004             : 
    7005          56 : template <> bool IsSame<double>(double a, double b)
    7006             : {
    7007          56 :     return a == b || (std::isnan(a) && std::isnan(b));
    7008             : }
    7009             : 
    7010          19 : template <class T> static void GWKModeRealType(GWKJobStruct *psJob)
    7011             : {
    7012          19 :     const GDALWarpKernel *poWK = psJob->poWK;
    7013          19 :     const int iYMin = psJob->iYMin;
    7014          19 :     const int iYMax = psJob->iYMax;
    7015          19 :     const int nDstXSize = poWK->nDstXSize;
    7016          19 :     const int nSrcXSize = poWK->nSrcXSize;
    7017          19 :     const int nSrcYSize = poWK->nSrcYSize;
    7018          19 :     const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
    7019             : 
    7020          19 :     T *pVals = nullptr;
    7021          19 :     float *pafCounts = nullptr;
    7022             : 
    7023          19 :     if (nSrcXSize > 0 && nSrcYSize > 0)
    7024             :     {
    7025             :         pVals = static_cast<T *>(
    7026          19 :             VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(T)));
    7027             :         pafCounts = static_cast<float *>(
    7028          19 :             VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
    7029          19 :         if (pVals == nullptr || pafCounts == nullptr)
    7030             :         {
    7031           0 :             VSIFree(pVals);
    7032           0 :             VSIFree(pafCounts);
    7033           0 :             return;
    7034             :         }
    7035             :     }
    7036             : 
    7037             :     /* -------------------------------------------------------------------- */
    7038             :     /*      Allocate x,y,z coordinate arrays for transformation ... two     */
    7039             :     /*      scanlines worth of positions.                                   */
    7040             :     /* -------------------------------------------------------------------- */
    7041             : 
    7042             :     double *padfX =
    7043          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7044             :     double *padfY =
    7045          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7046             :     double *padfZ =
    7047          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7048             :     double *padfX2 =
    7049          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7050             :     double *padfY2 =
    7051          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7052             :     double *padfZ2 =
    7053          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7054          19 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    7055          19 :     int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    7056             : 
    7057          19 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    7058          19 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    7059          19 :     const double dfErrorThreshold = CPLAtof(
    7060          19 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    7061          19 :     const bool bAvoidNoDataSingleBand =
    7062          19 :         poWK->nBands == 1 ||
    7063           0 :         !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
    7064             :                                           "UNIFIED_SRC_NODATA", "FALSE"));
    7065             : 
    7066          19 :     const int nXMargin =
    7067          19 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
    7068          19 :     const int nYMargin =
    7069          19 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
    7070             : 
    7071             :     /* ==================================================================== */
    7072             :     /*      Loop over output lines.                                         */
    7073             :     /* ==================================================================== */
    7074         116 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    7075             :     {
    7076          97 :         GWKAverageOrModeComputeLineCoords(
    7077             :             psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
    7078             :             pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
    7079             : 
    7080             :         // Loop over pixels in output scanline.
    7081        3514 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    7082             :         {
    7083        3417 :             GPtrDiff_t iSrcOffset = 0;
    7084        3417 :             double dfDensity = 1.0;
    7085        3417 :             bool bHasFoundDensity = false;
    7086             : 
    7087        3417 :             bool bWrapOverX = false;
    7088        3417 :             double dfXMin = 0;
    7089        3417 :             double dfYMin = 0;
    7090        3417 :             double dfXMax = 0;
    7091        3417 :             double dfYMax = 0;
    7092        3417 :             int iSrcXMin = 0;
    7093        3417 :             int iSrcYMin = 0;
    7094        3417 :             int iSrcXMax = 0;
    7095        3417 :             int iSrcYMax = 0;
    7096        3417 :             if (!GWKAverageOrModeComputeSourceCoords(
    7097             :                     psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
    7098             :                     nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
    7099             :                     iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
    7100             :             {
    7101           0 :                 continue;
    7102             :             }
    7103             : 
    7104        3417 :             const GPtrDiff_t iDstOffset =
    7105        3417 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    7106             : 
    7107             :             // Loop processing each band.
    7108        6834 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7109             :             {
    7110        3417 :                 double dfBandDensity = 0.0;
    7111             : 
    7112        3417 :                 int nBins = 0;
    7113        3417 :                 int iModeIndex = -1;
    7114        3417 :                 T nVal{};
    7115             : 
    7116       10248 :                 for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7117             :                 {
    7118        6831 :                     const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7119        6831 :                     iSrcOffset =
    7120        6831 :                         iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7121       20530 :                     for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7122             :                          iSrcX++, iSrcOffset++)
    7123             :                     {
    7124       13699 :                         if (bWrapOverX)
    7125           0 :                             iSrcOffset =
    7126           0 :                                 (iSrcX % nSrcXSize) +
    7127           0 :                                 static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7128             : 
    7129       13699 :                         if (poWK->panUnifiedSrcValid != nullptr &&
    7130           0 :                             !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    7131           0 :                             continue;
    7132             : 
    7133       13699 :                         if (GWKGetPixelT(poWK, iBand, iSrcOffset,
    7134       27398 :                                          &dfBandDensity, &nVal) &&
    7135       13699 :                             dfBandDensity > BAND_DENSITY_THRESHOLD)
    7136             :                         {
    7137       13699 :                             const double dfWeight =
    7138       13699 :                                 COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7139             : 
    7140             :                             // Check array for existing entry.
    7141       13699 :                             int i = 0;
    7142       29194 :                             for (i = 0; i < nBins; ++i)
    7143             :                             {
    7144       17807 :                                 if (IsSame(pVals[i], nVal))
    7145             :                                 {
    7146             : 
    7147        2312 :                                     pafCounts[i] +=
    7148        2312 :                                         static_cast<float>(dfWeight);
    7149        2312 :                                     bool bValIsMaxCount =
    7150        2312 :                                         (pafCounts[i] > pafCounts[iModeIndex]);
    7151             : 
    7152        2312 :                                     if (!bValIsMaxCount &&
    7153        1498 :                                         pafCounts[i] == pafCounts[iModeIndex])
    7154             :                                     {
    7155        1490 :                                         switch (eTieStrategy)
    7156             :                                         {
    7157        1477 :                                             case GWKTS_First:
    7158        1477 :                                                 break;
    7159           6 :                                             case GWKTS_Min:
    7160           6 :                                                 bValIsMaxCount =
    7161           6 :                                                     nVal < pVals[iModeIndex];
    7162           6 :                                                 break;
    7163           7 :                                             case GWKTS_Max:
    7164           7 :                                                 bValIsMaxCount =
    7165           7 :                                                     nVal > pVals[iModeIndex];
    7166           7 :                                                 break;
    7167             :                                         }
    7168             :                                     }
    7169             : 
    7170        2312 :                                     if (bValIsMaxCount)
    7171             :                                     {
    7172         817 :                                         iModeIndex = i;
    7173             :                                     }
    7174             : 
    7175        2312 :                                     break;
    7176             :                                 }
    7177             :                             }
    7178             : 
    7179             :                             // Add to arr if entry not already there.
    7180       13699 :                             if (i == nBins)
    7181             :                             {
    7182       11387 :                                 pVals[i] = nVal;
    7183       11387 :                                 pafCounts[i] = static_cast<float>(dfWeight);
    7184             : 
    7185       11387 :                                 if (iModeIndex < 0)
    7186        3417 :                                     iModeIndex = i;
    7187             : 
    7188       11387 :                                 ++nBins;
    7189             :                             }
    7190             :                         }
    7191             :                     }
    7192             :                 }
    7193             : 
    7194        3417 :                 if (iModeIndex != -1)
    7195             :                 {
    7196        3417 :                     nVal = pVals[iModeIndex];
    7197        3417 :                     dfBandDensity = 1;
    7198        3417 :                     bHasFoundDensity = true;
    7199             :                 }
    7200             : 
    7201             :                 // We have a computed value from the source.  Now apply it
    7202             :                 // to the destination pixel
    7203        3417 :                 if (bHasFoundDensity)
    7204             :                 {
    7205        3417 :                     GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
    7206             :                                           dfBandDensity, nVal,
    7207             :                                           bAvoidNoDataSingleBand);
    7208             :                 }
    7209             :             }
    7210             : 
    7211        3417 :             if (!bHasFoundDensity)
    7212           0 :                 continue;
    7213             : 
    7214        3417 :             if (!bAvoidNoDataSingleBand)
    7215             :             {
    7216           0 :                 GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    7217             :             }
    7218             : 
    7219             :             /* --------------------------------------------------------------------
    7220             :              */
    7221             :             /*      Update destination density/validity masks. */
    7222             :             /* --------------------------------------------------------------------
    7223             :              */
    7224        3417 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    7225             : 
    7226        3417 :             if (poWK->panDstValid != nullptr)
    7227             :             {
    7228           0 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    7229             :             }
    7230             :         } /* Next iDstX */
    7231             : 
    7232             :         /* --------------------------------------------------------------------
    7233             :          */
    7234             :         /*      Report progress to the user, and optionally cancel out. */
    7235             :         /* --------------------------------------------------------------------
    7236             :          */
    7237          97 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    7238           0 :             break;
    7239             :     }
    7240             : 
    7241             :     /* -------------------------------------------------------------------- */
    7242             :     /*      Cleanup and return.                                             */
    7243             :     /* -------------------------------------------------------------------- */
    7244          19 :     CPLFree(padfX);
    7245          19 :     CPLFree(padfY);
    7246          19 :     CPLFree(padfZ);
    7247          19 :     CPLFree(padfX2);
    7248          19 :     CPLFree(padfY2);
    7249          19 :     CPLFree(padfZ2);
    7250          19 :     CPLFree(pabSuccess);
    7251          19 :     CPLFree(pabSuccess2);
    7252          19 :     VSIFree(pVals);
    7253          19 :     VSIFree(pafCounts);
    7254             : }
    7255             : 
    7256             : /************************************************************************/
    7257             : /*                         GWKModeComplexType()                         */
    7258             : /************************************************************************/
    7259             : 
    7260           8 : static void GWKModeComplexType(GWKJobStruct *psJob)
    7261             : {
    7262           8 :     const GDALWarpKernel *poWK = psJob->poWK;
    7263           8 :     const int iYMin = psJob->iYMin;
    7264           8 :     const int iYMax = psJob->iYMax;
    7265           8 :     const int nDstXSize = poWK->nDstXSize;
    7266           8 :     const int nSrcXSize = poWK->nSrcXSize;
    7267           8 :     const int nSrcYSize = poWK->nSrcYSize;
    7268           8 :     const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
    7269             :     const double dfMultFactorVerticalShiftPipeline =
    7270           8 :         poWK->bApplyVerticalShift
    7271           8 :             ? CPLAtof(CSLFetchNameValueDef(
    7272           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    7273             :                   "1.0"))
    7274           8 :             : 0.0;
    7275             :     const bool bAvoidNoDataSingleBand =
    7276           8 :         poWK->nBands == 1 ||
    7277           0 :         !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
    7278           8 :                                           "UNIFIED_SRC_NODATA", "FALSE"));
    7279             : 
    7280           8 :     double *padfRealVals = nullptr;
    7281           8 :     double *padfImagVals = nullptr;
    7282           8 :     float *pafCounts = nullptr;
    7283             : 
    7284           8 :     if (nSrcXSize > 0 && nSrcYSize > 0)
    7285             :     {
    7286             :         padfRealVals = static_cast<double *>(
    7287           8 :             VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(double)));
    7288             :         padfImagVals = static_cast<double *>(
    7289           8 :             VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(double)));
    7290             :         pafCounts = static_cast<float *>(
    7291           8 :             VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
    7292           8 :         if (padfRealVals == nullptr || padfImagVals == nullptr ||
    7293             :             pafCounts == nullptr)
    7294             :         {
    7295           0 :             VSIFree(padfRealVals);
    7296           0 :             VSIFree(padfImagVals);
    7297           0 :             VSIFree(pafCounts);
    7298           0 :             return;
    7299             :         }
    7300             :     }
    7301             : 
    7302             :     /* -------------------------------------------------------------------- */
    7303             :     /*      Allocate x,y,z coordinate arrays for transformation ... two     */
    7304             :     /*      scanlines worth of positions.                                   */
    7305             :     /* -------------------------------------------------------------------- */
    7306             : 
    7307             :     double *padfX =
    7308           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7309             :     double *padfY =
    7310           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7311             :     double *padfZ =
    7312           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7313             :     double *padfX2 =
    7314           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7315             :     double *padfY2 =
    7316           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7317             :     double *padfZ2 =
    7318           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7319           8 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    7320           8 :     int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    7321             : 
    7322           8 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    7323           8 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    7324           8 :     const double dfErrorThreshold = CPLAtof(
    7325           8 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    7326             : 
    7327             :     const int nXMargin =
    7328           8 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
    7329             :     const int nYMargin =
    7330           8 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
    7331             : 
    7332             :     /* ==================================================================== */
    7333             :     /*      Loop over output lines.                                         */
    7334             :     /* ==================================================================== */
    7335          16 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    7336             :     {
    7337           8 :         GWKAverageOrModeComputeLineCoords(
    7338             :             psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
    7339             :             pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
    7340             : 
    7341             :         // Loop over pixels in output scanline.
    7342          16 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    7343             :         {
    7344           8 :             GPtrDiff_t iSrcOffset = 0;
    7345           8 :             double dfDensity = 1.0;
    7346           8 :             bool bHasFoundDensity = false;
    7347             : 
    7348           8 :             bool bWrapOverX = false;
    7349           8 :             double dfXMin = 0;
    7350           8 :             double dfYMin = 0;
    7351           8 :             double dfXMax = 0;
    7352           8 :             double dfYMax = 0;
    7353           8 :             int iSrcXMin = 0;
    7354           8 :             int iSrcYMin = 0;
    7355           8 :             int iSrcXMax = 0;
    7356           8 :             int iSrcYMax = 0;
    7357           8 :             if (!GWKAverageOrModeComputeSourceCoords(
    7358             :                     psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
    7359             :                     nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
    7360             :                     iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
    7361             :             {
    7362           0 :                 continue;
    7363             :             }
    7364             : 
    7365           8 :             const GPtrDiff_t iDstOffset =
    7366           8 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    7367             : 
    7368             :             // Loop processing each band.
    7369          16 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7370             :             {
    7371           8 :                 double dfBandDensity = 0.0;
    7372             : 
    7373           8 :                 int nBins = 0;
    7374           8 :                 int iModeIndex = -1;
    7375           8 :                 double dfValueReal = 0;
    7376           8 :                 double dfValueImag = 0;
    7377             : 
    7378          16 :                 for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7379             :                 {
    7380           8 :                     const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7381           8 :                     iSrcOffset =
    7382           8 :                         iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7383          38 :                     for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7384             :                          iSrcX++, iSrcOffset++)
    7385             :                     {
    7386          30 :                         if (bWrapOverX)
    7387           0 :                             iSrcOffset =
    7388           0 :                                 (iSrcX % nSrcXSize) +
    7389           0 :                                 static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7390             : 
    7391          30 :                         if (poWK->panUnifiedSrcValid != nullptr &&
    7392           0 :                             !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    7393           0 :                             continue;
    7394             : 
    7395          30 :                         if (GWKGetPixelValue(poWK, iBand, iSrcOffset,
    7396             :                                              &dfBandDensity, &dfValueReal,
    7397          60 :                                              &dfValueImag) &&
    7398          30 :                             dfBandDensity > BAND_DENSITY_THRESHOLD)
    7399             :                         {
    7400          30 :                             const double dfWeight =
    7401          30 :                                 COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7402             : 
    7403             :                             // Check array for existing entry.
    7404          30 :                             int i = 0;
    7405          49 :                             for (i = 0; i < nBins; ++i)
    7406             :                             {
    7407          47 :                                 if (IsSame(padfRealVals[i], dfValueReal) &&
    7408          14 :                                     IsSame(padfImagVals[i], dfValueImag))
    7409             :                                 {
    7410             : 
    7411          14 :                                     pafCounts[i] +=
    7412          14 :                                         static_cast<float>(dfWeight);
    7413          14 :                                     bool bValIsMaxCount =
    7414          14 :                                         (pafCounts[i] > pafCounts[iModeIndex]);
    7415             : 
    7416          14 :                                     if (!bValIsMaxCount &&
    7417           6 :                                         pafCounts[i] == pafCounts[iModeIndex])
    7418             :                                     {
    7419           3 :                                         switch (eTieStrategy)
    7420             :                                         {
    7421           3 :                                             case GWKTS_First:
    7422           3 :                                                 break;
    7423           0 :                                             case GWKTS_Min:
    7424           0 :                                                 bValIsMaxCount =
    7425           0 :                                                     dfValueReal <
    7426           0 :                                                     padfRealVals[iModeIndex];
    7427           0 :                                                 break;
    7428           0 :                                             case GWKTS_Max:
    7429           0 :                                                 bValIsMaxCount =
    7430           0 :                                                     dfValueReal >
    7431           0 :                                                     padfRealVals[iModeIndex];
    7432           0 :                                                 break;
    7433             :                                         }
    7434             :                                     }
    7435             : 
    7436          14 :                                     if (bValIsMaxCount)
    7437             :                                     {
    7438           8 :                                         iModeIndex = i;
    7439             :                                     }
    7440             : 
    7441          14 :                                     break;
    7442             :                                 }
    7443             :                             }
    7444             : 
    7445             :                             // Add to arr if entry not already there.
    7446          30 :                             if (i == nBins)
    7447             :                             {
    7448          16 :                                 padfRealVals[i] = dfValueReal;
    7449          16 :                                 padfImagVals[i] = dfValueImag;
    7450          16 :                                 pafCounts[i] = static_cast<float>(dfWeight);
    7451             : 
    7452          16 :                                 if (iModeIndex < 0)
    7453           8 :                                     iModeIndex = i;
    7454             : 
    7455          16 :                                 ++nBins;
    7456             :                             }
    7457             :                         }
    7458             :                     }
    7459             :                 }
    7460             : 
    7461           8 :                 if (iModeIndex != -1)
    7462             :                 {
    7463           8 :                     dfValueReal = padfRealVals[iModeIndex];
    7464           8 :                     dfValueImag = padfImagVals[iModeIndex];
    7465           8 :                     dfBandDensity = 1;
    7466             : 
    7467           8 :                     if (poWK->bApplyVerticalShift)
    7468             :                     {
    7469           0 :                         if (!std::isfinite(padfZ[iDstX]))
    7470           0 :                             continue;
    7471             :                         // Subtract padfZ[] since the coordinate
    7472             :                         // transformation is from target to source
    7473           0 :                         dfValueReal =
    7474           0 :                             dfValueReal * poWK->dfMultFactorVerticalShift -
    7475           0 :                             padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
    7476             :                     }
    7477             : 
    7478           8 :                     bHasFoundDensity = true;
    7479             :                 }
    7480             : 
    7481             :                 // We have a computed value from the source.  Now apply it
    7482             :                 // to the destination pixel
    7483           8 :                 if (bHasFoundDensity)
    7484             :                 {
    7485           8 :                     GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    7486             :                                      dfValueReal, dfValueImag,
    7487             :                                      bAvoidNoDataSingleBand);
    7488             :                 }
    7489             :             }
    7490             : 
    7491           8 :             if (!bHasFoundDensity)
    7492           0 :                 continue;
    7493             : 
    7494           8 :             if (!bAvoidNoDataSingleBand)
    7495             :             {
    7496           0 :                 GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    7497             :             }
    7498             : 
    7499             :             /* --------------------------------------------------------------------
    7500             :              */
    7501             :             /*      Update destination density/validity masks. */
    7502             :             /* --------------------------------------------------------------------
    7503             :              */
    7504           8 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    7505             : 
    7506           8 :             if (poWK->panDstValid != nullptr)
    7507             :             {
    7508           0 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    7509             :             }
    7510             :         } /* Next iDstX */
    7511             : 
    7512             :         /* --------------------------------------------------------------------
    7513             :          */
    7514             :         /*      Report progress to the user, and optionally cancel out. */
    7515             :         /* --------------------------------------------------------------------
    7516             :          */
    7517           8 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    7518           0 :             break;
    7519             :     }
    7520             : 
    7521             :     /* -------------------------------------------------------------------- */
    7522             :     /*      Cleanup and return.                                             */
    7523             :     /* -------------------------------------------------------------------- */
    7524           8 :     CPLFree(padfX);
    7525           8 :     CPLFree(padfY);
    7526           8 :     CPLFree(padfZ);
    7527           8 :     CPLFree(padfX2);
    7528           8 :     CPLFree(padfY2);
    7529           8 :     CPLFree(padfZ2);
    7530           8 :     CPLFree(pabSuccess);
    7531           8 :     CPLFree(pabSuccess2);
    7532           8 :     VSIFree(padfRealVals);
    7533           8 :     VSIFree(padfImagVals);
    7534           8 :     VSIFree(pafCounts);
    7535             : }
    7536             : 
    7537             : /************************************************************************/
    7538             : /*                       GWKAverageOrModeThread()                       */
    7539             : /************************************************************************/
    7540             : 
    7541             : // Overall logic based on GWKGeneralCaseThread().
    7542         163 : static void GWKAverageOrModeThread(void *pData)
    7543             : {
    7544         163 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    7545         163 :     const GDALWarpKernel *poWK = psJob->poWK;
    7546         163 :     const int iYMin = psJob->iYMin;
    7547         163 :     const int iYMax = psJob->iYMax;
    7548             :     const double dfMultFactorVerticalShiftPipeline =
    7549         163 :         poWK->bApplyVerticalShift
    7550         163 :             ? CPLAtof(CSLFetchNameValueDef(
    7551           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    7552             :                   "1.0"))
    7553         163 :             : 0.0;
    7554             :     const bool bAvoidNoDataSingleBand =
    7555         194 :         poWK->nBands == 1 ||
    7556          31 :         !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
    7557         163 :                                           "UNIFIED_SRC_NODATA", "FALSE"));
    7558             : 
    7559         163 :     const int nDstXSize = poWK->nDstXSize;
    7560         163 :     const int nSrcXSize = poWK->nSrcXSize;
    7561             : 
    7562             :     /* -------------------------------------------------------------------- */
    7563             :     /*      Find out which algorithm to use (small optim.)                  */
    7564             :     /* -------------------------------------------------------------------- */
    7565             : 
    7566             :     // Only used for GRA_Mode
    7567         163 :     float *pafCounts = nullptr;
    7568         163 :     int nBins = 0;
    7569         163 :     int nBinsOffset = 0;
    7570         163 :     const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
    7571             : 
    7572             :     // Only used with Q1, Med and Q3
    7573         163 :     float quant = 0.0f;
    7574             : 
    7575             :     // To control array allocation only when data type is complex
    7576         163 :     const bool bIsComplex = GDALDataTypeIsComplex(poWK->eWorkingDataType) != 0;
    7577             : 
    7578         163 :     if (poWK->eResample == GRA_Mode)
    7579             :     {
    7580          45 :         if (poWK->bApplyVerticalShift)
    7581             :         {
    7582           0 :             return GWKModeComplexType(psJob);
    7583             :         }
    7584             : 
    7585          45 :         switch (poWK->eWorkingDataType)
    7586             :         {
    7587           7 :             case GDT_UInt8:
    7588           7 :                 nBins = 256;
    7589           7 :                 break;
    7590             : 
    7591           0 :             case GDT_Int8:
    7592           0 :                 nBins = 256;
    7593           0 :                 nBinsOffset = nBins / 2;
    7594           0 :                 break;
    7595             : 
    7596           1 :             case GDT_UInt16:
    7597           1 :                 nBins = 65536;
    7598           1 :                 break;
    7599             : 
    7600          10 :             case GDT_Int16:
    7601          10 :                 nBins = 65536;
    7602          10 :                 nBinsOffset = nBins / 2;
    7603          10 :                 break;
    7604             : 
    7605          10 :             case GDT_Int32:
    7606          10 :                 return GWKModeRealType<int32_t>(psJob);
    7607             : 
    7608           1 :             case GDT_UInt32:
    7609           1 :                 return GWKModeRealType<uint32_t>(psJob);
    7610             : 
    7611           1 :             case GDT_Int64:
    7612           1 :                 return GWKModeRealType<int64_t>(psJob);
    7613             : 
    7614           1 :             case GDT_UInt64:
    7615           1 :                 return GWKModeRealType<uint64_t>(psJob);
    7616             : 
    7617           0 :             case GDT_Float16:
    7618           0 :                 return GWKModeRealType<GFloat16>(psJob);
    7619             : 
    7620           4 :             case GDT_Float32:
    7621           4 :                 return GWKModeRealType<float>(psJob);
    7622             : 
    7623           2 :             case GDT_Float64:
    7624           2 :                 return GWKModeRealType<double>(psJob);
    7625             : 
    7626           8 :             case GDT_CInt16:
    7627             :             case GDT_CInt32:
    7628             :             case GDT_CFloat16:
    7629             :             case GDT_CFloat32:
    7630             :             case GDT_CFloat64:
    7631           8 :                 return GWKModeComplexType(psJob);
    7632             : 
    7633           0 :             case GDT_Unknown:
    7634             :             case GDT_TypeCount:
    7635           0 :                 CPLAssert(false);
    7636             :                 return;
    7637             :         }
    7638             : 
    7639          18 :         if (nBins)
    7640             :         {
    7641             :             pafCounts =
    7642          18 :                 static_cast<float *>(VSI_MALLOC_VERBOSE(nBins * sizeof(float)));
    7643          18 :             if (pafCounts == nullptr)
    7644           0 :                 return;
    7645             :         }
    7646             :     }
    7647         118 :     else if (poWK->eResample == GRA_Med)
    7648             :     {
    7649           6 :         quant = 0.5f;
    7650             :     }
    7651         112 :     else if (poWK->eResample == GRA_Q1)
    7652             :     {
    7653          10 :         quant = 0.25f;
    7654             :     }
    7655         102 :     else if (poWK->eResample == GRA_Q3)
    7656             :     {
    7657           5 :         quant = 0.75f;
    7658             :     }
    7659          97 :     else if (poWK->eResample != GRA_Average && poWK->eResample != GRA_RMS &&
    7660          11 :              poWK->eResample != GRA_Min && poWK->eResample != GRA_Max)
    7661             :     {
    7662             :         // Other resample algorithms not permitted here.
    7663           0 :         CPLError(CE_Fatal, CPLE_AppDefined,
    7664             :                  "GDALWarpKernel():GWKAverageOrModeThread() ERROR, "
    7665             :                  "illegal resample");
    7666             :     }
    7667             : 
    7668         136 :     CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread()");
    7669             : 
    7670             :     /* -------------------------------------------------------------------- */
    7671             :     /*      Allocate x,y,z coordinate arrays for transformation ... two     */
    7672             :     /*      scanlines worth of positions.                                   */
    7673             :     /* -------------------------------------------------------------------- */
    7674             : 
    7675             :     double *padfX =
    7676         136 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7677             :     double *padfY =
    7678         136 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7679             :     double *padfZ =
    7680         136 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7681             :     double *padfX2 =
    7682         136 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7683             :     double *padfY2 =
    7684         136 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7685             :     double *padfZ2 =
    7686         136 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7687         136 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    7688         136 :     int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    7689             : 
    7690         136 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    7691         136 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    7692         136 :     const double dfErrorThreshold = CPLAtof(
    7693         136 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    7694             : 
    7695             :     const double dfExcludedValuesThreshold =
    7696         136 :         CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
    7697             :                                      "EXCLUDED_VALUES_PCT_THRESHOLD", "50")) /
    7698         136 :         100.0;
    7699             :     const double dfNodataValuesThreshold =
    7700         136 :         CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
    7701             :                                      "NODATA_VALUES_PCT_THRESHOLD", "100")) /
    7702         136 :         100.0;
    7703             : 
    7704             :     const int nXMargin =
    7705         136 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
    7706             :     const int nYMargin =
    7707         136 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
    7708             : 
    7709             :     /* ==================================================================== */
    7710             :     /*      Loop over output lines.                                         */
    7711             :     /* ==================================================================== */
    7712        8214 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    7713             :     {
    7714        8078 :         GWKAverageOrModeComputeLineCoords(
    7715             :             psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
    7716             :             pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
    7717             : 
    7718             :         /* ====================================================================
    7719             :          */
    7720             :         /*      Loop over pixels in output scanline. */
    7721             :         /* ====================================================================
    7722             :          */
    7723     2094000 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    7724             :         {
    7725     2085920 :             GPtrDiff_t iSrcOffset = 0;
    7726     2085920 :             double dfDensity = 1.0;
    7727     2085920 :             bool bHasFoundDensity = false;
    7728             : 
    7729     2085920 :             bool bWrapOverX = false;
    7730     2085920 :             double dfXMin = 0;
    7731     2085920 :             double dfYMin = 0;
    7732     2085920 :             double dfXMax = 0;
    7733     2085920 :             double dfYMax = 0;
    7734     2085920 :             int iSrcXMin = 0;
    7735     2085920 :             int iSrcYMin = 0;
    7736     2085920 :             int iSrcXMax = 0;
    7737     2085920 :             int iSrcYMax = 0;
    7738     2085920 :             if (!GWKAverageOrModeComputeSourceCoords(
    7739             :                     psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
    7740             :                     nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
    7741             :                     iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
    7742             :             {
    7743      687183 :                 continue;
    7744             :             }
    7745             : 
    7746     1906710 :             const GPtrDiff_t iDstOffset =
    7747     1906710 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    7748             : 
    7749     1906710 :             bool bDone = false;
    7750             : 
    7751             :             // Special Average mode where we process all bands together,
    7752             :             // to avoid averaging tuples that match an entry of m_aadfExcludedValues
    7753     1906710 :             constexpr double EPSILON = 1e-10;
    7754     4614100 :             if (poWK->eResample == GRA_Average &&
    7755      800681 :                 (!poWK->m_aadfExcludedValues.empty() ||
    7756      589832 :                  dfNodataValuesThreshold < 1 - EPSILON) &&
    7757     2707390 :                 !poWK->bApplyVerticalShift && !bIsComplex)
    7758             :             {
    7759      589832 :                 double dfTotalWeightInvalid = 0.0;
    7760      589832 :                 double dfTotalWeightExcluded = 0.0;
    7761      589832 :                 double dfTotalWeightRegular = 0.0;
    7762     1179660 :                 std::vector<double> adfValueReal(poWK->nBands, 0);
    7763     1179660 :                 std::vector<double> adfValueAveraged(poWK->nBands, 0);
    7764             :                 std::vector<int> anCountExcludedValues(
    7765      589832 :                     poWK->m_aadfExcludedValues.size(), 0);
    7766             : 
    7767     2162710 :                 for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7768             :                 {
    7769     1572880 :                     const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7770     1572880 :                     iSrcOffset =
    7771     1572880 :                         iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7772     6291500 :                     for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7773             :                          iSrcX++, iSrcOffset++)
    7774             :                     {
    7775     4718620 :                         if (bWrapOverX)
    7776           0 :                             iSrcOffset =
    7777           0 :                                 (iSrcX % nSrcXSize) +
    7778           0 :                                 static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7779             : 
    7780     4718620 :                         const double dfWeight =
    7781     4718620 :                             COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7782     4718620 :                         if (dfWeight <= 0)
    7783           0 :                             continue;
    7784             : 
    7785     4718640 :                         if (poWK->panUnifiedSrcValid != nullptr &&
    7786          12 :                             !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    7787             :                         {
    7788           3 :                             dfTotalWeightInvalid += dfWeight;
    7789           3 :                             continue;
    7790             :                         }
    7791             : 
    7792     4718620 :                         bool bAllValid = true;
    7793     8651150 :                         for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7794             :                         {
    7795     7340300 :                             double dfBandDensity = 0;
    7796     7340300 :                             double dfValueImagTmp = 0;
    7797    11272800 :                             if (!(GWKGetPixelValue(
    7798             :                                       poWK, iBand, iSrcOffset, &dfBandDensity,
    7799     7340300 :                                       &adfValueReal[iBand], &dfValueImagTmp) &&
    7800     3932530 :                                   dfBandDensity > BAND_DENSITY_THRESHOLD))
    7801             :                             {
    7802     3407770 :                                 bAllValid = false;
    7803     3407770 :                                 break;
    7804             :                             }
    7805             :                         }
    7806             : 
    7807     4718620 :                         if (!bAllValid)
    7808             :                         {
    7809     3407770 :                             dfTotalWeightInvalid += dfWeight;
    7810     3407770 :                             continue;
    7811             :                         }
    7812             : 
    7813     1310850 :                         bool bExcludedValueFound = false;
    7814     2490500 :                         for (size_t i = 0;
    7815     2490500 :                              i < poWK->m_aadfExcludedValues.size(); ++i)
    7816             :                         {
    7817     1179670 :                             if (poWK->m_aadfExcludedValues[i] == adfValueReal)
    7818             :                             {
    7819          22 :                                 bExcludedValueFound = true;
    7820          22 :                                 ++anCountExcludedValues[i];
    7821          22 :                                 dfTotalWeightExcluded += dfWeight;
    7822          22 :                                 break;
    7823             :                             }
    7824             :                         }
    7825     1310850 :                         if (!bExcludedValueFound)
    7826             :                         {
    7827             :                             // Weighted incremental algorithm mean
    7828             :                             // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
    7829     1310830 :                             dfTotalWeightRegular += dfWeight;
    7830     5243290 :                             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7831             :                             {
    7832     3932460 :                                 adfValueAveraged[iBand] +=
    7833     7864930 :                                     (dfWeight / dfTotalWeightRegular) *
    7834     7864930 :                                     (adfValueReal[iBand] -
    7835     3932460 :                                      adfValueAveraged[iBand]);
    7836             :                             }
    7837             :                         }
    7838             :                     }
    7839             :                 }
    7840             : 
    7841      589832 :                 const double dfTotalWeight = dfTotalWeightInvalid +
    7842             :                                              dfTotalWeightExcluded +
    7843             :                                              dfTotalWeightRegular;
    7844      589832 :                 if (dfTotalWeightInvalid > 0 &&
    7845             :                     dfTotalWeightInvalid >=
    7846      458751 :                         dfNodataValuesThreshold * dfTotalWeight)
    7847             :                 {
    7848             :                     // Do nothing. Leave bHasFoundDensity set to false.
    7849             :                 }
    7850      131085 :                 else if (dfTotalWeightExcluded > 0 &&
    7851             :                          dfTotalWeightExcluded >=
    7852           7 :                              dfExcludedValuesThreshold * dfTotalWeight)
    7853             :                 {
    7854             :                     // Find the most represented excluded value tuple
    7855           3 :                     size_t iExcludedValue = 0;
    7856           3 :                     int nExcludedValueCount = 0;
    7857           6 :                     for (size_t i = 0; i < poWK->m_aadfExcludedValues.size();
    7858             :                          ++i)
    7859             :                     {
    7860           3 :                         if (anCountExcludedValues[i] > nExcludedValueCount)
    7861             :                         {
    7862           3 :                             iExcludedValue = i;
    7863           3 :                             nExcludedValueCount = anCountExcludedValues[i];
    7864             :                         }
    7865             :                     }
    7866             : 
    7867           3 :                     bHasFoundDensity = true;
    7868             : 
    7869          12 :                     for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7870             :                     {
    7871           9 :                         GWKSetPixelValue(
    7872             :                             poWK, iBand, iDstOffset, /* dfBandDensity = */ 1.0,
    7873           9 :                             poWK->m_aadfExcludedValues[iExcludedValue][iBand],
    7874             :                             0, bAvoidNoDataSingleBand);
    7875             :                     }
    7876             : 
    7877           3 :                     if (!bAvoidNoDataSingleBand)
    7878             :                     {
    7879           0 :                         GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    7880           3 :                     }
    7881             :                 }
    7882      131082 :                 else if (dfTotalWeightRegular > 0)
    7883             :                 {
    7884      131082 :                     bHasFoundDensity = true;
    7885             : 
    7886      524324 :                     for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7887             :                     {
    7888      393242 :                         GWKSetPixelValue(poWK, iBand, iDstOffset,
    7889             :                                          /* dfBandDensity = */ 1.0,
    7890      393242 :                                          adfValueAveraged[iBand], 0,
    7891             :                                          bAvoidNoDataSingleBand);
    7892             :                     }
    7893             : 
    7894      131082 :                     if (!bAvoidNoDataSingleBand)
    7895             :                     {
    7896           0 :                         GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    7897             :                     }
    7898             :                 }
    7899             : 
    7900             :                 // Skip below loop on bands
    7901      589832 :                 bDone = true;
    7902             :             }
    7903             : 
    7904             :             /* ====================================================================
    7905             :              */
    7906             :             /*      Loop processing each band. */
    7907             :             /* ====================================================================
    7908             :              */
    7909             : 
    7910     4730010 :             for (int iBand = 0; !bDone && iBand < poWK->nBands; iBand++)
    7911             :             {
    7912     2823310 :                 double dfBandDensity = 0.0;
    7913     2823310 :                 double dfValueReal = 0.0;
    7914     2823310 :                 double dfValueImag = 0.0;
    7915     2823310 :                 double dfValueRealTmp = 0.0;
    7916     2823310 :                 double dfValueImagTmp = 0.0;
    7917             : 
    7918             :                 /* --------------------------------------------------------------------
    7919             :                  */
    7920             :                 /*      Collect the source value. */
    7921             :                 /* --------------------------------------------------------------------
    7922             :                  */
    7923             : 
    7924             :                 // Loop over source lines and pixels - 3 possible algorithms.
    7925             : 
    7926     2823310 :                 if (poWK->eResample == GRA_Average)
    7927             :                 {
    7928      300849 :                     double dfTotalWeight = 0.0;
    7929             : 
    7930             :                     // This code adapted from GDALDownsampleChunk32R_AverageT()
    7931             :                     // in gcore/overview.cpp.
    7932      631308 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7933             :                     {
    7934      330459 :                         const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7935      330459 :                         iSrcOffset = iSrcXMin +
    7936      330459 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7937      773407 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7938             :                              iSrcX++, iSrcOffset++)
    7939             :                         {
    7940      442948 :                             if (bWrapOverX)
    7941        1371 :                                 iSrcOffset =
    7942        1371 :                                     (iSrcX % nSrcXSize) +
    7943        1371 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7944             : 
    7945      442952 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7946           4 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7947             :                                             iSrcOffset))
    7948             :                             {
    7949           1 :                                 continue;
    7950             :                             }
    7951             : 
    7952      442947 :                             if (GWKGetPixelValue(
    7953             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7954      885894 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7955      442947 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7956             :                             {
    7957      442947 :                                 const double dfWeight =
    7958      442947 :                                     COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7959      442947 :                                 if (dfWeight > 0)
    7960             :                                 {
    7961             :                                     // Weighted incremental algorithm mean
    7962             :                                     // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
    7963      442947 :                                     dfTotalWeight += dfWeight;
    7964      442947 :                                     dfValueReal +=
    7965      442947 :                                         (dfWeight / dfTotalWeight) *
    7966      442947 :                                         (dfValueRealTmp - dfValueReal);
    7967      442947 :                                     if (bIsComplex)
    7968             :                                     {
    7969         252 :                                         dfValueImag +=
    7970         252 :                                             (dfWeight / dfTotalWeight) *
    7971         252 :                                             (dfValueImagTmp - dfValueImag);
    7972             :                                     }
    7973             :                                 }
    7974             :                             }
    7975             :                         }
    7976             :                     }
    7977             : 
    7978      300849 :                     if (dfTotalWeight > 0)
    7979             :                     {
    7980      300849 :                         if (poWK->bApplyVerticalShift)
    7981             :                         {
    7982           0 :                             if (!std::isfinite(padfZ[iDstX]))
    7983           0 :                                 continue;
    7984             :                             // Subtract padfZ[] since the coordinate
    7985             :                             // transformation is from target to source
    7986           0 :                             dfValueReal =
    7987           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7988           0 :                                 padfZ[iDstX] *
    7989             :                                     dfMultFactorVerticalShiftPipeline;
    7990             :                         }
    7991             : 
    7992      300849 :                         dfBandDensity = 1;
    7993      300849 :                         bHasFoundDensity = true;
    7994             :                     }
    7995             :                 }  // GRA_Average.
    7996             : 
    7997     2522460 :                 else if (poWK->eResample == GRA_RMS)
    7998             :                 {
    7999      300416 :                     double dfTotalReal = 0.0;
    8000      300416 :                     double dfTotalImag = 0.0;
    8001      300416 :                     double dfTotalWeight = 0.0;
    8002             :                     // This code adapted from GDALDownsampleChunk32R_AverageT()
    8003             :                     // in gcore/overview.cpp.
    8004      630578 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    8005             :                     {
    8006      330162 :                         const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    8007      330162 :                         iSrcOffset = iSrcXMin +
    8008      330162 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8009      772930 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    8010             :                              iSrcX++, iSrcOffset++)
    8011             :                         {
    8012      442768 :                             if (bWrapOverX)
    8013        1371 :                                 iSrcOffset =
    8014        1371 :                                     (iSrcX % nSrcXSize) +
    8015        1371 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8016             : 
    8017      442768 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    8018           0 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    8019             :                                             iSrcOffset))
    8020             :                             {
    8021           0 :                                 continue;
    8022             :                             }
    8023             : 
    8024      442768 :                             if (GWKGetPixelValue(
    8025             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    8026      885536 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    8027      442768 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    8028             :                             {
    8029      442768 :                                 const double dfWeight =
    8030      442768 :                                     COMPUTE_WEIGHT(iSrcX, dfWeightY);
    8031      442768 :                                 dfTotalWeight += dfWeight;
    8032      442768 :                                 dfTotalReal +=
    8033      442768 :                                     dfValueRealTmp * dfValueRealTmp * dfWeight;
    8034      442768 :                                 if (bIsComplex)
    8035          48 :                                     dfTotalImag += dfValueImagTmp *
    8036          48 :                                                    dfValueImagTmp * dfWeight;
    8037             :                             }
    8038             :                         }
    8039             :                     }
    8040             : 
    8041      300416 :                     if (dfTotalWeight > 0)
    8042             :                     {
    8043      300416 :                         dfValueReal = sqrt(dfTotalReal / dfTotalWeight);
    8044             : 
    8045      300416 :                         if (poWK->bApplyVerticalShift)
    8046             :                         {
    8047           0 :                             if (!std::isfinite(padfZ[iDstX]))
    8048           0 :                                 continue;
    8049             :                             // Subtract padfZ[] since the coordinate
    8050             :                             // transformation is from target to source
    8051           0 :                             dfValueReal =
    8052           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    8053           0 :                                 padfZ[iDstX] *
    8054             :                                     dfMultFactorVerticalShiftPipeline;
    8055             :                         }
    8056             : 
    8057      300416 :                         if (bIsComplex)
    8058          12 :                             dfValueImag = sqrt(dfTotalImag / dfTotalWeight);
    8059             : 
    8060      300416 :                         dfBandDensity = 1;
    8061      300416 :                         bHasFoundDensity = true;
    8062             :                     }
    8063             :                 }  // GRA_RMS.
    8064             : 
    8065     2222040 :                 else if (poWK->eResample == GRA_Mode)
    8066             :                 {
    8067      496623 :                     float fMaxCount = 0.0f;
    8068      496623 :                     int nMode = -1;
    8069      496623 :                     bool bHasSourceValues = false;
    8070             : 
    8071      496623 :                     memset(pafCounts, 0, nBins * sizeof(float));
    8072             : 
    8073     1612560 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    8074             :                     {
    8075     1115940 :                         const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    8076     1115940 :                         iSrcOffset = iSrcXMin +
    8077     1115940 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8078     4703370 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    8079             :                              iSrcX++, iSrcOffset++)
    8080             :                         {
    8081     3587430 :                             if (bWrapOverX)
    8082        1371 :                                 iSrcOffset =
    8083        1371 :                                     (iSrcX % nSrcXSize) +
    8084        1371 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8085             : 
    8086     3587430 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    8087           0 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    8088             :                                             iSrcOffset))
    8089           0 :                                 continue;
    8090             : 
    8091     3587430 :                             if (GWKGetPixelValue(
    8092             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    8093     7174870 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    8094     3587430 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    8095             :                             {
    8096     3587430 :                                 bHasSourceValues = true;
    8097     3587430 :                                 const int nVal =
    8098     3587430 :                                     static_cast<int>(dfValueRealTmp);
    8099     3587430 :                                 const int iBin = nVal + nBinsOffset;
    8100     3587430 :                                 const double dfWeight =
    8101     3587430 :                                     COMPUTE_WEIGHT(iSrcX, dfWeightY);
    8102             : 
    8103             :                                 // Sum the density.
    8104     3587430 :                                 pafCounts[iBin] += static_cast<float>(dfWeight);
    8105             :                                 // Is it the most common value so far?
    8106     3587430 :                                 bool bUpdateMode = pafCounts[iBin] > fMaxCount;
    8107     3587430 :                                 if (!bUpdateMode &&
    8108      750293 :                                     pafCounts[iBin] == fMaxCount)
    8109             :                                 {
    8110      217592 :                                     switch (eTieStrategy)
    8111             :                                     {
    8112      217584 :                                         case GWKTS_First:
    8113      217584 :                                             break;
    8114           4 :                                         case GWKTS_Min:
    8115           4 :                                             bUpdateMode = nVal < nMode;
    8116           4 :                                             break;
    8117           4 :                                         case GWKTS_Max:
    8118           4 :                                             bUpdateMode = nVal > nMode;
    8119           4 :                                             break;
    8120             :                                     }
    8121             :                                 }
    8122     3587430 :                                 if (bUpdateMode)
    8123             :                                 {
    8124     2837140 :                                     nMode = nVal;
    8125     2837140 :                                     fMaxCount = pafCounts[iBin];
    8126             :                                 }
    8127             :                             }
    8128             :                         }
    8129             :                     }
    8130             : 
    8131      496623 :                     if (bHasSourceValues)
    8132             :                     {
    8133      496623 :                         dfValueReal = nMode;
    8134      496623 :                         dfBandDensity = 1;
    8135      496623 :                         bHasFoundDensity = true;
    8136             :                     }
    8137             :                 }  // GRA_Mode.
    8138             : 
    8139     1725420 :                 else if (poWK->eResample == GRA_Max)
    8140             :                 {
    8141      335037 :                     bool bFoundValid = false;
    8142      335037 :                     double dfTotalReal = cpl::NumericLimits<double>::lowest();
    8143             :                     // This code adapted from nAlgo 1 method, GRA_Average.
    8144     1288010 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    8145             :                     {
    8146      952975 :                         iSrcOffset = iSrcXMin +
    8147      952975 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8148     4376740 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    8149             :                              iSrcX++, iSrcOffset++)
    8150             :                         {
    8151     3423770 :                             if (bWrapOverX)
    8152        1371 :                                 iSrcOffset =
    8153        1371 :                                     (iSrcX % nSrcXSize) +
    8154        1371 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8155             : 
    8156     3426580 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    8157        2809 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    8158             :                                             iSrcOffset))
    8159             :                             {
    8160        2446 :                                 continue;
    8161             :                             }
    8162             : 
    8163             :                             // Returns pixel value if it is not no data.
    8164     3421320 :                             if (GWKGetPixelValue(
    8165             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    8166     6842640 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    8167     3421320 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    8168             :                             {
    8169     3421320 :                                 bFoundValid = true;
    8170     3421320 :                                 if (dfTotalReal < dfValueRealTmp)
    8171             :                                 {
    8172      442234 :                                     dfTotalReal = dfValueRealTmp;
    8173             :                                 }
    8174             :                             }
    8175             :                         }
    8176             :                     }
    8177             : 
    8178      335037 :                     if (bFoundValid)
    8179             :                     {
    8180      335037 :                         dfValueReal = dfTotalReal;
    8181             : 
    8182      335037 :                         if (poWK->bApplyVerticalShift)
    8183             :                         {
    8184           0 :                             if (!std::isfinite(padfZ[iDstX]))
    8185           0 :                                 continue;
    8186             :                             // Subtract padfZ[] since the coordinate
    8187             :                             // transformation is from target to source
    8188           0 :                             dfValueReal =
    8189           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    8190           0 :                                 padfZ[iDstX] *
    8191             :                                     dfMultFactorVerticalShiftPipeline;
    8192             :                         }
    8193             : 
    8194      335037 :                         dfBandDensity = 1;
    8195      335037 :                         bHasFoundDensity = true;
    8196             :                     }
    8197             :                 }
    8198             : 
    8199     1390380 :                 else if (poWK->eResample == GRA_Min)
    8200             :                 {
    8201      335012 :                     bool bFoundValid = false;
    8202      335012 :                     double dfTotalReal = cpl::NumericLimits<double>::max();
    8203             :                     // This code adapted from nAlgo 1 method, GRA_Average.
    8204     1287720 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    8205             :                     {
    8206      952710 :                         iSrcOffset = iSrcXMin +
    8207      952710 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8208     4373670 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    8209             :                              iSrcX++, iSrcOffset++)
    8210             :                         {
    8211     3420960 :                             if (bWrapOverX)
    8212        1371 :                                 iSrcOffset =
    8213        1371 :                                     (iSrcX % nSrcXSize) +
    8214        1371 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8215             : 
    8216     3420960 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    8217           0 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    8218             :                                             iSrcOffset))
    8219             :                             {
    8220           0 :                                 continue;
    8221             :                             }
    8222             : 
    8223             :                             // Returns pixel value if it is not no data.
    8224     3420960 :                             if (GWKGetPixelValue(
    8225             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    8226     6841920 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    8227     3420960 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    8228             :                             {
    8229     3420960 :                                 bFoundValid = true;
    8230     3420960 :                                 if (dfTotalReal > dfValueRealTmp)
    8231             :                                 {
    8232      442628 :                                     dfTotalReal = dfValueRealTmp;
    8233             :                                 }
    8234             :                             }
    8235             :                         }
    8236             :                     }
    8237             : 
    8238      335012 :                     if (bFoundValid)
    8239             :                     {
    8240      335012 :                         dfValueReal = dfTotalReal;
    8241             : 
    8242      335012 :                         if (poWK->bApplyVerticalShift)
    8243             :                         {
    8244           0 :                             if (!std::isfinite(padfZ[iDstX]))
    8245           0 :                                 continue;
    8246             :                             // Subtract padfZ[] since the coordinate
    8247             :                             // transformation is from target to source
    8248           0 :                             dfValueReal =
    8249           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    8250           0 :                                 padfZ[iDstX] *
    8251             :                                     dfMultFactorVerticalShiftPipeline;
    8252             :                         }
    8253             : 
    8254      335012 :                         dfBandDensity = 1;
    8255      335012 :                         bHasFoundDensity = true;
    8256             :                     }
    8257             :                 }  // GRA_Min.
    8258             : 
    8259             :                 else
    8260             :                 // poWK->eResample == GRA_Med | GRA_Q1 | GRA_Q3.
    8261             :                 {
    8262     1055370 :                     CPLAssert(quant > 0.0f);
    8263             : 
    8264     1055370 :                     bool bFoundValid = false;
    8265     1055370 :                     std::vector<double> dfRealValuesTmp;
    8266             : 
    8267             :                     // This code adapted from nAlgo 1 method, GRA_Average.
    8268     4014130 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    8269             :                     {
    8270     2958760 :                         iSrcOffset = iSrcXMin +
    8271     2958760 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8272    13421300 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    8273             :                              iSrcX++, iSrcOffset++)
    8274             :                         {
    8275    10462500 :                             if (bWrapOverX)
    8276        4113 :                                 iSrcOffset =
    8277        4113 :                                     (iSrcX % nSrcXSize) +
    8278        4113 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8279             : 
    8280    10659100 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    8281      196608 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    8282             :                                             iSrcOffset))
    8283             :                             {
    8284      195449 :                                 continue;
    8285             :                             }
    8286             : 
    8287             :                             // Returns pixel value if it is not no data.
    8288    10267100 :                             if (GWKGetPixelValue(
    8289             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    8290    20534100 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    8291    10267100 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    8292             :                             {
    8293    10267100 :                                 bFoundValid = true;
    8294    10267100 :                                 dfRealValuesTmp.push_back(dfValueRealTmp);
    8295             :                             }
    8296             :                         }
    8297             :                     }
    8298             : 
    8299     1055370 :                     if (bFoundValid)
    8300             :                     {
    8301     1006150 :                         std::sort(dfRealValuesTmp.begin(),
    8302             :                                   dfRealValuesTmp.end());
    8303             :                         int quantIdx = static_cast<int>(
    8304     1006150 :                             std::ceil(quant * dfRealValuesTmp.size() - 1));
    8305     1006150 :                         dfValueReal = dfRealValuesTmp[quantIdx];
    8306             : 
    8307     1006150 :                         if (poWK->bApplyVerticalShift)
    8308             :                         {
    8309           0 :                             if (!std::isfinite(padfZ[iDstX]))
    8310           0 :                                 continue;
    8311             :                             // Subtract padfZ[] since the coordinate
    8312             :                             // transformation is from target to source
    8313           0 :                             dfValueReal =
    8314           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    8315           0 :                                 padfZ[iDstX] *
    8316             :                                     dfMultFactorVerticalShiftPipeline;
    8317             :                         }
    8318             : 
    8319     1006150 :                         dfBandDensity = 1;
    8320     1006150 :                         bHasFoundDensity = true;
    8321     1006150 :                         dfRealValuesTmp.clear();
    8322             :                     }
    8323             :                 }  // Quantile.
    8324             : 
    8325             :                 /* --------------------------------------------------------------------
    8326             :                  */
    8327             :                 /*      We have a computed value from the source.  Now apply it
    8328             :                  * to      */
    8329             :                 /*      the destination pixel. */
    8330             :                 /* --------------------------------------------------------------------
    8331             :                  */
    8332     2823310 :                 if (bHasFoundDensity)
    8333             :                 {
    8334             :                     // TODO: Should we compute dfBandDensity in fct of
    8335             :                     // nCount/nCount2, or use as a threshold to set the dest
    8336             :                     // value?
    8337             :                     // dfBandDensity = (float) nCount / nCount2;
    8338             :                     // if( (float) nCount / nCount2 > 0.1 )
    8339             :                     // or fix gdalwarp crop_to_cutline to crop partially
    8340             :                     // overlapping pixels.
    8341     2774080 :                     GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    8342             :                                      dfValueReal, dfValueImag,
    8343             :                                      bAvoidNoDataSingleBand);
    8344             :                 }
    8345             :             }
    8346             : 
    8347     1906710 :             if (!bHasFoundDensity)
    8348      507971 :                 continue;
    8349             : 
    8350     1398740 :             if (!bAvoidNoDataSingleBand)
    8351             :             {
    8352           0 :                 GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    8353             :             }
    8354             : 
    8355             :             /* --------------------------------------------------------------------
    8356             :              */
    8357             :             /*      Update destination density/validity masks. */
    8358             :             /* --------------------------------------------------------------------
    8359             :              */
    8360     1398740 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    8361             : 
    8362     1398740 :             if (poWK->panDstValid != nullptr)
    8363             :             {
    8364        1184 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    8365             :             }
    8366             :         } /* Next iDstX */
    8367             : 
    8368             :         /* --------------------------------------------------------------------
    8369             :          */
    8370             :         /*      Report progress to the user, and optionally cancel out. */
    8371             :         /* --------------------------------------------------------------------
    8372             :          */
    8373        8078 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    8374           0 :             break;
    8375             :     }
    8376             : 
    8377             :     /* -------------------------------------------------------------------- */
    8378             :     /*      Cleanup and return.                                             */
    8379             :     /* -------------------------------------------------------------------- */
    8380         136 :     CPLFree(padfX);
    8381         136 :     CPLFree(padfY);
    8382         136 :     CPLFree(padfZ);
    8383         136 :     CPLFree(padfX2);
    8384         136 :     CPLFree(padfY2);
    8385         136 :     CPLFree(padfZ2);
    8386         136 :     CPLFree(pabSuccess);
    8387         136 :     CPLFree(pabSuccess2);
    8388         136 :     VSIFree(pafCounts);
    8389             : }
    8390             : 
    8391             : /************************************************************************/
    8392             : /*                           getOrientation()                           */
    8393             : /************************************************************************/
    8394             : 
    8395             : // Returns 1 whether (p1,p2,p3) is clockwise oriented,
    8396             : // -1 if it is counter-clockwise oriented,
    8397             : // or 0 if it is colinear.
    8398     2355910 : static int getOrientation(const XYPair &p1, const XYPair &p2, const XYPair &p3)
    8399             : {
    8400     2355910 :     const double p1x = p1.first;
    8401     2355910 :     const double p1y = p1.second;
    8402     2355910 :     const double p2x = p2.first;
    8403     2355910 :     const double p2y = p2.second;
    8404     2355910 :     const double p3x = p3.first;
    8405     2355910 :     const double p3y = p3.second;
    8406     2355910 :     const double val = (p2y - p1y) * (p3x - p2x) - (p2x - p1x) * (p3y - p2y);
    8407     2355910 :     if (std::abs(val) < 1e-20)
    8408        2690 :         return 0;
    8409     2353220 :     else if (val > 0)
    8410           0 :         return 1;
    8411             :     else
    8412     2353220 :         return -1;
    8413             : }
    8414             : 
    8415             : /************************************************************************/
    8416             : /*                              isConvex()                              */
    8417             : /************************************************************************/
    8418             : 
    8419             : // poly must be closed
    8420      785302 : static bool isConvex(const XYPoly &poly)
    8421             : {
    8422      785302 :     const size_t n = poly.size();
    8423      785302 :     size_t i = 0;
    8424      785302 :     int last_orientation = getOrientation(poly[i], poly[i + 1], poly[i + 2]);
    8425      785302 :     ++i;
    8426     2355910 :     for (; i < n - 2; ++i)
    8427             :     {
    8428             :         const int orientation =
    8429     1570600 :             getOrientation(poly[i], poly[i + 1], poly[i + 2]);
    8430     1570600 :         if (orientation != 0)
    8431             :         {
    8432     1567910 :             if (last_orientation == 0)
    8433           0 :                 last_orientation = orientation;
    8434     1567910 :             else if (orientation != last_orientation)
    8435           0 :                 return false;
    8436             :         }
    8437             :     }
    8438      785302 :     return true;
    8439             : }
    8440             : 
    8441             : /************************************************************************/
    8442             : /*                     pointIntersectsConvexPoly()                      */
    8443             : /************************************************************************/
    8444             : 
    8445             : // Returns whether xy intersects poly, that must be closed and convex.
    8446     6049100 : static bool pointIntersectsConvexPoly(const XYPair &xy, const XYPoly &poly)
    8447             : {
    8448     6049100 :     const size_t n = poly.size();
    8449     6049100 :     double dx1 = xy.first - poly[0].first;
    8450     6049100 :     double dy1 = xy.second - poly[0].second;
    8451     6049100 :     double dx2 = poly[1].first - poly[0].first;
    8452     6049100 :     double dy2 = poly[1].second - poly[0].second;
    8453     6049100 :     double prevCrossProduct = dx1 * dy2 - dx2 * dy1;
    8454             : 
    8455             :     // Check if the point remains on the same side (left/right) of all edges
    8456    14556400 :     for (size_t i = 2; i < n; i++)
    8457             :     {
    8458    12793100 :         dx1 = xy.first - poly[i - 1].first;
    8459    12793100 :         dy1 = xy.second - poly[i - 1].second;
    8460             : 
    8461    12793100 :         dx2 = poly[i].first - poly[i - 1].first;
    8462    12793100 :         dy2 = poly[i].second - poly[i - 1].second;
    8463             : 
    8464    12793100 :         double crossProduct = dx1 * dy2 - dx2 * dy1;
    8465    12793100 :         if (std::abs(prevCrossProduct) < 1e-20)
    8466      725558 :             prevCrossProduct = crossProduct;
    8467    12067500 :         else if (prevCrossProduct * crossProduct < 0)
    8468     4285760 :             return false;
    8469             :     }
    8470             : 
    8471     1763340 :     return true;
    8472             : }
    8473             : 
    8474             : /************************************************************************/
    8475             : /*                          getIntersection()                           */
    8476             : /************************************************************************/
    8477             : 
    8478             : /* Returns intersection of [p1,p2] with [p3,p4], if
    8479             :  * it is a single point, and the 2 segments are not colinear.
    8480             :  */
    8481    11811000 : static bool getIntersection(const XYPair &p1, const XYPair &p2,
    8482             :                             const XYPair &p3, const XYPair &p4, XYPair &xy)
    8483             : {
    8484    11811000 :     const double x1 = p1.first;
    8485    11811000 :     const double y1 = p1.second;
    8486    11811000 :     const double x2 = p2.first;
    8487    11811000 :     const double y2 = p2.second;
    8488    11811000 :     const double x3 = p3.first;
    8489    11811000 :     const double y3 = p3.second;
    8490    11811000 :     const double x4 = p4.first;
    8491    11811000 :     const double y4 = p4.second;
    8492    11811000 :     const double t_num = (x1 - x3) * (y3 - y4) - (y1 - y3) * (x3 - x4);
    8493    11811000 :     const double denom = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4);
    8494    11811000 :     if (t_num * denom < 0 || std::abs(t_num) > std::abs(denom) || denom == 0)
    8495     9260780 :         return false;
    8496             : 
    8497     2550260 :     const double u_num = (x1 - x3) * (y1 - y2) - (y1 - y3) * (x1 - x2);
    8498     2550260 :     if (u_num * denom < 0 || std::abs(u_num) > std::abs(denom))
    8499      973924 :         return false;
    8500             : 
    8501     1576340 :     const double t = t_num / denom;
    8502     1576340 :     xy.first = x1 + t * (x2 - x1);
    8503     1576340 :     xy.second = y1 + t * (y2 - y1);
    8504     1576340 :     return true;
    8505             : }
    8506             : 
    8507             : /************************************************************************/
    8508             : /*                     getConvexPolyIntersection()                      */
    8509             : /************************************************************************/
    8510             : 
    8511             : // poly1 and poly2 must be closed and convex.
    8512             : // The returned intersection will not necessary be closed.
    8513      785302 : static void getConvexPolyIntersection(const XYPoly &poly1, const XYPoly &poly2,
    8514             :                                       XYPoly &intersection)
    8515             : {
    8516      785302 :     intersection.clear();
    8517             : 
    8518             :     // Add all points of poly1 inside poly2
    8519     3926510 :     for (size_t i = 0; i < poly1.size() - 1; ++i)
    8520             :     {
    8521     3141210 :         if (pointIntersectsConvexPoly(poly1[i], poly2))
    8522     1187430 :             intersection.push_back(poly1[i]);
    8523             :     }
    8524      785302 :     if (intersection.size() == poly1.size() - 1)
    8525             :     {
    8526             :         // poly1 is inside poly2
    8527      119100 :         return;
    8528             :     }
    8529             : 
    8530             :     // Add all points of poly2 inside poly1
    8531     3634860 :     for (size_t i = 0; i < poly2.size() - 1; ++i)
    8532             :     {
    8533     2907890 :         if (pointIntersectsConvexPoly(poly2[i], poly1))
    8534      575904 :             intersection.push_back(poly2[i]);
    8535             :     }
    8536             : 
    8537             :     // Compute the intersection of all edges of both polygons
    8538      726972 :     XYPair xy;
    8539     3634860 :     for (size_t i1 = 0; i1 < poly1.size() - 1; ++i1)
    8540             :     {
    8541    14539400 :         for (size_t i2 = 0; i2 < poly2.size() - 1; ++i2)
    8542             :         {
    8543    11631600 :             if (getIntersection(poly1[i1], poly1[i1 + 1], poly2[i2],
    8544    11631600 :                                 poly2[i2 + 1], xy))
    8545             :             {
    8546     1576230 :                 intersection.push_back(xy);
    8547             :             }
    8548             :         }
    8549             :     }
    8550             : 
    8551      726972 :     if (intersection.empty())
    8552       60770 :         return;
    8553             : 
    8554             :     // Find lowest-left point in intersection set
    8555      666202 :     double lowest_x = cpl::NumericLimits<double>::max();
    8556      666202 :     double lowest_y = cpl::NumericLimits<double>::max();
    8557     3772450 :     for (const auto &pair : intersection)
    8558             :     {
    8559     3106240 :         const double x = pair.first;
    8560     3106240 :         const double y = pair.second;
    8561     3106240 :         if (y < lowest_y || (y == lowest_y && x < lowest_x))
    8562             :         {
    8563     1096040 :             lowest_x = x;
    8564     1096040 :             lowest_y = y;
    8565             :         }
    8566             :     }
    8567             : 
    8568     5737980 :     const auto sortFunc = [&](const XYPair &p1, const XYPair &p2)
    8569             :     {
    8570     5737980 :         const double p1x_diff = p1.first - lowest_x;
    8571     5737980 :         const double p1y_diff = p1.second - lowest_y;
    8572     5737980 :         const double p2x_diff = p2.first - lowest_x;
    8573     5737980 :         const double p2y_diff = p2.second - lowest_y;
    8574     5737980 :         if (p2y_diff == 0.0 && p1y_diff == 0.0)
    8575             :         {
    8576     2655420 :             if (p1x_diff >= 0)
    8577             :             {
    8578     2655420 :                 if (p2x_diff >= 0)
    8579     2655420 :                     return p1.first < p2.first;
    8580           0 :                 return true;
    8581             :             }
    8582             :             else
    8583             :             {
    8584           0 :                 if (p2x_diff >= 0)
    8585           0 :                     return false;
    8586           0 :                 return p1.first < p2.first;
    8587             :             }
    8588             :         }
    8589             : 
    8590     3082560 :         if (p2x_diff == 0.0 && p1x_diff == 0.0)
    8591     1046960 :             return p1.second < p2.second;
    8592             : 
    8593             :         double tan_p1;
    8594     2035600 :         if (p1x_diff == 0.0)
    8595      464622 :             tan_p1 = p1y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
    8596             :         else
    8597     1570980 :             tan_p1 = p1y_diff / p1x_diff;
    8598             : 
    8599             :         double tan_p2;
    8600     2035600 :         if (p2x_diff == 0.0)
    8601      839515 :             tan_p2 = p2y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
    8602             :         else
    8603     1196080 :             tan_p2 = p2y_diff / p2x_diff;
    8604             : 
    8605     2035600 :         if (tan_p1 >= 0)
    8606             :         {
    8607     1904790 :             if (tan_p2 >= 0)
    8608     1881590 :                 return tan_p1 < tan_p2;
    8609             :             else
    8610       23199 :                 return true;
    8611             :         }
    8612             :         else
    8613             :         {
    8614      130806 :             if (tan_p2 >= 0)
    8615      103900 :                 return false;
    8616             :             else
    8617       26906 :                 return tan_p1 < tan_p2;
    8618             :         }
    8619      666202 :     };
    8620             : 
    8621             :     // Sort points by increasing atan2(y-lowest_y, x-lowest_x) to form a convex
    8622             :     // hull
    8623      666202 :     std::sort(intersection.begin(), intersection.end(), sortFunc);
    8624             : 
    8625             :     // Remove duplicated points
    8626      666202 :     size_t j = 1;
    8627     3106240 :     for (size_t i = 1; i < intersection.size(); ++i)
    8628             :     {
    8629     2440040 :         if (intersection[i] != intersection[i - 1])
    8630             :         {
    8631     1452560 :             if (j < i)
    8632      545275 :                 intersection[j] = intersection[i];
    8633     1452560 :             ++j;
    8634             :         }
    8635             :     }
    8636      666202 :     intersection.resize(j);
    8637             : }
    8638             : 
    8639             : /************************************************************************/
    8640             : /*                          GWKSumPreserving()                          */
    8641             : /************************************************************************/
    8642             : 
    8643             : static void GWKSumPreservingThread(void *pData);
    8644             : 
    8645          19 : static CPLErr GWKSumPreserving(GDALWarpKernel *poWK)
    8646             : {
    8647          19 :     return GWKRun(poWK, "GWKSumPreserving", GWKSumPreservingThread);
    8648             : }
    8649             : 
    8650          19 : static void GWKSumPreservingThread(void *pData)
    8651             : {
    8652          19 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    8653          19 :     GDALWarpKernel *poWK = psJob->poWK;
    8654          19 :     const int iYMin = psJob->iYMin;
    8655          19 :     const int iYMax = psJob->iYMax;
    8656             :     const bool bIsAffineNoRotation =
    8657          19 :         GDALTransformIsAffineNoRotation(poWK->pfnTransformer,
    8658          28 :                                         poWK->pTransformerArg) &&
    8659             :         // for debug/testing purposes
    8660           9 :         CPLTestBool(
    8661          19 :             CPLGetConfigOption("GDAL_WARP_USE_AFFINE_OPTIMIZATION", "YES"));
    8662             :     const bool bAvoidNoDataSingleBand =
    8663          21 :         poWK->nBands == 1 ||
    8664           2 :         !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
    8665          19 :                                           "UNIFIED_SRC_NODATA", "FALSE"));
    8666             : 
    8667          19 :     const int nDstXSize = poWK->nDstXSize;
    8668          19 :     const int nSrcXSize = poWK->nSrcXSize;
    8669          19 :     const int nSrcYSize = poWK->nSrcYSize;
    8670             : 
    8671          38 :     std::vector<double> adfX0(nSrcXSize + 1);
    8672          38 :     std::vector<double> adfY0(nSrcXSize + 1);
    8673          38 :     std::vector<double> adfZ0(nSrcXSize + 1);
    8674          38 :     std::vector<double> adfX1(nSrcXSize + 1);
    8675          38 :     std::vector<double> adfY1(nSrcXSize + 1);
    8676          38 :     std::vector<double> adfZ1(nSrcXSize + 1);
    8677          38 :     std::vector<int> abSuccess0(nSrcXSize + 1);
    8678          38 :     std::vector<int> abSuccess1(nSrcXSize + 1);
    8679             : 
    8680             :     CPLRectObj sGlobalBounds;
    8681          19 :     sGlobalBounds.minx = -2 * poWK->dfXScale;
    8682          19 :     sGlobalBounds.miny = iYMin - 2 * poWK->dfYScale;
    8683          19 :     sGlobalBounds.maxx = nDstXSize + 2 * poWK->dfXScale;
    8684          19 :     sGlobalBounds.maxy = iYMax + 2 * poWK->dfYScale;
    8685          19 :     CPLQuadTree *hQuadTree = CPLQuadTreeCreate(&sGlobalBounds, nullptr);
    8686             : 
    8687             :     struct SourcePixel
    8688             :     {
    8689             :         int iSrcX;
    8690             :         int iSrcY;
    8691             : 
    8692             :         // Coordinates of source pixel in target pixel coordinates
    8693             :         double dfDstX0;
    8694             :         double dfDstY0;
    8695             :         double dfDstX1;
    8696             :         double dfDstY1;
    8697             :         double dfDstX2;
    8698             :         double dfDstY2;
    8699             :         double dfDstX3;
    8700             :         double dfDstY3;
    8701             : 
    8702             :         // Source pixel total area (might be larger than the one described
    8703             :         // by above coordinates, if the pixel was crossing the antimeridian
    8704             :         // and split)
    8705             :         double dfArea;
    8706             :     };
    8707             : 
    8708          38 :     std::vector<SourcePixel> sourcePixels;
    8709             : 
    8710          38 :     XYPoly discontinuityLeft(5);
    8711          38 :     XYPoly discontinuityRight(5);
    8712             : 
    8713             :     /* ==================================================================== */
    8714             :     /*      First pass: transform the 4 corners of each potential           */
    8715             :     /*      contributing source pixel to target pixel coordinates.          */
    8716             :     /* ==================================================================== */
    8717             : 
    8718             :     // Special case for top line
    8719             :     {
    8720          19 :         int iY = 0;
    8721        3364 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8722             :         {
    8723        3345 :             adfX1[iX] = iX + poWK->nSrcXOff;
    8724        3345 :             adfY1[iX] = iY + poWK->nSrcYOff;
    8725        3345 :             adfZ1[iX] = 0;
    8726             :         }
    8727             : 
    8728          19 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
    8729             :                              adfX1.data(), adfY1.data(), adfZ1.data(),
    8730             :                              abSuccess1.data());
    8731             : 
    8732        3364 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8733             :         {
    8734        3345 :             if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
    8735           0 :                 abSuccess1[iX] = FALSE;
    8736             :             else
    8737             :             {
    8738        3345 :                 adfX1[iX] -= poWK->nDstXOff;
    8739        3345 :                 adfY1[iX] -= poWK->nDstYOff;
    8740             :             }
    8741             :         }
    8742             :     }
    8743             : 
    8744        2032 :     const auto getInsideXSign = [poWK, nDstXSize](double dfX)
    8745             :     {
    8746        2032 :         return dfX - poWK->nDstXOff >= -2 * poWK->dfXScale &&
    8747         872 :                        dfX - poWK->nDstXOff <= nDstXSize + 2 * poWK->dfXScale
    8748        2032 :                    ? 1
    8749        1160 :                    : -1;
    8750          19 :     };
    8751             : 
    8752             :     const auto FindDiscontinuity =
    8753          80 :         [poWK, psJob, getInsideXSign](
    8754             :             double dfXLeft, double dfXRight, double dfY,
    8755             :             int XLeftReprojectedInsideSign, double &dfXMidReprojectedLeft,
    8756         800 :             double &dfXMidReprojectedRight, double &dfYMidReprojected)
    8757             :     {
    8758         880 :         for (int i = 0; i < 10 && dfXRight - dfXLeft > 1e-8; ++i)
    8759             :         {
    8760         800 :             double dfXMid = (dfXLeft + dfXRight) / 2;
    8761         800 :             double dfXMidReprojected = dfXMid;
    8762         800 :             dfYMidReprojected = dfY;
    8763         800 :             double dfZ = 0;
    8764         800 :             int nSuccess = 0;
    8765         800 :             poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1,
    8766             :                                  &dfXMidReprojected, &dfYMidReprojected, &dfZ,
    8767             :                                  &nSuccess);
    8768         800 :             if (XLeftReprojectedInsideSign != getInsideXSign(dfXMidReprojected))
    8769             :             {
    8770         456 :                 dfXRight = dfXMid;
    8771         456 :                 dfXMidReprojectedRight = dfXMidReprojected;
    8772             :             }
    8773             :             else
    8774             :             {
    8775         344 :                 dfXLeft = dfXMid;
    8776         344 :                 dfXMidReprojectedLeft = dfXMidReprojected;
    8777             :             }
    8778             :         }
    8779          80 :     };
    8780             : 
    8781        2685 :     for (int iY = 0; iY < nSrcYSize; ++iY)
    8782             :     {
    8783        2666 :         std::swap(adfX0, adfX1);
    8784        2666 :         std::swap(adfY0, adfY1);
    8785        2666 :         std::swap(adfZ0, adfZ1);
    8786        2666 :         std::swap(abSuccess0, abSuccess1);
    8787             : 
    8788     4836120 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8789             :         {
    8790     4833460 :             adfX1[iX] = iX + poWK->nSrcXOff;
    8791     4833460 :             adfY1[iX] = iY + 1 + poWK->nSrcYOff;
    8792     4833460 :             adfZ1[iX] = 0;
    8793             :         }
    8794             : 
    8795        2666 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
    8796             :                              adfX1.data(), adfY1.data(), adfZ1.data(),
    8797             :                              abSuccess1.data());
    8798             : 
    8799     4836120 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8800             :         {
    8801     4833460 :             if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
    8802           0 :                 abSuccess1[iX] = FALSE;
    8803             :             else
    8804             :             {
    8805     4833460 :                 adfX1[iX] -= poWK->nDstXOff;
    8806     4833460 :                 adfY1[iX] -= poWK->nDstYOff;
    8807             :             }
    8808             :         }
    8809             : 
    8810     4833460 :         for (int iX = 0; iX < nSrcXSize; ++iX)
    8811             :         {
    8812     9661580 :             if (abSuccess0[iX] && abSuccess0[iX + 1] && abSuccess1[iX] &&
    8813     4830790 :                 abSuccess1[iX + 1])
    8814             :             {
    8815             :                 /* --------------------------------------------------------------------
    8816             :                  */
    8817             :                 /*      Do not try to apply transparent source pixels to the
    8818             :                  * destination.*/
    8819             :                 /* --------------------------------------------------------------------
    8820             :                  */
    8821     4830790 :                 const auto iSrcOffset =
    8822     4830790 :                     iX + static_cast<GPtrDiff_t>(iY) * nSrcXSize;
    8823     9560570 :                 if (poWK->panUnifiedSrcValid != nullptr &&
    8824     4729780 :                     !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    8825             :                 {
    8826     4738340 :                     continue;
    8827             :                 }
    8828             : 
    8829      103415 :                 if (poWK->pafUnifiedSrcDensity != nullptr)
    8830             :                 {
    8831           0 :                     if (poWK->pafUnifiedSrcDensity[iSrcOffset] <
    8832             :                         SRC_DENSITY_THRESHOLD_FLOAT)
    8833           0 :                         continue;
    8834             :                 }
    8835             : 
    8836             :                 SourcePixel sp;
    8837      103415 :                 sp.dfArea = 0;
    8838      103415 :                 sp.dfDstX0 = adfX0[iX];
    8839      103415 :                 sp.dfDstY0 = adfY0[iX];
    8840      103415 :                 sp.dfDstX1 = adfX0[iX + 1];
    8841      103415 :                 sp.dfDstY1 = adfY0[iX + 1];
    8842      103415 :                 sp.dfDstX2 = adfX1[iX + 1];
    8843      103415 :                 sp.dfDstY2 = adfY1[iX + 1];
    8844      103415 :                 sp.dfDstX3 = adfX1[iX];
    8845      103415 :                 sp.dfDstY3 = adfY1[iX];
    8846             : 
    8847             :                 // Detect pixel that likely cross the anti-meridian and
    8848             :                 // introduce a discontinuity when reprojected.
    8849             : 
    8850      103415 :                 if (std::fabs(adfX0[iX] - adfX0[iX + 1]) > 2 * poWK->dfXScale &&
    8851          80 :                     std::fabs(adfX1[iX] - adfX1[iX + 1]) > 2 * poWK->dfXScale &&
    8852          40 :                     getInsideXSign(adfX0[iX]) !=
    8853          80 :                         getInsideXSign(adfX0[iX + 1]) &&
    8854          80 :                     getInsideXSign(adfX0[iX]) == getInsideXSign(adfX1[iX]) &&
    8855          40 :                     getInsideXSign(adfX0[iX + 1]) ==
    8856      103495 :                         getInsideXSign(adfX1[iX + 1]) &&
    8857          40 :                     (adfY1[iX] - adfY0[iX]) * (adfY1[iX + 1] - adfY0[iX + 1]) >
    8858             :                         0)
    8859             :                 {
    8860             : #ifdef DEBUG_VERBOSE
    8861             :                     CPLDebug(
    8862             :                         "WARP",
    8863             :                         "Discontinuity for iSrcX=%d, iSrcY=%d, dest corners:"
    8864             :                         "X0[iX]=%f X0[iX+1]=%f X1[iX]=%f X1[iX+1]=%f,"
    8865             :                         "Y0[iX]=%f Y0[iX+1]=%f Y1[iX]=%f Y1[iX+1]=%f",
    8866             :                         iX + poWK->nSrcXOff, iY + poWK->nSrcYOff, adfX0[iX],
    8867             :                         adfX0[iX + 1], adfX1[iX], adfX1[iX + 1], adfY0[iX],
    8868             :                         adfY0[iX + 1], adfY1[iX], adfY1[iX + 1]);
    8869             : #endif
    8870          40 :                     double dfXMidReprojectedLeftTop = 0;
    8871          40 :                     double dfXMidReprojectedRightTop = 0;
    8872          40 :                     double dfYMidReprojectedTop = 0;
    8873          40 :                     FindDiscontinuity(
    8874          40 :                         iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
    8875          80 :                         iY + poWK->nSrcYOff, getInsideXSign(adfX0[iX]),
    8876             :                         dfXMidReprojectedLeftTop, dfXMidReprojectedRightTop,
    8877             :                         dfYMidReprojectedTop);
    8878          40 :                     double dfXMidReprojectedLeftBottom = 0;
    8879          40 :                     double dfXMidReprojectedRightBottom = 0;
    8880          40 :                     double dfYMidReprojectedBottom = 0;
    8881          40 :                     FindDiscontinuity(
    8882          40 :                         iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
    8883          80 :                         iY + poWK->nSrcYOff + 1, getInsideXSign(adfX1[iX]),
    8884             :                         dfXMidReprojectedLeftBottom,
    8885             :                         dfXMidReprojectedRightBottom, dfYMidReprojectedBottom);
    8886             : 
    8887          40 :                     discontinuityLeft[0] = XYPair(adfX0[iX], adfY0[iX]);
    8888          40 :                     discontinuityLeft[1] =
    8889          80 :                         XYPair(dfXMidReprojectedLeftTop, dfYMidReprojectedTop);
    8890          40 :                     discontinuityLeft[2] = XYPair(dfXMidReprojectedLeftBottom,
    8891          40 :                                                   dfYMidReprojectedBottom);
    8892          40 :                     discontinuityLeft[3] = XYPair(adfX1[iX], adfY1[iX]);
    8893          40 :                     discontinuityLeft[4] = XYPair(adfX0[iX], adfY0[iX]);
    8894             : 
    8895          40 :                     discontinuityRight[0] =
    8896          80 :                         XYPair(adfX0[iX + 1], adfY0[iX + 1]);
    8897          40 :                     discontinuityRight[1] =
    8898          80 :                         XYPair(dfXMidReprojectedRightTop, dfYMidReprojectedTop);
    8899          40 :                     discontinuityRight[2] = XYPair(dfXMidReprojectedRightBottom,
    8900          40 :                                                    dfYMidReprojectedBottom);
    8901          40 :                     discontinuityRight[3] =
    8902          80 :                         XYPair(adfX1[iX + 1], adfY1[iX + 1]);
    8903          40 :                     discontinuityRight[4] =
    8904          80 :                         XYPair(adfX0[iX + 1], adfY0[iX + 1]);
    8905             : 
    8906          40 :                     sp.dfArea = getArea(discontinuityLeft) +
    8907          40 :                                 getArea(discontinuityRight);
    8908          40 :                     if (getInsideXSign(adfX0[iX]) >= 1)
    8909             :                     {
    8910          20 :                         sp.dfDstX1 = dfXMidReprojectedLeftTop;
    8911          20 :                         sp.dfDstY1 = dfYMidReprojectedTop;
    8912          20 :                         sp.dfDstX2 = dfXMidReprojectedLeftBottom;
    8913          20 :                         sp.dfDstY2 = dfYMidReprojectedBottom;
    8914             :                     }
    8915             :                     else
    8916             :                     {
    8917          20 :                         sp.dfDstX0 = dfXMidReprojectedRightTop;
    8918          20 :                         sp.dfDstY0 = dfYMidReprojectedTop;
    8919          20 :                         sp.dfDstX3 = dfXMidReprojectedRightBottom;
    8920          20 :                         sp.dfDstY3 = dfYMidReprojectedBottom;
    8921             :                     }
    8922             :                 }
    8923             : 
    8924             :                 // Bounding box of source pixel (expressed in target pixel
    8925             :                 // coordinates)
    8926             :                 CPLRectObj sRect;
    8927      103415 :                 sRect.minx = std::min(std::min(sp.dfDstX0, sp.dfDstX1),
    8928      103415 :                                       std::min(sp.dfDstX2, sp.dfDstX3));
    8929      103415 :                 sRect.miny = std::min(std::min(sp.dfDstY0, sp.dfDstY1),
    8930      103415 :                                       std::min(sp.dfDstY2, sp.dfDstY3));
    8931      103415 :                 sRect.maxx = std::max(std::max(sp.dfDstX0, sp.dfDstX1),
    8932      103415 :                                       std::max(sp.dfDstX2, sp.dfDstX3));
    8933      103415 :                 sRect.maxy = std::max(std::max(sp.dfDstY0, sp.dfDstY1),
    8934      103415 :                                       std::max(sp.dfDstY2, sp.dfDstY3));
    8935      103415 :                 if (!(sRect.minx < nDstXSize && sRect.maxx > 0 &&
    8936      101355 :                       sRect.miny < iYMax && sRect.maxy > iYMin))
    8937             :                 {
    8938       10852 :                     continue;
    8939             :                 }
    8940             : 
    8941       92563 :                 sp.iSrcX = iX;
    8942       92563 :                 sp.iSrcY = iY;
    8943             : 
    8944       92563 :                 if (!bIsAffineNoRotation)
    8945             :                 {
    8946             :                     // Check polygon validity (no self-crossing)
    8947       89745 :                     XYPair xy;
    8948       89745 :                     if (getIntersection(XYPair(sp.dfDstX0, sp.dfDstY0),
    8949       89745 :                                         XYPair(sp.dfDstX1, sp.dfDstY1),
    8950       89745 :                                         XYPair(sp.dfDstX2, sp.dfDstY2),
    8951      269235 :                                         XYPair(sp.dfDstX3, sp.dfDstY3), xy) ||
    8952       89745 :                         getIntersection(XYPair(sp.dfDstX1, sp.dfDstY1),
    8953       89745 :                                         XYPair(sp.dfDstX2, sp.dfDstY2),
    8954       89745 :                                         XYPair(sp.dfDstX0, sp.dfDstY0),
    8955      179490 :                                         XYPair(sp.dfDstX3, sp.dfDstY3), xy))
    8956             :                     {
    8957         113 :                         continue;
    8958             :                     }
    8959             :                 }
    8960             : 
    8961       92450 :                 CPLQuadTreeInsertWithBounds(
    8962             :                     hQuadTree,
    8963             :                     reinterpret_cast<void *>(
    8964       92450 :                         static_cast<uintptr_t>(sourcePixels.size())),
    8965             :                     &sRect);
    8966             : 
    8967       92450 :                 sourcePixels.push_back(sp);
    8968             :             }
    8969             :         }
    8970             :     }
    8971             : 
    8972          38 :     std::vector<double> adfRealValue(poWK->nBands);
    8973          38 :     std::vector<double> adfImagValue(poWK->nBands);
    8974          38 :     std::vector<double> adfBandDensity(poWK->nBands);
    8975          38 :     std::vector<double> adfWeight(poWK->nBands);
    8976             : 
    8977             : #ifdef CHECK_SUM_WITH_GEOS
    8978             :     auto hGEOSContext = OGRGeometry::createGEOSContext();
    8979             :     auto seq1 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
    8980             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 0, 0.0, 0.0);
    8981             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 1, 1.0, 0.0);
    8982             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 2, 1.0, 1.0);
    8983             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 3, 0.0, 1.0);
    8984             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 4, 0.0, 0.0);
    8985             :     auto hLR1 = GEOSGeom_createLinearRing_r(hGEOSContext, seq1);
    8986             :     auto hP1 = GEOSGeom_createPolygon_r(hGEOSContext, hLR1, nullptr, 0);
    8987             : 
    8988             :     auto seq2 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
    8989             :     auto hLR2 = GEOSGeom_createLinearRing_r(hGEOSContext, seq2);
    8990             :     auto hP2 = GEOSGeom_createPolygon_r(hGEOSContext, hLR2, nullptr, 0);
    8991             : #endif
    8992             : 
    8993             :     const XYPoly xy1{
    8994          38 :         {0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}};
    8995          38 :     XYPoly xy2(5);
    8996          38 :     XYPoly xy2_triangle(4);
    8997          38 :     XYPoly intersection;
    8998             : 
    8999             :     /* ==================================================================== */
    9000             :     /*      Loop over output lines.                                         */
    9001             :     /* ==================================================================== */
    9002        1951 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    9003             :     {
    9004             :         CPLRectObj sRect;
    9005        1932 :         sRect.miny = iDstY;
    9006        1932 :         sRect.maxy = iDstY + 1;
    9007             : 
    9008             :         /* ====================================================================
    9009             :          */
    9010             :         /*      Loop over pixels in output scanline. */
    9011             :         /* ====================================================================
    9012             :          */
    9013     1403940 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    9014             :         {
    9015     1402010 :             sRect.minx = iDstX;
    9016     1402010 :             sRect.maxx = iDstX + 1;
    9017     1402010 :             int nSourcePixels = 0;
    9018             :             void **pahSourcePixel =
    9019     1402010 :                 CPLQuadTreeSearch(hQuadTree, &sRect, &nSourcePixels);
    9020     1402010 :             if (nSourcePixels == 0)
    9021             :             {
    9022     1183090 :                 CPLFree(pahSourcePixel);
    9023     1183100 :                 continue;
    9024             :             }
    9025             : 
    9026      218919 :             std::fill(adfRealValue.begin(), adfRealValue.end(), 0);
    9027      218919 :             std::fill(adfImagValue.begin(), adfImagValue.end(), 0);
    9028      218919 :             std::fill(adfBandDensity.begin(), adfBandDensity.end(), 0);
    9029      218919 :             std::fill(adfWeight.begin(), adfWeight.end(), 0);
    9030      218919 :             double dfDensity = 0;
    9031             :             // Just above zero to please Coverity Scan
    9032      218919 :             double dfTotalWeight = std::numeric_limits<double>::min();
    9033             : 
    9034             :             /* ====================================================================
    9035             :              */
    9036             :             /*          Iterate over each contributing source pixel to add its
    9037             :              */
    9038             :             /*          value weighted by the ratio of the area of its
    9039             :              * intersection  */
    9040             :             /*          with the target pixel divided by the area of the source
    9041             :              */
    9042             :             /*          pixel. */
    9043             :             /* ====================================================================
    9044             :              */
    9045     1020550 :             for (int i = 0; i < nSourcePixels; ++i)
    9046             :             {
    9047      801628 :                 const int iSourcePixel = static_cast<int>(
    9048      801628 :                     reinterpret_cast<uintptr_t>(pahSourcePixel[i]));
    9049      801628 :                 auto &sp = sourcePixels[iSourcePixel];
    9050             : 
    9051      801628 :                 double dfWeight = 0.0;
    9052      801628 :                 if (bIsAffineNoRotation)
    9053             :                 {
    9054             :                     // Optimization since the source pixel is a rectangle in
    9055             :                     // target pixel coordinates
    9056       16326 :                     double dfSrcMinX = std::min(sp.dfDstX0, sp.dfDstX2);
    9057       16326 :                     double dfSrcMaxX = std::max(sp.dfDstX0, sp.dfDstX2);
    9058       16326 :                     double dfSrcMinY = std::min(sp.dfDstY0, sp.dfDstY2);
    9059       16326 :                     double dfSrcMaxY = std::max(sp.dfDstY0, sp.dfDstY2);
    9060       16326 :                     double dfIntersMinX = std::max<double>(dfSrcMinX, iDstX);
    9061       16326 :                     double dfIntersMaxX = std::min(dfSrcMaxX, iDstX + 1.0);
    9062       16326 :                     double dfIntersMinY = std::max<double>(dfSrcMinY, iDstY);
    9063       16326 :                     double dfIntersMaxY = std::min(dfSrcMaxY, iDstY + 1.0);
    9064       16326 :                     dfWeight =
    9065       16326 :                         ((dfIntersMaxX - dfIntersMinX) *
    9066       16326 :                          (dfIntersMaxY - dfIntersMinY)) /
    9067       16326 :                         ((dfSrcMaxX - dfSrcMinX) * (dfSrcMaxY - dfSrcMinY));
    9068             :                 }
    9069             :                 else
    9070             :                 {
    9071             :                     // Compute the polygon of the source pixel in target pixel
    9072             :                     // coordinates, and shifted to the target pixel (unit square
    9073             :                     // coordinates)
    9074             : 
    9075      785302 :                     xy2[0] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
    9076      785302 :                     xy2[1] = {sp.dfDstX1 - iDstX, sp.dfDstY1 - iDstY};
    9077      785302 :                     xy2[2] = {sp.dfDstX2 - iDstX, sp.dfDstY2 - iDstY};
    9078      785302 :                     xy2[3] = {sp.dfDstX3 - iDstX, sp.dfDstY3 - iDstY};
    9079      785302 :                     xy2[4] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
    9080             : 
    9081      785302 :                     if (isConvex(xy2))
    9082             :                     {
    9083      785302 :                         getConvexPolyIntersection(xy1, xy2, intersection);
    9084      785302 :                         if (intersection.size() >= 3)
    9085             :                         {
    9086      468849 :                             dfWeight = getArea(intersection);
    9087             :                         }
    9088             :                     }
    9089             :                     else
    9090             :                     {
    9091             :                         // Split xy2 into 2 triangles.
    9092           0 :                         xy2_triangle[0] = xy2[0];
    9093           0 :                         xy2_triangle[1] = xy2[1];
    9094           0 :                         xy2_triangle[2] = xy2[2];
    9095           0 :                         xy2_triangle[3] = xy2[0];
    9096           0 :                         getConvexPolyIntersection(xy1, xy2_triangle,
    9097             :                                                   intersection);
    9098           0 :                         if (intersection.size() >= 3)
    9099             :                         {
    9100           0 :                             dfWeight = getArea(intersection);
    9101             :                         }
    9102             : 
    9103           0 :                         xy2_triangle[1] = xy2[2];
    9104           0 :                         xy2_triangle[2] = xy2[3];
    9105           0 :                         getConvexPolyIntersection(xy1, xy2_triangle,
    9106             :                                                   intersection);
    9107           0 :                         if (intersection.size() >= 3)
    9108             :                         {
    9109           0 :                             dfWeight += getArea(intersection);
    9110             :                         }
    9111             :                     }
    9112      785302 :                     if (dfWeight > 0.0)
    9113             :                     {
    9114      468828 :                         if (sp.dfArea == 0)
    9115       89592 :                             sp.dfArea = getArea(xy2);
    9116      468828 :                         dfWeight /= sp.dfArea;
    9117             :                     }
    9118             : 
    9119             : #ifdef CHECK_SUM_WITH_GEOS
    9120             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 0,
    9121             :                                          sp.dfDstX0 - iDstX,
    9122             :                                          sp.dfDstY0 - iDstY);
    9123             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 1,
    9124             :                                          sp.dfDstX1 - iDstX,
    9125             :                                          sp.dfDstY1 - iDstY);
    9126             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 2,
    9127             :                                          sp.dfDstX2 - iDstX,
    9128             :                                          sp.dfDstY2 - iDstY);
    9129             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 3,
    9130             :                                          sp.dfDstX3 - iDstX,
    9131             :                                          sp.dfDstY3 - iDstY);
    9132             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 4,
    9133             :                                          sp.dfDstX0 - iDstX,
    9134             :                                          sp.dfDstY0 - iDstY);
    9135             : 
    9136             :                     double dfWeightGEOS = 0.0;
    9137             :                     auto hIntersection =
    9138             :                         GEOSIntersection_r(hGEOSContext, hP1, hP2);
    9139             :                     if (hIntersection)
    9140             :                     {
    9141             :                         double dfIntersArea = 0.0;
    9142             :                         if (GEOSArea_r(hGEOSContext, hIntersection,
    9143             :                                        &dfIntersArea) &&
    9144             :                             dfIntersArea > 0)
    9145             :                         {
    9146             :                             double dfSourceArea = 0.0;
    9147             :                             if (GEOSArea_r(hGEOSContext, hP2, &dfSourceArea))
    9148             :                             {
    9149             :                                 dfWeightGEOS = dfIntersArea / dfSourceArea;
    9150             :                             }
    9151             :                         }
    9152             :                         GEOSGeom_destroy_r(hGEOSContext, hIntersection);
    9153             :                     }
    9154             :                     if (fabs(dfWeight - dfWeightGEOS) > 1e-5 * dfWeightGEOS)
    9155             :                     {
    9156             :                         /* ok */ printf("dfWeight=%f dfWeightGEOS=%f\n",
    9157             :                                         dfWeight, dfWeightGEOS);
    9158             :                         printf("xy2: ");  // ok
    9159             :                         for (const auto &xy : xy2)
    9160             :                             printf("[%f, %f], ", xy.first, xy.second);  // ok
    9161             :                         printf("\n");                                   // ok
    9162             :                         printf("intersection: ");                       // ok
    9163             :                         for (const auto &xy : intersection)
    9164             :                             printf("[%f, %f], ", xy.first, xy.second);  // ok
    9165             :                         printf("\n");                                   // ok
    9166             :                     }
    9167             : #endif
    9168             :                 }
    9169      801628 :                 if (dfWeight > 0.0)
    9170             :                 {
    9171             : #ifdef DEBUG_VERBOSE
    9172             : #if defined(DST_X) && defined(DST_Y)
    9173             :                     if (iDstX + poWK->nDstXOff == DST_X &&
    9174             :                         iDstY + poWK->nDstYOff == DST_Y)
    9175             :                     {
    9176             :                         CPLDebug("WARP",
    9177             :                                  "iSrcX = %d, iSrcY = %d, weight =%.17g",
    9178             :                                  sp.iSrcX + poWK->nSrcXOff,
    9179             :                                  sp.iSrcY + poWK->nSrcYOff, dfWeight);
    9180             :                     }
    9181             : #endif
    9182             : #endif
    9183             : 
    9184      474104 :                     const GPtrDiff_t iSrcOffset =
    9185      474104 :                         sp.iSrcX +
    9186      474104 :                         static_cast<GPtrDiff_t>(sp.iSrcY) * nSrcXSize;
    9187      474104 :                     dfTotalWeight += dfWeight;
    9188             : 
    9189      474104 :                     if (poWK->pafUnifiedSrcDensity != nullptr)
    9190             :                     {
    9191           0 :                         dfDensity +=
    9192           0 :                             dfWeight *
    9193           0 :                             double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    9194             :                     }
    9195             :                     else
    9196             :                     {
    9197      474104 :                         dfDensity += dfWeight;
    9198             :                     }
    9199             : 
    9200     1818730 :                     for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    9201             :                     {
    9202             :                         // Returns pixel value if it is not no data.
    9203             :                         double dfBandDensity;
    9204             :                         double dfRealValue;
    9205             :                         double dfImagValue;
    9206     2689250 :                         if (!(GWKGetPixelValue(poWK, iBand, iSrcOffset,
    9207             :                                                &dfBandDensity, &dfRealValue,
    9208             :                                                &dfImagValue) &&
    9209     1344620 :                               dfBandDensity > BAND_DENSITY_THRESHOLD))
    9210             :                         {
    9211           0 :                             continue;
    9212             :                         }
    9213             : #ifdef DEBUG_VERBOSE
    9214             : #if defined(DST_X) && defined(DST_Y)
    9215             :                         if (iDstX + poWK->nDstXOff == DST_X &&
    9216             :                             iDstY + poWK->nDstYOff == DST_Y)
    9217             :                         {
    9218             :                             CPLDebug("WARP", "value * weight = %.17g",
    9219             :                                      dfRealValue * dfWeight);
    9220             :                         }
    9221             : #endif
    9222             : #endif
    9223             : 
    9224     1344620 :                         adfRealValue[iBand] += dfRealValue * dfWeight;
    9225     1344620 :                         adfImagValue[iBand] += dfImagValue * dfWeight;
    9226     1344620 :                         adfBandDensity[iBand] += dfBandDensity * dfWeight;
    9227     1344620 :                         adfWeight[iBand] += dfWeight;
    9228             :                     }
    9229             :                 }
    9230             :             }
    9231             : 
    9232      218919 :             CPLFree(pahSourcePixel);
    9233             : 
    9234             :             /* --------------------------------------------------------------------
    9235             :              */
    9236             :             /*          Update destination pixel value. */
    9237             :             /* --------------------------------------------------------------------
    9238             :              */
    9239      218919 :             bool bHasFoundDensity = false;
    9240      218919 :             const GPtrDiff_t iDstOffset =
    9241      218919 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    9242      827838 :             for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    9243             :             {
    9244      608919 :                 if (adfWeight[iBand] > 0)
    9245             :                 {
    9246             :                     const double dfBandDensity =
    9247      608909 :                         adfBandDensity[iBand] / adfWeight[iBand];
    9248      608909 :                     if (dfBandDensity > BAND_DENSITY_THRESHOLD)
    9249             :                     {
    9250      608909 :                         bHasFoundDensity = true;
    9251      608909 :                         GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    9252      608909 :                                          adfRealValue[iBand],
    9253      608909 :                                          adfImagValue[iBand],
    9254             :                                          bAvoidNoDataSingleBand);
    9255             :                     }
    9256             :                 }
    9257             :             }
    9258             : 
    9259      218919 :             if (!bHasFoundDensity)
    9260          10 :                 continue;
    9261             : 
    9262      218909 :             if (!bAvoidNoDataSingleBand)
    9263             :             {
    9264           0 :                 GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    9265             :             }
    9266             : 
    9267             :             /* --------------------------------------------------------------------
    9268             :              */
    9269             :             /*          Update destination density/validity masks. */
    9270             :             /* --------------------------------------------------------------------
    9271             :              */
    9272      218909 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity / dfTotalWeight);
    9273             : 
    9274      218909 :             if (poWK->panDstValid != nullptr)
    9275             :             {
    9276       11752 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    9277             :             }
    9278             :         }
    9279             : 
    9280             :         /* --------------------------------------------------------------------
    9281             :          */
    9282             :         /*      Report progress to the user, and optionally cancel out. */
    9283             :         /* --------------------------------------------------------------------
    9284             :          */
    9285        1932 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    9286           0 :             break;
    9287             :     }
    9288             : 
    9289             : #ifdef CHECK_SUM_WITH_GEOS
    9290             :     GEOSGeom_destroy_r(hGEOSContext, hP1);
    9291             :     GEOSGeom_destroy_r(hGEOSContext, hP2);
    9292             :     OGRGeometry::freeGEOSContext(hGEOSContext);
    9293             : #endif
    9294          19 :     CPLQuadTreeDestroy(hQuadTree);
    9295          19 : }

Generated by: LCOV version 1.14