LCOV - code coverage report
Current view: top level - alg - gdalwarpkernel.cpp (source / functions) Hit Total Coverage
Test: gdal_filtered.info Lines: 3416 3992 85.6 %
Date: 2026-01-16 04:37:55 Functions: 230 266 86.5 %

          Line data    Source code
       1             : /******************************************************************************
       2             :  *
       3             :  * Project:  High Performance Image Reprojector
       4             :  * Purpose:  Implementation of the GDALWarpKernel class.  Implements the actual
       5             :  *           image warping for a "chunk" of input and output imagery already
       6             :  *           loaded into memory.
       7             :  * Author:   Frank Warmerdam, warmerdam@pobox.com
       8             :  *
       9             :  ******************************************************************************
      10             :  * Copyright (c) 2003, Frank Warmerdam <warmerdam@pobox.com>
      11             :  * Copyright (c) 2008-2013, Even Rouault <even dot rouault at spatialys.com>
      12             :  *
      13             :  * SPDX-License-Identifier: MIT
      14             :  ****************************************************************************/
      15             : 
      16             : #include "cpl_port.h"
      17             : #include "gdalwarper.h"
      18             : 
      19             : #include <cfloat>
      20             : #include <cmath>
      21             : #include <cstddef>
      22             : #include <cstdlib>
      23             : #include <cstring>
      24             : 
      25             : #include <algorithm>
      26             : #include <limits>
      27             : #include <mutex>
      28             : #include <new>
      29             : #include <utility>
      30             : #include <vector>
      31             : 
      32             : #include "cpl_atomic_ops.h"
      33             : #include "cpl_conv.h"
      34             : #include "cpl_error.h"
      35             : #include "cpl_float.h"
      36             : #include "cpl_mask.h"
      37             : #include "cpl_multiproc.h"
      38             : #include "cpl_progress.h"
      39             : #include "cpl_string.h"
      40             : #include "cpl_vsi.h"
      41             : #include "cpl_worker_thread_pool.h"
      42             : #include "cpl_quad_tree.h"
      43             : #include "gdal.h"
      44             : #include "gdal_alg.h"
      45             : #include "gdal_alg_priv.h"
      46             : #include "gdal_thread_pool.h"
      47             : #include "gdalresamplingkernels.h"
      48             : 
      49             : // #define CHECK_SUM_WITH_GEOS
      50             : #ifdef CHECK_SUM_WITH_GEOS
      51             : #include "ogr_geometry.h"
      52             : #include "ogr_geos.h"
      53             : #endif
      54             : 
      55             : #ifdef USE_NEON_OPTIMIZATIONS
      56             : #include "include_sse2neon.h"
      57             : #define USE_SSE2
      58             : 
      59             : #include "gdalsse_priv.h"
      60             : 
      61             : // We restrict to 64bit processors because they are guaranteed to have SSE2.
      62             : // Could possibly be used too on 32bit, but we would need to check at runtime.
      63             : #elif defined(__x86_64) || defined(_M_X64)
      64             : #define USE_SSE2
      65             : 
      66             : #include "gdalsse_priv.h"
      67             : 
      68             : #if __SSE4_1__
      69             : #include <smmintrin.h>
      70             : #endif
      71             : 
      72             : #if __SSE3__
      73             : #include <pmmintrin.h>
      74             : #endif
      75             : 
      76             : #endif
      77             : 
      78             : constexpr double BAND_DENSITY_THRESHOLD = 0.0000000001;
      79             : constexpr float SRC_DENSITY_THRESHOLD_FLOAT = 0.000000001f;
      80             : constexpr double SRC_DENSITY_THRESHOLD_DOUBLE = 0.000000001;
      81             : 
      82             : // #define INSTANTIATE_FLOAT64_SSE2_IMPL
      83             : 
      84             : static const int anGWKFilterRadius[] = {
      85             :     0,  // Nearest neighbour
      86             :     1,  // Bilinear
      87             :     2,  // Cubic Convolution (Catmull-Rom)
      88             :     2,  // Cubic B-Spline
      89             :     3,  // Lanczos windowed sinc
      90             :     0,  // Average
      91             :     0,  // Mode
      92             :     0,  // Reserved GRA_Gauss=7
      93             :     0,  // Max
      94             :     0,  // Min
      95             :     0,  // Med
      96             :     0,  // Q1
      97             :     0,  // Q3
      98             :     0,  // Sum
      99             :     0,  // RMS
     100             : };
     101             : 
     102             : static double GWKBilinear(double dfX);
     103             : static double GWKCubic(double dfX);
     104             : static double GWKBSpline(double dfX);
     105             : static double GWKLanczosSinc(double dfX);
     106             : 
     107             : static const FilterFuncType apfGWKFilter[] = {
     108             :     nullptr,         // Nearest neighbour
     109             :     GWKBilinear,     // Bilinear
     110             :     GWKCubic,        // Cubic Convolution (Catmull-Rom)
     111             :     GWKBSpline,      // Cubic B-Spline
     112             :     GWKLanczosSinc,  // Lanczos windowed sinc
     113             :     nullptr,         // Average
     114             :     nullptr,         // Mode
     115             :     nullptr,         // Reserved GRA_Gauss=7
     116             :     nullptr,         // Max
     117             :     nullptr,         // Min
     118             :     nullptr,         // Med
     119             :     nullptr,         // Q1
     120             :     nullptr,         // Q3
     121             :     nullptr,         // Sum
     122             :     nullptr,         // RMS
     123             : };
     124             : 
     125             : // TODO(schwehr): Can we make these functions have a const * const arg?
     126             : static double GWKBilinear4Values(double *padfVals);
     127             : static double GWKCubic4Values(double *padfVals);
     128             : static double GWKBSpline4Values(double *padfVals);
     129             : static double GWKLanczosSinc4Values(double *padfVals);
     130             : 
     131             : static const FilterFunc4ValuesType apfGWKFilter4Values[] = {
     132             :     nullptr,                // Nearest neighbour
     133             :     GWKBilinear4Values,     // Bilinear
     134             :     GWKCubic4Values,        // Cubic Convolution (Catmull-Rom)
     135             :     GWKBSpline4Values,      // Cubic B-Spline
     136             :     GWKLanczosSinc4Values,  // Lanczos windowed sinc
     137             :     nullptr,                // Average
     138             :     nullptr,                // Mode
     139             :     nullptr,                // Reserved GRA_Gauss=7
     140             :     nullptr,                // Max
     141             :     nullptr,                // Min
     142             :     nullptr,                // Med
     143             :     nullptr,                // Q1
     144             :     nullptr,                // Q3
     145             :     nullptr,                // Sum
     146             :     nullptr,                // RMS
     147             : };
     148             : 
     149       13419 : int GWKGetFilterRadius(GDALResampleAlg eResampleAlg)
     150             : {
     151             :     static_assert(CPL_ARRAYSIZE(anGWKFilterRadius) == GRA_LAST_VALUE + 1,
     152             :                   "Bad size of anGWKFilterRadius");
     153       13419 :     return anGWKFilterRadius[eResampleAlg];
     154             : }
     155             : 
     156        5093 : FilterFuncType GWKGetFilterFunc(GDALResampleAlg eResampleAlg)
     157             : {
     158             :     static_assert(CPL_ARRAYSIZE(apfGWKFilter) == GRA_LAST_VALUE + 1,
     159             :                   "Bad size of apfGWKFilter");
     160        5093 :     return apfGWKFilter[eResampleAlg];
     161             : }
     162             : 
     163        5093 : FilterFunc4ValuesType GWKGetFilterFunc4Values(GDALResampleAlg eResampleAlg)
     164             : {
     165             :     static_assert(CPL_ARRAYSIZE(apfGWKFilter4Values) == GRA_LAST_VALUE + 1,
     166             :                   "Bad size of apfGWKFilter4Values");
     167        5093 :     return apfGWKFilter4Values[eResampleAlg];
     168             : }
     169             : 
     170             : static CPLErr GWKGeneralCase(GDALWarpKernel *);
     171             : static CPLErr GWKRealCase(GDALWarpKernel *poWK);
     172             : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     173             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     174             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     175             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
     176             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
     177             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
     178             : #endif
     179             : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     180             : static CPLErr GWKNearestByte(GDALWarpKernel *poWK);
     181             : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     182             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     183             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
     184             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
     185             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
     186             : #endif
     187             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     188             : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     189             : static CPLErr GWKNearestShort(GDALWarpKernel *poWK);
     190             : static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK);
     191             : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
     192             : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK);
     193             : static CPLErr GWKAverageOrMode(GDALWarpKernel *);
     194             : static CPLErr GWKSumPreserving(GDALWarpKernel *);
     195             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
     196             : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
     197             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
     198             : 
     199             : /************************************************************************/
     200             : /*                           GWKJobStruct                               */
     201             : /************************************************************************/
     202             : 
     203             : struct GWKJobStruct
     204             : {
     205             :     std::mutex &mutex;
     206             :     std::condition_variable &cv;
     207             :     int counterSingleThreaded = 0;
     208             :     int &counter;
     209             :     bool &stopFlag;
     210             :     GDALWarpKernel *poWK = nullptr;
     211             :     int iYMin = 0;
     212             :     int iYMax = 0;
     213             :     int (*pfnProgress)(GWKJobStruct *psJob) = nullptr;
     214             :     void *pTransformerArg = nullptr;
     215             :     // used by GWKRun() to assign the proper pTransformerArg
     216             :     void (*pfnFunc)(void *) = nullptr;
     217             : 
     218        2932 :     GWKJobStruct(std::mutex &mutex_, std::condition_variable &cv_,
     219             :                  int &counter_, bool &stopFlag_)
     220        2932 :         : mutex(mutex_), cv(cv_), counter(counter_), stopFlag(stopFlag_)
     221             :     {
     222        2932 :     }
     223             : };
     224             : 
     225             : struct GWKThreadData
     226             : {
     227             :     std::unique_ptr<CPLJobQueue> poJobQueue{};
     228             :     std::unique_ptr<std::vector<GWKJobStruct>> threadJobs{};
     229             :     int nMaxThreads{0};
     230             :     int counter{0};
     231             :     bool stopFlag{false};
     232             :     std::mutex mutex{};
     233             :     std::condition_variable cv{};
     234             :     bool bTransformerArgInputAssignedToThread{false};
     235             :     void *pTransformerArgInput{
     236             :         nullptr};  // owned by calling layer. Not to be destroyed
     237             :     std::map<GIntBig, void *> mapThreadToTransformerArg{};
     238             :     int nTotalThreadCountForThisRun = 0;
     239             :     int nCurThreadCountForThisRun = 0;
     240             : };
     241             : 
     242             : /************************************************************************/
     243             : /*                        GWKProgressThread()                           */
     244             : /************************************************************************/
     245             : 
     246             : // Return TRUE if the computation must be interrupted.
     247          36 : static int GWKProgressThread(GWKJobStruct *psJob)
     248             : {
     249          36 :     bool stop = false;
     250             :     {
     251          36 :         std::lock_guard<std::mutex> lock(psJob->mutex);
     252          36 :         psJob->counter++;
     253          36 :         stop = psJob->stopFlag;
     254             :     }
     255          36 :     psJob->cv.notify_one();
     256             : 
     257          36 :     return stop;
     258             : }
     259             : 
     260             : /************************************************************************/
     261             : /*                      GWKProgressMonoThread()                         */
     262             : /************************************************************************/
     263             : 
     264             : // Return TRUE if the computation must be interrupted.
     265      378617 : static int GWKProgressMonoThread(GWKJobStruct *psJob)
     266             : {
     267      378617 :     GDALWarpKernel *poWK = psJob->poWK;
     268      378617 :     if (!poWK->pfnProgress(poWK->dfProgressBase +
     269      378617 :                                poWK->dfProgressScale *
     270      378617 :                                    (++psJob->counterSingleThreaded /
     271      378617 :                                     static_cast<double>(psJob->iYMax)),
     272             :                            "", poWK->pProgress))
     273             :     {
     274           1 :         CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
     275           1 :         psJob->stopFlag = true;
     276           1 :         return TRUE;
     277             :     }
     278      378616 :     return FALSE;
     279             : }
     280             : 
     281             : /************************************************************************/
     282             : /*                       GWKGenericMonoThread()                         */
     283             : /************************************************************************/
     284             : 
     285        2910 : static CPLErr GWKGenericMonoThread(GDALWarpKernel *poWK,
     286             :                                    void (*pfnFunc)(void *pUserData))
     287             : {
     288        2910 :     GWKThreadData td;
     289             : 
     290             :     // NOTE: the mutex is not used.
     291        2910 :     GWKJobStruct job(td.mutex, td.cv, td.counter, td.stopFlag);
     292        2910 :     job.poWK = poWK;
     293        2910 :     job.iYMin = 0;
     294        2910 :     job.iYMax = poWK->nDstYSize;
     295        2910 :     job.pfnProgress = GWKProgressMonoThread;
     296        2910 :     job.pTransformerArg = poWK->pTransformerArg;
     297        2910 :     job.counterSingleThreaded = td.counter;
     298        2910 :     pfnFunc(&job);
     299        2910 :     td.counter = job.counterSingleThreaded;
     300             : 
     301        5820 :     return td.stopFlag ? CE_Failure : CE_None;
     302             : }
     303             : 
     304             : /************************************************************************/
     305             : /*                          GWKThreadsCreate()                          */
     306             : /************************************************************************/
     307             : 
     308        1744 : void *GWKThreadsCreate(char **papszWarpOptions,
     309             :                        GDALTransformerFunc /* pfnTransformer */,
     310             :                        void *pTransformerArg)
     311             : {
     312             :     const char *pszWarpThreads =
     313        1744 :         CSLFetchNameValue(papszWarpOptions, "NUM_THREADS");
     314        1744 :     if (pszWarpThreads == nullptr)
     315        1727 :         pszWarpThreads = CPLGetConfigOption("GDAL_NUM_THREADS", "1");
     316             : 
     317        1744 :     int nThreads = 0;
     318        1744 :     if (EQUAL(pszWarpThreads, "ALL_CPUS"))
     319           3 :         nThreads = CPLGetNumCPUs();
     320             :     else
     321        1741 :         nThreads = atoi(pszWarpThreads);
     322        1744 :     if (nThreads <= 1)
     323        1722 :         nThreads = 0;
     324        1744 :     if (nThreads > 128)
     325           0 :         nThreads = 128;
     326             : 
     327        1744 :     GWKThreadData *psThreadData = new GWKThreadData();
     328             :     auto poThreadPool =
     329        1744 :         nThreads > 0 ? GDALGetGlobalThreadPool(nThreads) : nullptr;
     330        1744 :     if (nThreads && poThreadPool)
     331             :     {
     332          22 :         psThreadData->nMaxThreads = nThreads;
     333          22 :         psThreadData->threadJobs.reset(new std::vector<GWKJobStruct>(
     334             :             nThreads,
     335          22 :             GWKJobStruct(psThreadData->mutex, psThreadData->cv,
     336          44 :                          psThreadData->counter, psThreadData->stopFlag)));
     337             : 
     338          22 :         psThreadData->poJobQueue = poThreadPool->CreateJobQueue();
     339          22 :         psThreadData->pTransformerArgInput = pTransformerArg;
     340             :     }
     341             : 
     342        1744 :     return psThreadData;
     343             : }
     344             : 
     345             : /************************************************************************/
     346             : /*                             GWKThreadsEnd()                          */
     347             : /************************************************************************/
     348             : 
     349        1744 : void GWKThreadsEnd(void *psThreadDataIn)
     350             : {
     351        1744 :     if (psThreadDataIn == nullptr)
     352           0 :         return;
     353             : 
     354        1744 :     GWKThreadData *psThreadData = static_cast<GWKThreadData *>(psThreadDataIn);
     355        1744 :     if (psThreadData->poJobQueue)
     356             :     {
     357             :         // cppcheck-suppress constVariableReference
     358          32 :         for (auto &pair : psThreadData->mapThreadToTransformerArg)
     359             :         {
     360          10 :             CPLAssert(pair.second != psThreadData->pTransformerArgInput);
     361          10 :             GDALDestroyTransformer(pair.second);
     362             :         }
     363          22 :         psThreadData->poJobQueue.reset();
     364             :     }
     365        1744 :     delete psThreadData;
     366             : }
     367             : 
     368             : /************************************************************************/
     369             : /*                         ThreadFuncAdapter()                          */
     370             : /************************************************************************/
     371             : 
     372          31 : static void ThreadFuncAdapter(void *pData)
     373             : {
     374          31 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
     375          31 :     GWKThreadData *psThreadData =
     376          31 :         static_cast<GWKThreadData *>(psJob->poWK->psThreadData);
     377             : 
     378             :     // Look if we have already a per-thread transformer
     379          31 :     void *pTransformerArg = nullptr;
     380          31 :     const GIntBig nThreadId = CPLGetPID();
     381             : 
     382             :     {
     383          62 :         std::lock_guard<std::mutex> lock(psThreadData->mutex);
     384          31 :         ++psThreadData->nCurThreadCountForThisRun;
     385             : 
     386          31 :         auto oIter = psThreadData->mapThreadToTransformerArg.find(nThreadId);
     387          31 :         if (oIter != psThreadData->mapThreadToTransformerArg.end())
     388             :         {
     389           1 :             pTransformerArg = oIter->second;
     390             :         }
     391          30 :         else if (!psThreadData->bTransformerArgInputAssignedToThread &&
     392          30 :                  psThreadData->nCurThreadCountForThisRun ==
     393          30 :                      psThreadData->nTotalThreadCountForThisRun)
     394             :         {
     395             :             // If we are the last thread to be started, temporarily borrow the
     396             :             // original transformer
     397          20 :             psThreadData->bTransformerArgInputAssignedToThread = true;
     398          20 :             pTransformerArg = psThreadData->pTransformerArgInput;
     399          20 :             psThreadData->mapThreadToTransformerArg[nThreadId] =
     400             :                 pTransformerArg;
     401             :         }
     402             : 
     403          31 :         if (pTransformerArg == nullptr)
     404             :         {
     405          10 :             CPLAssert(psThreadData->pTransformerArgInput != nullptr);
     406          10 :             CPLAssert(!psThreadData->bTransformerArgInputAssignedToThread);
     407             :         }
     408             :     }
     409             : 
     410             :     // If no transformer assigned to current thread, instantiate one
     411          31 :     if (pTransformerArg == nullptr)
     412             :     {
     413             :         // This somehow assumes that GDALCloneTransformer() is thread-safe
     414             :         // which should normally be the case.
     415             :         pTransformerArg =
     416          10 :             GDALCloneTransformer(psThreadData->pTransformerArgInput);
     417             : 
     418             :         // Lock for the stop flag and the transformer map.
     419          10 :         std::lock_guard<std::mutex> lock(psThreadData->mutex);
     420          10 :         if (!pTransformerArg)
     421             :         {
     422           0 :             psJob->stopFlag = true;
     423           0 :             return;
     424             :         }
     425          10 :         psThreadData->mapThreadToTransformerArg[nThreadId] = pTransformerArg;
     426             :     }
     427             : 
     428          31 :     psJob->pTransformerArg = pTransformerArg;
     429          31 :     psJob->pfnFunc(pData);
     430             : 
     431             :     // Give back original transformer, if borrowed.
     432             :     {
     433          62 :         std::lock_guard<std::mutex> lock(psThreadData->mutex);
     434          31 :         if (psThreadData->bTransformerArgInputAssignedToThread &&
     435          21 :             pTransformerArg == psThreadData->pTransformerArgInput)
     436             :         {
     437             :             psThreadData->mapThreadToTransformerArg.erase(
     438          20 :                 psThreadData->mapThreadToTransformerArg.find(nThreadId));
     439          20 :             psThreadData->bTransformerArgInputAssignedToThread = false;
     440             :         }
     441             :     }
     442             : }
     443             : 
     444             : /************************************************************************/
     445             : /*                                GWKRun()                              */
     446             : /************************************************************************/
     447             : 
     448        2931 : static CPLErr GWKRun(GDALWarpKernel *poWK, const char *pszFuncName,
     449             :                      void (*pfnFunc)(void *pUserData))
     450             : 
     451             : {
     452        2931 :     const int nDstYSize = poWK->nDstYSize;
     453             : 
     454        2931 :     CPLDebug("GDAL",
     455             :              "GDALWarpKernel()::%s() "
     456             :              "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d",
     457             :              pszFuncName, poWK->nSrcXOff, poWK->nSrcYOff, poWK->nSrcXSize,
     458             :              poWK->nSrcYSize, poWK->nDstXOff, poWK->nDstYOff, poWK->nDstXSize,
     459             :              poWK->nDstYSize);
     460             : 
     461        2931 :     if (!poWK->pfnProgress(poWK->dfProgressBase, "", poWK->pProgress))
     462             :     {
     463           0 :         CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
     464           0 :         return CE_Failure;
     465             :     }
     466             : 
     467        2931 :     GWKThreadData *psThreadData =
     468             :         static_cast<GWKThreadData *>(poWK->psThreadData);
     469        2931 :     if (psThreadData == nullptr || psThreadData->poJobQueue == nullptr)
     470             :     {
     471        2910 :         return GWKGenericMonoThread(poWK, pfnFunc);
     472             :     }
     473             : 
     474          21 :     int nThreads = std::min(psThreadData->nMaxThreads, nDstYSize / 2);
     475             :     // Config option mostly useful for tests to be able to test multithreading
     476             :     // with small rasters
     477             :     const int nWarpChunkSize =
     478          21 :         atoi(CPLGetConfigOption("WARP_THREAD_CHUNK_SIZE", "65536"));
     479          21 :     if (nWarpChunkSize > 0)
     480             :     {
     481          19 :         GIntBig nChunks =
     482          19 :             static_cast<GIntBig>(nDstYSize) * poWK->nDstXSize / nWarpChunkSize;
     483          19 :         if (nThreads > nChunks)
     484          14 :             nThreads = static_cast<int>(nChunks);
     485             :     }
     486          21 :     if (nThreads <= 0)
     487          17 :         nThreads = 1;
     488             : 
     489          21 :     CPLDebug("WARP", "Using %d threads", nThreads);
     490             : 
     491          21 :     auto &jobs = *psThreadData->threadJobs;
     492          21 :     CPLAssert(static_cast<int>(jobs.size()) >= nThreads);
     493             :     // Fill-in job structures.
     494          52 :     for (int i = 0; i < nThreads; ++i)
     495             :     {
     496          31 :         auto &job = jobs[i];
     497          31 :         job.poWK = poWK;
     498          31 :         job.iYMin =
     499          31 :             static_cast<int>(static_cast<int64_t>(i) * nDstYSize / nThreads);
     500          31 :         job.iYMax = static_cast<int>(static_cast<int64_t>(i + 1) * nDstYSize /
     501          31 :                                      nThreads);
     502          31 :         if (poWK->pfnProgress != GDALDummyProgress)
     503           2 :             job.pfnProgress = GWKProgressThread;
     504          31 :         job.pfnFunc = pfnFunc;
     505             :     }
     506             : 
     507             :     bool bStopFlag;
     508             :     {
     509          21 :         std::unique_lock<std::mutex> lock(psThreadData->mutex);
     510             : 
     511          21 :         psThreadData->nTotalThreadCountForThisRun = nThreads;
     512             :         // coverity[missing_lock]
     513          21 :         psThreadData->nCurThreadCountForThisRun = 0;
     514             : 
     515             :         // Start jobs.
     516          52 :         for (int i = 0; i < nThreads; ++i)
     517             :         {
     518          31 :             auto &job = jobs[i];
     519          31 :             psThreadData->poJobQueue->SubmitJob(ThreadFuncAdapter,
     520             :                                                 static_cast<void *>(&job));
     521             :         }
     522             : 
     523             :         /* --------------------------------------------------------------------
     524             :          */
     525             :         /*      Report progress. */
     526             :         /* --------------------------------------------------------------------
     527             :          */
     528          21 :         if (poWK->pfnProgress != GDALDummyProgress)
     529             :         {
     530           3 :             while (psThreadData->counter < nDstYSize)
     531             :             {
     532           2 :                 psThreadData->cv.wait(lock);
     533           2 :                 if (!poWK->pfnProgress(poWK->dfProgressBase +
     534           2 :                                            poWK->dfProgressScale *
     535           2 :                                                (psThreadData->counter /
     536           2 :                                                 static_cast<double>(nDstYSize)),
     537             :                                        "", poWK->pProgress))
     538             :                 {
     539           1 :                     CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
     540           1 :                     psThreadData->stopFlag = true;
     541           1 :                     break;
     542             :                 }
     543             :             }
     544             :         }
     545             : 
     546          21 :         bStopFlag = psThreadData->stopFlag;
     547             :     }
     548             : 
     549             :     /* -------------------------------------------------------------------- */
     550             :     /*      Wait for all jobs to complete.                                  */
     551             :     /* -------------------------------------------------------------------- */
     552          21 :     psThreadData->poJobQueue->WaitCompletion();
     553             : 
     554          21 :     return bStopFlag ? CE_Failure : CE_None;
     555             : }
     556             : 
     557             : /************************************************************************/
     558             : /* ==================================================================== */
     559             : /*                            GDALWarpKernel                            */
     560             : /* ==================================================================== */
     561             : /************************************************************************/
     562             : 
     563             : /**
     564             :  * \class GDALWarpKernel "gdalwarper.h"
     565             :  *
     566             :  * Low level image warping class.
     567             :  *
     568             :  * This class is responsible for low level image warping for one
     569             :  * "chunk" of imagery.  The class is essentially a structure with all
     570             :  * data members public - primarily so that new special-case functions
     571             :  * can be added without changing the class declaration.
     572             :  *
     573             :  * Applications are normally intended to interact with warping facilities
     574             :  * through the GDALWarpOperation class, though the GDALWarpKernel can in
     575             :  * theory be used directly if great care is taken in setting up the
     576             :  * control data.
     577             :  *
     578             :  * <h3>Design Issues</h3>
     579             :  *
     580             :  * The intention is that PerformWarp() would analyze the setup in terms
     581             :  * of the datatype, resampling type, and validity/density mask usage and
     582             :  * pick one of many specific implementations of the warping algorithm over
     583             :  * a continuum of optimization vs. generality.  At one end there will be a
     584             :  * reference general purpose implementation of the algorithm that supports
     585             :  * any data type (working internally in double precision complex), all three
     586             :  * resampling types, and any or all of the validity/density masks.  At the
     587             :  * other end would be highly optimized algorithms for common cases like
     588             :  * nearest neighbour resampling on GDT_UInt8 data with no masks.
     589             :  *
     590             :  * The full set of optimized versions have not been decided but we should
     591             :  * expect to have at least:
     592             :  *  - One for each resampling algorithm for 8bit data with no masks.
     593             :  *  - One for each resampling algorithm for float data with no masks.
     594             :  *  - One for each resampling algorithm for float data with any/all masks
     595             :  *    (essentially the generic case for just float data).
     596             :  *  - One for each resampling algorithm for 8bit data with support for
     597             :  *    input validity masks (per band or per pixel).  This handles the common
     598             :  *    case of nodata masking.
     599             :  *  - One for each resampling algorithm for float data with support for
     600             :  *    input validity masks (per band or per pixel).  This handles the common
     601             :  *    case of nodata masking.
     602             :  *
     603             :  * Some of the specializations would operate on all bands in one pass
     604             :  * (especially the ones without masking would do this), while others might
     605             :  * process each band individually to reduce code complexity.
     606             :  *
     607             :  * <h3>Masking Semantics</h3>
     608             :  *
     609             :  * A detailed explanation of the semantics of the validity and density masks,
     610             :  * and their effects on resampling kernels is needed here.
     611             :  */
     612             : 
     613             : /************************************************************************/
     614             : /*                     GDALWarpKernel Data Members                      */
     615             : /************************************************************************/
     616             : 
     617             : /**
     618             :  * \var GDALResampleAlg GDALWarpKernel::eResample;
     619             :  *
     620             :  * Resampling algorithm.
     621             :  *
     622             :  * The resampling algorithm to use.  One of GRA_NearestNeighbour, GRA_Bilinear,
     623             :  * GRA_Cubic, GRA_CubicSpline, GRA_Lanczos, GRA_Average, GRA_RMS,
     624             :  * GRA_Mode or GRA_Sum.
     625             :  *
     626             :  * This field is required. GRA_NearestNeighbour may be used as a default
     627             :  * value.
     628             :  */
     629             : 
     630             : /**
     631             :  * \var GDALDataType GDALWarpKernel::eWorkingDataType;
     632             :  *
     633             :  * Working pixel data type.
     634             :  *
     635             :  * The datatype of pixels in the source image (papabySrcImage) and
     636             :  * destination image (papabyDstImage) buffers.  Note that operations on
     637             :  * some data types (such as GDT_UInt8) may be much better optimized than other
     638             :  * less common cases.
     639             :  *
     640             :  * This field is required.  It may not be GDT_Unknown.
     641             :  */
     642             : 
     643             : /**
     644             :  * \var int GDALWarpKernel::nBands;
     645             :  *
     646             :  * Number of bands.
     647             :  *
     648             :  * The number of bands (layers) of imagery being warped.  Determines the
     649             :  * number of entries in the papabySrcImage, papanBandSrcValid,
     650             :  * and papabyDstImage arrays.
     651             :  *
     652             :  * This field is required.
     653             :  */
     654             : 
     655             : /**
     656             :  * \var int GDALWarpKernel::nSrcXSize;
     657             :  *
     658             :  * Source image width in pixels.
     659             :  *
     660             :  * This field is required.
     661             :  */
     662             : 
     663             : /**
     664             :  * \var int GDALWarpKernel::nSrcYSize;
     665             :  *
     666             :  * Source image height in pixels.
     667             :  *
     668             :  * This field is required.
     669             :  */
     670             : 
     671             : /**
     672             :  * \var double GDALWarpKernel::dfSrcXExtraSize;
     673             :  *
     674             :  * Number of pixels included in nSrcXSize that are present on the edges of
     675             :  * the area of interest to take into account the width of the kernel.
     676             :  *
     677             :  * This field is required.
     678             :  */
     679             : 
     680             : /**
     681             :  * \var double GDALWarpKernel::dfSrcYExtraSize;
     682             :  *
     683             :  * Number of pixels included in nSrcYSize that are present on the edges of
     684             :  * the area of interest to take into account the height of the kernel.
     685             :  *
     686             :  * This field is required.
     687             :  */
     688             : 
     689             : /**
     690             :  * \var GByte **GDALWarpKernel::papabySrcImage;
     691             :  *
     692             :  * Array of source image band data.
     693             :  *
     694             :  * This is an array (of size GDALWarpKernel::nBands) of pointers
     695             :  * to image data.  Each individual band of image data is organized as a single
     696             :  * block of image data in left to right, then bottom to top order.  The actual
     697             :  * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
     698             :  *
     699             :  * To access the pixel value for the (x=3, y=4) pixel (zero based) of
     700             :  * the second band with eWorkingDataType set to GDT_Float32 use code like
     701             :  * this:
     702             :  *
     703             :  * \code
     704             :  *   float dfPixelValue;
     705             :  *   int   nBand = 2-1;  // Band indexes are zero based.
     706             :  *   int   nPixel = 3; // Zero based.
     707             :  *   int   nLine = 4;  // Zero based.
     708             :  *
     709             :  *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
     710             :  *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
     711             :  *   assert( nBand >= 0 && nBand < poKern->nBands );
     712             :  *   dfPixelValue = ((float *) poKern->papabySrcImage[nBand])
     713             :  *                                  [nPixel + nLine * poKern->nSrcXSize];
     714             :  * \endcode
     715             :  *
     716             :  * This field is required.
     717             :  */
     718             : 
     719             : /**
     720             :  * \var GUInt32 **GDALWarpKernel::papanBandSrcValid;
     721             :  *
     722             :  * Per band validity mask for source pixels.
     723             :  *
     724             :  * Array of pixel validity mask layers for each source band.   Each of
     725             :  * the mask layers is the same size (in pixels) as the source image with
     726             :  * one bit per pixel.  Note that it is legal (and common) for this to be
     727             :  * NULL indicating that none of the pixels are invalidated, or for some
     728             :  * band validity masks to be NULL in which case all pixels of the band are
     729             :  * valid.  The following code can be used to test the validity of a particular
     730             :  * pixel.
     731             :  *
     732             :  * \code
     733             :  *   int   bIsValid = TRUE;
     734             :  *   int   nBand = 2-1;  // Band indexes are zero based.
     735             :  *   int   nPixel = 3; // Zero based.
     736             :  *   int   nLine = 4;  // Zero based.
     737             :  *
     738             :  *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
     739             :  *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
     740             :  *   assert( nBand >= 0 && nBand < poKern->nBands );
     741             :  *
     742             :  *   if( poKern->papanBandSrcValid != NULL
     743             :  *       && poKern->papanBandSrcValid[nBand] != NULL )
     744             :  *   {
     745             :  *       GUInt32 *panBandMask = poKern->papanBandSrcValid[nBand];
     746             :  *       int    iPixelOffset = nPixel + nLine * poKern->nSrcXSize;
     747             :  *
     748             :  *       bIsValid = CPLMaskGet(panBandMask, iPixelOffset);
     749             :  *   }
     750             :  * \endcode
     751             :  */
     752             : 
     753             : /**
     754             :  * \var GUInt32 *GDALWarpKernel::panUnifiedSrcValid;
     755             :  *
     756             :  * Per pixel validity mask for source pixels.
     757             :  *
     758             :  * A single validity mask layer that applies to the pixels of all source
     759             :  * bands.  It is accessed similarly to papanBandSrcValid, but without the
     760             :  * extra level of band indirection.
     761             :  *
     762             :  * This pointer may be NULL indicating that all pixels are valid.
     763             :  *
     764             :  * Note that if both panUnifiedSrcValid, and papanBandSrcValid are available,
     765             :  * the pixel isn't considered to be valid unless both arrays indicate it is
     766             :  * valid.
     767             :  */
     768             : 
     769             : /**
     770             :  * \var float *GDALWarpKernel::pafUnifiedSrcDensity;
     771             :  *
     772             :  * Per pixel density mask for source pixels.
     773             :  *
     774             :  * A single density mask layer that applies to the pixels of all source
     775             :  * bands.  It contains values between 0.0 and 1.0 indicating the degree to
     776             :  * which this pixel should be allowed to contribute to the output result.
     777             :  *
     778             :  * This pointer may be NULL indicating that all pixels have a density of 1.0.
     779             :  *
     780             :  * The density for a pixel may be accessed like this:
     781             :  *
     782             :  * \code
     783             :  *   float fDensity = 1.0;
     784             :  *   int nPixel = 3;  // Zero based.
     785             :  *   int nLine = 4;   // Zero based.
     786             :  *
     787             :  *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
     788             :  *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
     789             :  *   if( poKern->pafUnifiedSrcDensity != NULL )
     790             :  *     fDensity = poKern->pafUnifiedSrcDensity
     791             :  *                                  [nPixel + nLine * poKern->nSrcXSize];
     792             :  * \endcode
     793             :  */
     794             : 
     795             : /**
     796             :  * \var int GDALWarpKernel::nDstXSize;
     797             :  *
     798             :  * Width of destination image in pixels.
     799             :  *
     800             :  * This field is required.
     801             :  */
     802             : 
     803             : /**
     804             :  * \var int GDALWarpKernel::nDstYSize;
     805             :  *
     806             :  * Height of destination image in pixels.
     807             :  *
     808             :  * This field is required.
     809             :  */
     810             : 
     811             : /**
     812             :  * \var GByte **GDALWarpKernel::papabyDstImage;
     813             :  *
     814             :  * Array of destination image band data.
     815             :  *
     816             :  * This is an array (of size GDALWarpKernel::nBands) of pointers
     817             :  * to image data.  Each individual band of image data is organized as a single
     818             :  * block of image data in left to right, then bottom to top order.  The actual
     819             :  * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
     820             :  *
     821             :  * To access the pixel value for the (x=3, y=4) pixel (zero based) of
     822             :  * the second band with eWorkingDataType set to GDT_Float32 use code like
     823             :  * this:
     824             :  *
     825             :  * \code
     826             :  *   float dfPixelValue;
     827             :  *   int   nBand = 2-1;  // Band indexes are zero based.
     828             :  *   int   nPixel = 3; // Zero based.
     829             :  *   int   nLine = 4;  // Zero based.
     830             :  *
     831             :  *   assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
     832             :  *   assert( nLine >= 0 && nLine < poKern->nDstYSize );
     833             :  *   assert( nBand >= 0 && nBand < poKern->nBands );
     834             :  *   dfPixelValue = ((float *) poKern->papabyDstImage[nBand])
     835             :  *                                  [nPixel + nLine * poKern->nDstXSize];
     836             :  * \endcode
     837             :  *
     838             :  * This field is required.
     839             :  */
     840             : 
     841             : /**
     842             :  * \var GUInt32 *GDALWarpKernel::panDstValid;
     843             :  *
     844             :  * Per pixel validity mask for destination pixels.
     845             :  *
     846             :  * A single validity mask layer that applies to the pixels of all destination
     847             :  * bands.  It is accessed similarly to panUnifiedSrcValid, but based
     848             :  * on the size of the destination image.
     849             :  *
     850             :  * This pointer may be NULL indicating that all pixels are valid.
     851             :  */
     852             : 
     853             : /**
     854             :  * \var float *GDALWarpKernel::pafDstDensity;
     855             :  *
     856             :  * Per pixel density mask for destination pixels.
     857             :  *
     858             :  * A single density mask layer that applies to the pixels of all destination
     859             :  * bands.  It contains values between 0.0 and 1.0.
     860             :  *
     861             :  * This pointer may be NULL indicating that all pixels have a density of 1.0.
     862             :  *
     863             :  * The density for a pixel may be accessed like this:
     864             :  *
     865             :  * \code
     866             :  *   float fDensity = 1.0;
     867             :  *   int   nPixel = 3; // Zero based.
     868             :  *   int   nLine = 4;  // Zero based.
     869             :  *
     870             :  *   assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
     871             :  *   assert( nLine >= 0 && nLine < poKern->nDstYSize );
     872             :  *   if( poKern->pafDstDensity != NULL )
     873             :  *     fDensity = poKern->pafDstDensity[nPixel + nLine * poKern->nDstXSize];
     874             :  * \endcode
     875             :  */
     876             : 
     877             : /**
     878             :  * \var int GDALWarpKernel::nSrcXOff;
     879             :  *
     880             :  * X offset to source pixel coordinates for transformation.
     881             :  *
     882             :  * See pfnTransformer.
     883             :  *
     884             :  * This field is required.
     885             :  */
     886             : 
     887             : /**
     888             :  * \var int GDALWarpKernel::nSrcYOff;
     889             :  *
     890             :  * Y offset to source pixel coordinates for transformation.
     891             :  *
     892             :  * See pfnTransformer.
     893             :  *
     894             :  * This field is required.
     895             :  */
     896             : 
     897             : /**
     898             :  * \var int GDALWarpKernel::nDstXOff;
     899             :  *
     900             :  * X offset to destination pixel coordinates for transformation.
     901             :  *
     902             :  * See pfnTransformer.
     903             :  *
     904             :  * This field is required.
     905             :  */
     906             : 
     907             : /**
     908             :  * \var int GDALWarpKernel::nDstYOff;
     909             :  *
     910             :  * Y offset to destination pixel coordinates for transformation.
     911             :  *
     912             :  * See pfnTransformer.
     913             :  *
     914             :  * This field is required.
     915             :  */
     916             : 
     917             : /**
     918             :  * \var GDALTransformerFunc GDALWarpKernel::pfnTransformer;
     919             :  *
     920             :  * Source/destination location transformer.
     921             :  *
     922             :  * The function to call to transform coordinates between source image
     923             :  * pixel/line coordinates and destination image pixel/line coordinates.
     924             :  * See GDALTransformerFunc() for details of the semantics of this function.
     925             :  *
     926             :  * The GDALWarpKernel algorithm will only ever use this transformer in
     927             :  * "destination to source" mode (bDstToSrc=TRUE), and will always pass
     928             :  * partial or complete scanlines of points in the destination image as
     929             :  * input.  This means, among other things, that it is safe to use the
     930             :  * approximating transform GDALApproxTransform() as the transformation
     931             :  * function.
     932             :  *
     933             :  * Source and destination images may be subsets of a larger overall image.
     934             :  * The transformation algorithms will expect and return pixel/line coordinates
     935             :  * in terms of this larger image, so coordinates need to be offset by
     936             :  * the offsets specified in nSrcXOff, nSrcYOff, nDstXOff, and nDstYOff before
     937             :  * passing to pfnTransformer, and after return from it.
     938             :  *
     939             :  * The GDALWarpKernel::pTransformerArg value will be passed as the callback
     940             :  * data to this function when it is called.
     941             :  *
     942             :  * This field is required.
     943             :  */
     944             : 
     945             : /**
     946             :  * \var void *GDALWarpKernel::pTransformerArg;
     947             :  *
     948             :  * Callback data for pfnTransformer.
     949             :  *
     950             :  * This field may be NULL if not required for the pfnTransformer being used.
     951             :  */
     952             : 
     953             : /**
     954             :  * \var GDALProgressFunc GDALWarpKernel::pfnProgress;
     955             :  *
     956             :  * The function to call to report progress of the algorithm, and to check
     957             :  * for a requested termination of the operation.  It operates according to
     958             :  * GDALProgressFunc() semantics.
     959             :  *
     960             :  * Generally speaking the progress function will be invoked for each
     961             :  * scanline of the destination buffer that has been processed.
     962             :  *
     963             :  * This field may be NULL (internally set to GDALDummyProgress()).
     964             :  */
     965             : 
     966             : /**
     967             :  * \var void *GDALWarpKernel::pProgress;
     968             :  *
     969             :  * Callback data for pfnProgress.
     970             :  *
     971             :  * This field may be NULL if not required for the pfnProgress being used.
     972             :  */
     973             : 
     974             : /************************************************************************/
     975             : /*                           GDALWarpKernel()                           */
     976             : /************************************************************************/
     977             : 
     978        2961 : GDALWarpKernel::GDALWarpKernel()
     979             :     : papszWarpOptions(nullptr), eResample(GRA_NearestNeighbour),
     980             :       eWorkingDataType(GDT_Unknown), nBands(0), nSrcXSize(0), nSrcYSize(0),
     981             :       dfSrcXExtraSize(0.0), dfSrcYExtraSize(0.0), papabySrcImage(nullptr),
     982             :       papanBandSrcValid(nullptr), panUnifiedSrcValid(nullptr),
     983             :       pafUnifiedSrcDensity(nullptr), nDstXSize(0), nDstYSize(0),
     984             :       papabyDstImage(nullptr), panDstValid(nullptr), pafDstDensity(nullptr),
     985             :       dfXScale(1.0), dfYScale(1.0), dfXFilter(0.0), dfYFilter(0.0), nXRadius(0),
     986             :       nYRadius(0), nFiltInitX(0), nFiltInitY(0), nSrcXOff(0), nSrcYOff(0),
     987             :       nDstXOff(0), nDstYOff(0), pfnTransformer(nullptr),
     988             :       pTransformerArg(nullptr), pfnProgress(GDALDummyProgress),
     989             :       pProgress(nullptr), dfProgressBase(0.0), dfProgressScale(1.0),
     990             :       padfDstNoDataReal(nullptr), psThreadData(nullptr),
     991        2961 :       eTieStrategy(GWKTS_First)
     992             : {
     993        2961 : }
     994             : 
     995             : /************************************************************************/
     996             : /*                          ~GDALWarpKernel()                           */
     997             : /************************************************************************/
     998             : 
     999        2961 : GDALWarpKernel::~GDALWarpKernel()
    1000             : {
    1001        2961 : }
    1002             : 
    1003             : /************************************************************************/
    1004             : /*                            PerformWarp()                             */
    1005             : /************************************************************************/
    1006             : 
    1007             : /**
    1008             :  * \fn CPLErr GDALWarpKernel::PerformWarp();
    1009             :  *
    1010             :  * This method performs the warp described in the GDALWarpKernel.
    1011             :  *
    1012             :  * @return CE_None on success or CE_Failure if an error occurs.
    1013             :  */
    1014             : 
    1015        2957 : CPLErr GDALWarpKernel::PerformWarp()
    1016             : 
    1017             : {
    1018        2957 :     const CPLErr eErr = Validate();
    1019             : 
    1020        2957 :     if (eErr != CE_None)
    1021           1 :         return eErr;
    1022             : 
    1023             :     // See #2445 and #3079.
    1024        2956 :     if (nSrcXSize <= 0 || nSrcYSize <= 0)
    1025             :     {
    1026          25 :         if (!pfnProgress(dfProgressBase + dfProgressScale, "", pProgress))
    1027             :         {
    1028           0 :             CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
    1029           0 :             return CE_Failure;
    1030             :         }
    1031          25 :         return CE_None;
    1032             :     }
    1033             : 
    1034             :     /* -------------------------------------------------------------------- */
    1035             :     /*      Pre-calculate resampling scales and window sizes for filtering. */
    1036             :     /* -------------------------------------------------------------------- */
    1037             : 
    1038        2931 :     dfXScale = static_cast<double>(nDstXSize) / (nSrcXSize - dfSrcXExtraSize);
    1039        2931 :     dfYScale = static_cast<double>(nDstYSize) / (nSrcYSize - dfSrcYExtraSize);
    1040        2931 :     if (nSrcXSize >= nDstXSize && nSrcXSize <= nDstXSize + dfSrcXExtraSize)
    1041        1462 :         dfXScale = 1.0;
    1042        2931 :     if (nSrcYSize >= nDstYSize && nSrcYSize <= nDstYSize + dfSrcYExtraSize)
    1043        1188 :         dfYScale = 1.0;
    1044        2931 :     if (dfXScale < 1.0)
    1045             :     {
    1046         595 :         double dfXReciprocalScale = 1.0 / dfXScale;
    1047         595 :         const int nXReciprocalScale =
    1048         595 :             static_cast<int>(dfXReciprocalScale + 0.5);
    1049         595 :         if (fabs(dfXReciprocalScale - nXReciprocalScale) < 0.05)
    1050         463 :             dfXScale = 1.0 / nXReciprocalScale;
    1051             :     }
    1052        2931 :     if (dfYScale < 1.0)
    1053             :     {
    1054         536 :         double dfYReciprocalScale = 1.0 / dfYScale;
    1055         536 :         const int nYReciprocalScale =
    1056         536 :             static_cast<int>(dfYReciprocalScale + 0.5);
    1057         536 :         if (fabs(dfYReciprocalScale - nYReciprocalScale) < 0.05)
    1058         379 :             dfYScale = 1.0 / nYReciprocalScale;
    1059             :     }
    1060             : 
    1061             :     // XSCALE and YSCALE undocumented for now. Can help in some cases.
    1062             :     // Best would probably be a per-pixel scale computation.
    1063        2931 :     const char *pszXScale = CSLFetchNameValue(papszWarpOptions, "XSCALE");
    1064        2931 :     if (pszXScale != nullptr && !EQUAL(pszXScale, "FROM_GRID_SAMPLING"))
    1065           1 :         dfXScale = CPLAtof(pszXScale);
    1066        2931 :     const char *pszYScale = CSLFetchNameValue(papszWarpOptions, "YSCALE");
    1067        2931 :     if (pszYScale != nullptr)
    1068           1 :         dfYScale = CPLAtof(pszYScale);
    1069             : 
    1070             :     // If the xscale is significantly lower than the yscale, this is highly
    1071             :     // suspicious of a situation of wrapping a very large virtual file in
    1072             :     // geographic coordinates with left and right parts being close to the
    1073             :     // antimeridian. In that situation, the xscale computed by the above method
    1074             :     // is completely wrong. Prefer doing an average of a few sample points
    1075             :     // instead
    1076        2931 :     if ((dfYScale / dfXScale > 100 ||
    1077           1 :          (pszXScale != nullptr && EQUAL(pszXScale, "FROM_GRID_SAMPLING"))))
    1078             :     {
    1079             :         // Sample points along a grid
    1080           4 :         const int nPointsX = std::min(10, nDstXSize);
    1081           4 :         const int nPointsY = std::min(10, nDstYSize);
    1082           4 :         const int nPoints = 3 * nPointsX * nPointsY;
    1083           8 :         std::vector<double> padfX;
    1084           8 :         std::vector<double> padfY;
    1085           8 :         std::vector<double> padfZ(nPoints);
    1086           8 :         std::vector<int> pabSuccess(nPoints);
    1087          44 :         for (int iY = 0; iY < nPointsY; iY++)
    1088             :         {
    1089         440 :             for (int iX = 0; iX < nPointsX; iX++)
    1090             :             {
    1091         400 :                 const double dfX =
    1092             :                     nPointsX == 1
    1093         400 :                         ? 0.0
    1094         400 :                         : static_cast<double>(iX) * nDstXSize / (nPointsX - 1);
    1095         400 :                 const double dfY =
    1096             :                     nPointsY == 1
    1097         400 :                         ? 0.0
    1098         400 :                         : static_cast<double>(iY) * nDstYSize / (nPointsY - 1);
    1099             : 
    1100             :                 // Reproject each destination sample point and its neighbours
    1101             :                 // at (x+1,y) and (x,y+1), so as to get the local scale.
    1102         400 :                 padfX.push_back(dfX);
    1103         400 :                 padfY.push_back(dfY);
    1104             : 
    1105         400 :                 padfX.push_back((iX == nPointsX - 1) ? dfX - 1 : dfX + 1);
    1106         400 :                 padfY.push_back(dfY);
    1107             : 
    1108         400 :                 padfX.push_back(dfX);
    1109         400 :                 padfY.push_back((iY == nPointsY - 1) ? dfY - 1 : dfY + 1);
    1110             :             }
    1111             :         }
    1112           4 :         pfnTransformer(pTransformerArg, TRUE, nPoints, &padfX[0], &padfY[0],
    1113           4 :                        &padfZ[0], &pabSuccess[0]);
    1114             : 
    1115             :         // Compute the xscale at each sampling point
    1116           8 :         std::vector<double> adfXScales;
    1117         404 :         for (int i = 0; i < nPoints; i += 3)
    1118             :         {
    1119         400 :             if (pabSuccess[i] && pabSuccess[i + 1] && pabSuccess[i + 2])
    1120             :             {
    1121             :                 const double dfPointXScale =
    1122         400 :                     1.0 / std::max(std::abs(padfX[i + 1] - padfX[i]),
    1123         800 :                                    std::abs(padfX[i + 2] - padfX[i]));
    1124         400 :                 adfXScales.push_back(dfPointXScale);
    1125             :             }
    1126             :         }
    1127             : 
    1128             :         // Sort by increasing xscale
    1129           4 :         std::sort(adfXScales.begin(), adfXScales.end());
    1130             : 
    1131           4 :         if (!adfXScales.empty())
    1132             :         {
    1133             :             // Compute the average of scales, but eliminate outliers small
    1134             :             // scales, if some samples are just along the discontinuity.
    1135           4 :             const double dfMaxPointXScale = adfXScales.back();
    1136           4 :             double dfSumPointXScale = 0;
    1137           4 :             int nCountPointScale = 0;
    1138         404 :             for (double dfPointXScale : adfXScales)
    1139             :             {
    1140         400 :                 if (dfPointXScale > dfMaxPointXScale / 10)
    1141             :                 {
    1142         398 :                     dfSumPointXScale += dfPointXScale;
    1143         398 :                     nCountPointScale++;
    1144             :                 }
    1145             :             }
    1146           4 :             if (nCountPointScale > 0)  // should always be true
    1147             :             {
    1148           4 :                 const double dfXScaleFromSampling =
    1149           4 :                     dfSumPointXScale / nCountPointScale;
    1150             : #if DEBUG_VERBOSE
    1151             :                 CPLDebug("WARP", "Correcting dfXScale from %f to %f", dfXScale,
    1152             :                          dfXScaleFromSampling);
    1153             : #endif
    1154           4 :                 dfXScale = dfXScaleFromSampling;
    1155             :             }
    1156             :         }
    1157             :     }
    1158             : 
    1159             : #if DEBUG_VERBOSE
    1160             :     CPLDebug("WARP", "dfXScale = %f, dfYScale = %f", dfXScale, dfYScale);
    1161             : #endif
    1162             : 
    1163        2931 :     const int bUse4SamplesFormula = dfXScale >= 0.95 && dfYScale >= 0.95;
    1164             : 
    1165             :     // Safety check for callers that would use GDALWarpKernel without using
    1166             :     // GDALWarpOperation.
    1167        2868 :     if ((eResample == GRA_CubicSpline || eResample == GRA_Lanczos ||
    1168        2805 :          ((eResample == GRA_Cubic || eResample == GRA_Bilinear) &&
    1169        5862 :           !bUse4SamplesFormula)) &&
    1170         390 :         atoi(CSLFetchNameValueDef(papszWarpOptions, "EXTRA_ELTS", "0")) !=
    1171             :             WARP_EXTRA_ELTS)
    1172             :     {
    1173           0 :         CPLError(CE_Failure, CPLE_AppDefined,
    1174             :                  "Source arrays must have WARP_EXTRA_ELTS extra elements at "
    1175             :                  "their end. "
    1176             :                  "See GDALWarpKernel class definition. If this condition is "
    1177             :                  "fulfilled, define a EXTRA_ELTS=%d warp options",
    1178             :                  WARP_EXTRA_ELTS);
    1179           0 :         return CE_Failure;
    1180             :     }
    1181             : 
    1182        2931 :     dfXFilter = anGWKFilterRadius[eResample];
    1183        2931 :     dfYFilter = anGWKFilterRadius[eResample];
    1184             : 
    1185        2931 :     nXRadius = dfXScale < 1.0 ? static_cast<int>(ceil(dfXFilter / dfXScale))
    1186        2420 :                               : static_cast<int>(dfXFilter);
    1187        2931 :     nYRadius = dfYScale < 1.0 ? static_cast<int>(ceil(dfYFilter / dfYScale))
    1188        2423 :                               : static_cast<int>(dfYFilter);
    1189             : 
    1190             :     // Filter window offset depends on the parity of the kernel radius.
    1191        2931 :     nFiltInitX = ((anGWKFilterRadius[eResample] + 1) % 2) - nXRadius;
    1192        2931 :     nFiltInitY = ((anGWKFilterRadius[eResample] + 1) % 2) - nYRadius;
    1193             : 
    1194        2931 :     bApplyVerticalShift =
    1195        2931 :         CPLFetchBool(papszWarpOptions, "APPLY_VERTICAL_SHIFT", false);
    1196        2931 :     dfMultFactorVerticalShift = CPLAtof(CSLFetchNameValueDef(
    1197        2931 :         papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT", "1.0"));
    1198             : 
    1199             :     /* -------------------------------------------------------------------- */
    1200             :     /*      Set up resampling functions.                                    */
    1201             :     /* -------------------------------------------------------------------- */
    1202        2931 :     if (CPLFetchBool(papszWarpOptions, "USE_GENERAL_CASE", false))
    1203          12 :         return GWKGeneralCase(this);
    1204             : 
    1205        2919 :     const bool bNoMasksOrDstDensityOnly =
    1206        2912 :         papanBandSrcValid == nullptr && panUnifiedSrcValid == nullptr &&
    1207        5831 :         pafUnifiedSrcDensity == nullptr && panDstValid == nullptr;
    1208             : 
    1209        2919 :     if (eWorkingDataType == GDT_UInt8 && eResample == GRA_NearestNeighbour &&
    1210             :         bNoMasksOrDstDensityOnly)
    1211         943 :         return GWKNearestNoMasksOrDstDensityOnlyByte(this);
    1212             : 
    1213        1976 :     if (eWorkingDataType == GDT_UInt8 && eResample == GRA_Bilinear &&
    1214             :         bNoMasksOrDstDensityOnly)
    1215         126 :         return GWKBilinearNoMasksOrDstDensityOnlyByte(this);
    1216             : 
    1217        1850 :     if (eWorkingDataType == GDT_UInt8 && eResample == GRA_Cubic &&
    1218             :         bNoMasksOrDstDensityOnly)
    1219         676 :         return GWKCubicNoMasksOrDstDensityOnlyByte(this);
    1220             : 
    1221        1174 :     if (eWorkingDataType == GDT_UInt8 && eResample == GRA_CubicSpline &&
    1222             :         bNoMasksOrDstDensityOnly)
    1223          12 :         return GWKCubicSplineNoMasksOrDstDensityOnlyByte(this);
    1224             : 
    1225        1162 :     if (eWorkingDataType == GDT_UInt8 && eResample == GRA_NearestNeighbour)
    1226         350 :         return GWKNearestByte(this);
    1227             : 
    1228         812 :     if ((eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16) &&
    1229         165 :         eResample == GRA_NearestNeighbour && bNoMasksOrDstDensityOnly)
    1230          14 :         return GWKNearestNoMasksOrDstDensityOnlyShort(this);
    1231             : 
    1232         798 :     if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Cubic &&
    1233             :         bNoMasksOrDstDensityOnly)
    1234           5 :         return GWKCubicNoMasksOrDstDensityOnlyShort(this);
    1235             : 
    1236         793 :     if ((eWorkingDataType == GDT_Int16) && eResample == GRA_CubicSpline &&
    1237             :         bNoMasksOrDstDensityOnly)
    1238           6 :         return GWKCubicSplineNoMasksOrDstDensityOnlyShort(this);
    1239             : 
    1240         787 :     if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Bilinear &&
    1241             :         bNoMasksOrDstDensityOnly)
    1242           5 :         return GWKBilinearNoMasksOrDstDensityOnlyShort(this);
    1243             : 
    1244         782 :     if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Cubic &&
    1245             :         bNoMasksOrDstDensityOnly)
    1246          14 :         return GWKCubicNoMasksOrDstDensityOnlyUShort(this);
    1247             : 
    1248         768 :     if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_CubicSpline &&
    1249             :         bNoMasksOrDstDensityOnly)
    1250           5 :         return GWKCubicSplineNoMasksOrDstDensityOnlyUShort(this);
    1251             : 
    1252         763 :     if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Bilinear &&
    1253             :         bNoMasksOrDstDensityOnly)
    1254           6 :         return GWKBilinearNoMasksOrDstDensityOnlyUShort(this);
    1255             : 
    1256         757 :     if (eWorkingDataType == GDT_Int16 && eResample == GRA_NearestNeighbour)
    1257          45 :         return GWKNearestShort(this);
    1258             : 
    1259         712 :     if (eWorkingDataType == GDT_UInt16 && eResample == GRA_NearestNeighbour)
    1260          10 :         return GWKNearestUnsignedShort(this);
    1261             : 
    1262         702 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour &&
    1263             :         bNoMasksOrDstDensityOnly)
    1264          11 :         return GWKNearestNoMasksOrDstDensityOnlyFloat(this);
    1265             : 
    1266         691 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour)
    1267          50 :         return GWKNearestFloat(this);
    1268             : 
    1269         641 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_Bilinear &&
    1270             :         bNoMasksOrDstDensityOnly)
    1271           4 :         return GWKBilinearNoMasksOrDstDensityOnlyFloat(this);
    1272             : 
    1273         637 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_Cubic &&
    1274             :         bNoMasksOrDstDensityOnly)
    1275           9 :         return GWKCubicNoMasksOrDstDensityOnlyFloat(this);
    1276             : 
    1277             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    1278             :     if (eWorkingDataType == GDT_Float64 && eResample == GRA_Bilinear &&
    1279             :         bNoMasksOrDstDensityOnly)
    1280             :         return GWKBilinearNoMasksOrDstDensityOnlyDouble(this);
    1281             : 
    1282             :     if (eWorkingDataType == GDT_Float64 && eResample == GRA_Cubic &&
    1283             :         bNoMasksOrDstDensityOnly)
    1284             :         return GWKCubicNoMasksOrDstDensityOnlyDouble(this);
    1285             : #endif
    1286             : 
    1287         628 :     if (eResample == GRA_Average)
    1288          77 :         return GWKAverageOrMode(this);
    1289             : 
    1290         551 :     if (eResample == GRA_RMS)
    1291           9 :         return GWKAverageOrMode(this);
    1292             : 
    1293         542 :     if (eResample == GRA_Mode)
    1294          45 :         return GWKAverageOrMode(this);
    1295             : 
    1296         497 :     if (eResample == GRA_Max)
    1297           6 :         return GWKAverageOrMode(this);
    1298             : 
    1299         491 :     if (eResample == GRA_Min)
    1300           5 :         return GWKAverageOrMode(this);
    1301             : 
    1302         486 :     if (eResample == GRA_Med)
    1303           6 :         return GWKAverageOrMode(this);
    1304             : 
    1305         480 :     if (eResample == GRA_Q1)
    1306          10 :         return GWKAverageOrMode(this);
    1307             : 
    1308         470 :     if (eResample == GRA_Q3)
    1309           5 :         return GWKAverageOrMode(this);
    1310             : 
    1311         465 :     if (eResample == GRA_Sum)
    1312          19 :         return GWKSumPreserving(this);
    1313             : 
    1314         446 :     if (!GDALDataTypeIsComplex(eWorkingDataType))
    1315             :     {
    1316         219 :         return GWKRealCase(this);
    1317             :     }
    1318             : 
    1319         227 :     return GWKGeneralCase(this);
    1320             : }
    1321             : 
    1322             : /************************************************************************/
    1323             : /*                              Validate()                              */
    1324             : /************************************************************************/
    1325             : 
    1326             : /**
    1327             :  * \fn CPLErr GDALWarpKernel::Validate()
    1328             :  *
    1329             :  * Check the settings in the GDALWarpKernel, and issue a CPLError()
    1330             :  * (and return CE_Failure) if the configuration is considered to be
    1331             :  * invalid for some reason.
    1332             :  *
    1333             :  * This method also parses the EXCLUDED_VALUES warp option, when set, into
    1334             :  * tuples of nBands values that are appended to m_aadfExcludedValues.
    1335             :  *
    1336             :  * @return CE_None on success or CE_Failure if an error is detected.
    1337             :  */
    1338             : 
    1339        2957 : CPLErr GDALWarpKernel::Validate()
    1340             : 
    1341             : {
    1342        2957 :     if (static_cast<size_t>(eResample) >=
    1343             :         (sizeof(anGWKFilterRadius) / sizeof(anGWKFilterRadius[0])))  // eResample must be a valid index into anGWKFilterRadius
    1344             :     {
    1345           0 :         CPLError(CE_Failure, CPLE_AppDefined,
    1346             :                  "Unsupported resampling method %d.",
    1347           0 :                  static_cast<int>(eResample));
    1348           0 :         return CE_Failure;
    1349             :     }
    1350             : 
    1351             :     // Tuples of values (e.g. "<R>,<G>,<B>" or "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>)") that must
    1352             :     // be ignored as contributing source pixels during resampling. Only taken into account by
    1353             :     // Average currently
    1354             :     const char *pszExcludedValues =
    1355        2957 :         CSLFetchNameValue(papszWarpOptions, "EXCLUDED_VALUES");
    1356        2957 :     if (pszExcludedValues)
    1357             :     {
    1358             :         const CPLStringList aosTokens(
    1359          14 :             CSLTokenizeString2(pszExcludedValues, "(,)", 0));  // split on '(', ',' and ')'
    1360          14 :         if ((aosTokens.size() % nBands) != 0)  // NOTE(review): modulo by nBands; assumes nBands != 0 at this point - confirm
    1361             :         {
    1362           1 :             CPLError(CE_Failure, CPLE_AppDefined,
    1363             :                      "EXCLUDED_VALUES should contain one or several tuples of "
    1364             :                      "%d values formatted like <R>,<G>,<B> or "
    1365             :                      "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>) if there are multiple "
    1366             :                      "tuples",
    1367             :                      nBands);
    1368           1 :             return CE_Failure;
    1369             :         }
    1370          26 :         std::vector<double> adfTuple;  // values of the tuple currently being assembled
    1371          52 :         for (int i = 0; i < aosTokens.size(); ++i)
    1372             :         {
    1373          39 :             adfTuple.push_back(CPLAtof(aosTokens[i]));
    1374          39 :             if (((i + 1) % nBands) == 0)  // a tuple is complete after every nBands-th token
    1375             :             {
    1376          13 :                 m_aadfExcludedValues.push_back(adfTuple);  // NOTE(review): appended without a prior clear(); repeated Validate() calls would accumulate tuples - confirm
    1377          13 :                 adfTuple.clear();
    1378             :             }
    1379             :         }
    1380             :     }
    1381             : 
    1382        2956 :     return CE_None;
    1383             : }
    1384             : 
    1385             : /************************************************************************/
    1386             : /*                         GWKOverlayDensity()                          */
    1387             : /*                                                                      */
    1388             : /*      Compute the final density for the destination pixel.  This      */
    1389             : /*      is a function of the overlay density (passed in) and the        */
    1390             : /*      original density.                                               */
    1391             : /************************************************************************/
    1392             : 
    1393    10022900 : static void GWKOverlayDensity(const GDALWarpKernel *poWK, GPtrDiff_t iDstOffset,
    1394             :                               double dfDensity)
    1395             : {
    1396    10022900 :     if (dfDensity < 0.0001 || poWK->pafDstDensity == nullptr)
    1397     8062630 :         return;  // negligible overlay density, or no destination density buffer to update
    1398             :     // Composite density = 1 - (1 - overlay density) * (1 - existing destination density), computed in float.
    1399     1960260 :     poWK->pafDstDensity[iDstOffset] =
    1400     1960260 :         1.0f -
    1401     1960260 :         (1.0f - float(dfDensity)) * (1.0f - poWK->pafDstDensity[iDstOffset]);
    1402             : }
    1403             : 
    1404             : /************************************************************************/
    1405             : /*                          GWKRoundValueT()                            */
    1406             : /************************************************************************/
    1407             : 
    1408             : template <class T, class U, bool is_signed> struct sGWKRoundValueT
    1409             : {
    1410             :     static T eval(U);  // declared only in the primary template; the two partial specializations in this group define it
    1411             : };
    1412             : // Signed T: add 0.5, apply floor(), then convert to T.
    1413             : template <class T, class U> struct sGWKRoundValueT<T, U, true> /* signed */
    1414             : {
    1415      791525 :     static T eval(U value)
    1416             :     {
    1417      791525 :         return static_cast<T>(floor(value + U(0.5)));
    1418             :     }
    1419             : };
    1420             : // Unsigned T: add 0.5 and convert to T directly (no floor() call).
    1421             : template <class T, class U> struct sGWKRoundValueT<T, U, false> /* unsigned */
    1422             : {
    1423   124557197 :     static T eval(U value)
    1424             :     {
    1425   124557197 :         return static_cast<T>(value + U(0.5));
    1426             :     }
    1427             : };
    1428             : 
    1429   125348722 : template <class T, class U> static T GWKRoundValueT(U value)
    1430             : {  // Selects the signed or unsigned sGWKRoundValueT helper from cpl::NumericLimits<T>::is_signed.
    1431   125348722 :     return sGWKRoundValueT<T, U, cpl::NumericLimits<T>::is_signed>::eval(value);
    1432             : }
    1433             : 
    1434      268974 : template <> float GWKRoundValueT<float, double>(double value)
    1435             : {
    1436      268974 :     return static_cast<float>(value);  // float output: no rounding step, only the narrowing cast
    1437             : }
    1438             : // The double -> double variant is compiled only when `notused` is defined.
    1439             : #ifdef notused
    1440             : template <> double GWKRoundValueT<double, double>(double value)
    1441             : {
    1442             :     return value;
    1443             : }
    1444             : #endif
    1445             : 
    1446             : /************************************************************************/
    1447             : /*                            GWKClampValueT()                          */
    1448             : /************************************************************************/
    1449             : 
    1450   120170134 : template <class T, class U> static CPL_INLINE T GWKClampValueT(U value)
    1451             : {  // Saturates value to the range of T, otherwise rounds it to T via GWKRoundValueT.
    1452   120170134 :     if (value < static_cast<U>(cpl::NumericLimits<T>::min()))
    1453      545370 :         return cpl::NumericLimits<T>::min();  // below the range of T
    1454   119624726 :     else if (value > static_cast<U>(cpl::NumericLimits<T>::max()))
    1455      772965 :         return cpl::NumericLimits<T>::max();  // above the range of T
    1456             :     else  // NOTE(review): a NaN input fails both comparisons and reaches the rounding cast - confirm callers never pass NaN
    1457   118851926 :         return GWKRoundValueT<T, U>(value);
    1458             : }
    1459             : 
    1460      718914 : template <> float GWKClampValueT<float, double>(double dfValue)
    1461             : {
    1462      718914 :     return static_cast<float>(dfValue);  // no range comparison in this specialization, only the narrowing cast
    1463             : }
    1464             : // The double -> double variant is compiled only when `notused` is defined.
    1465             : #ifdef notused
    1466             : template <> double GWKClampValueT<double, double>(double dfValue)
    1467             : {
    1468             :     return dfValue;
    1469             : }
    1470             : #endif
    1471             : 
    1472             : /************************************************************************/
    1473             : /*                             AvoidNoData()                            */
    1474             : /************************************************************************/
    1475             : 
    1476        1283 : template <class T> inline void AvoidNoData(T *pDst, GPtrDiff_t iDstOffset)
    1477             : {  // Replaces pDst[iDstOffset] with an adjacent representable value of T.
    1478             :     if constexpr (cpl::NumericLimits<T>::is_integer)
    1479             :     {
    1480        1027 :         if (pDst[iDstOffset] == static_cast<T>(cpl::NumericLimits<T>::lowest()))
    1481             :         {
    1482         515 :             pDst[iDstOffset] =
    1483         515 :                 static_cast<T>(cpl::NumericLimits<T>::lowest() + 1);  // cannot decrement the lowest value: step up instead
    1484             :         }
    1485             :         else
    1486         512 :             pDst[iDstOffset]--;  // any other integer value: step down by one
    1487             :     }
    1488             :     else
    1489             :     {
    1490         256 :         if (pDst[iDstOffset] == cpl::NumericLimits<T>::max())
    1491             :         {
    1492             :             using std::nextafter;
    1493           0 :             pDst[iDstOffset] = nextafter(pDst[iDstOffset], static_cast<T>(0));  // at max(): move one step towards zero
    1494             :         }
    1495             :         else
    1496             :         {
    1497             :             using std::nextafter;
    1498         256 :             pDst[iDstOffset] =
    1499         256 :                 nextafter(pDst[iDstOffset], cpl::NumericLimits<T>::max());  // otherwise: move one step towards max()
    1500             :         }
    1501             :     }
    1502        1283 : }
    1503             : 
    1504             : /************************************************************************/
    1505             : /*                             AvoidNoData()                            */
    1506             : /************************************************************************/
    1507             : 
    1508             : template <class T>
    1509    13527030 : inline void AvoidNoData(const GDALWarpKernel *poWK, int iBand,
    1510             :                         GPtrDiff_t iDstOffset)
    1511             : {
    1512    13527030 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    1513    13527030 :     T *pDst = reinterpret_cast<T *>(pabyDst);  // view the band's destination buffer as elements of T
    1514             :     // Act only when a destination nodata array exists and the stored pixel equals this band's nodata value.
    1515    13527030 :     if (poWK->padfDstNoDataReal != nullptr &&
    1516     6419188 :         poWK->padfDstNoDataReal[iBand] == static_cast<double>(pDst[iDstOffset]))
    1517             :     {
    1518         640 :         AvoidNoData(pDst, iDstOffset);
    1519             :         // Warn only the first time; the flag is written through const_cast because poWK is a pointer to const.
    1520         640 :         if (!poWK->bWarnedAboutDstNoDataReplacement)
    1521             :         {
    1522          40 :             const_cast<GDALWarpKernel *>(poWK)
    1523             :                 ->bWarnedAboutDstNoDataReplacement = true;
    1524          40 :             CPLError(CE_Warning, CPLE_AppDefined,
    1525             :                      "Value %g in the source dataset has been changed to %g "
    1526             :                      "in the destination dataset to avoid being treated as "
    1527             :                      "NoData. To avoid this, select a different NoData value "
    1528             :                      "for the destination dataset.",
    1529          40 :                      poWK->padfDstNoDataReal[iBand],
    1530          40 :                      static_cast<double>(pDst[iDstOffset]));  // pixel value after the replacement above
    1531             :         }
    1532             :     }
    1533    13527030 : }
    1534             : 
    1535             : /************************************************************************/
    1536             : /*                       GWKAvoidNoDataMultiBand()                      */
    1537             : /************************************************************************/
    1538             : 
    1539             : template <class T>
    1540      524573 : static void GWKAvoidNoDataMultiBand(const GDALWarpKernel *poWK,
    1541             :                                     GPtrDiff_t iDstOffset)
    1542             : {
    1543      524573 :     T **ppDst = reinterpret_cast<T **>(poWK->papabyDstImage);  // per-band destination buffers viewed as T
    1544      524573 :     if (poWK->padfDstNoDataReal != nullptr)
    1545             :     {
    1546      208615 :         for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    1547             :         {
    1548      208294 :             if (poWK->padfDstNoDataReal[iBand] !=
    1549      208294 :                 static_cast<double>(ppDst[iBand][iDstOffset]))
    1550      205830 :                 return;  // at least one band differs from its nodata value: nothing to change
    1551             :         }
    1552         964 :         for (int iBand = 0; iBand < poWK->nBands; ++iBand)  // every band matched its nodata value: alter each band's pixel
    1553             :         {
    1554         643 :             AvoidNoData(ppDst[iBand], iDstOffset);
    1555             :         }
    1556             :         // One-time warning that lists the nodata tuple and the tuple written in its place.
    1557         321 :         if (!poWK->bWarnedAboutDstNoDataReplacement)
    1558             :         {
    1559          21 :             const_cast<GDALWarpKernel *>(poWK)
    1560             :                 ->bWarnedAboutDstNoDataReplacement = true;
    1561          42 :             std::string valueSrc, valueDst;
    1562          64 :             for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    1563             :             {
    1564          43 :                 if (!valueSrc.empty())  // comma-separate entries after the first
    1565             :                 {
    1566          22 :                     valueSrc += ',';
    1567          22 :                     valueDst += ',';
    1568             :                 }
    1569          43 :                 valueSrc += CPLSPrintf("%g", poWK->padfDstNoDataReal[iBand]);
    1570          43 :                 valueDst += CPLSPrintf(
    1571          43 :                     "%g", static_cast<double>(ppDst[iBand][iDstOffset]));
    1572             :             }
    1573          21 :             CPLError(CE_Warning, CPLE_AppDefined,
    1574             :                      "Value %s in the source dataset has been changed to %s "
    1575             :                      "in the destination dataset to avoid being treated as "
    1576             :                      "NoData. To avoid this, select a different NoData value "
    1577             :                      "for the destination dataset.",
    1578             :                      valueSrc.c_str(), valueDst.c_str());
    1579             :         }
    1580             :     }
    1581             : }
    1582             : 
    1583             : /************************************************************************/
    1584             : /*                       GWKAvoidNoDataMultiBand()                      */
    1585             : /************************************************************************/
    1586             : 
    1587      524573 : static void GWKAvoidNoDataMultiBand(const GDALWarpKernel *poWK,
    1588             :                                     GPtrDiff_t iDstOffset)
    1589             : {  // Forwards to the GWKAvoidNoDataMultiBand<T> instantiation matching poWK->eWorkingDataType.
    1590      524573 :     switch (poWK->eWorkingDataType)
    1591             :     {
    1592      523997 :         case GDT_UInt8:
    1593      523997 :             GWKAvoidNoDataMultiBand<std::uint8_t>(poWK, iDstOffset);
    1594      523997 :             break;
    1595             : 
    1596           0 :         case GDT_Int8:
    1597           0 :             GWKAvoidNoDataMultiBand<std::int8_t>(poWK, iDstOffset);
    1598           0 :             break;
    1599             : 
    1600         128 :         case GDT_Int16:
    1601         128 :             GWKAvoidNoDataMultiBand<std::int16_t>(poWK, iDstOffset);
    1602         128 :             break;
    1603             : 
    1604          64 :         case GDT_UInt16:
    1605          64 :             GWKAvoidNoDataMultiBand<std::uint16_t>(poWK, iDstOffset);
    1606          64 :             break;
    1607             : 
    1608          64 :         case GDT_Int32:
    1609          64 :             GWKAvoidNoDataMultiBand<std::int32_t>(poWK, iDstOffset);
    1610          64 :             break;
    1611             : 
    1612          64 :         case GDT_UInt32:
    1613          64 :             GWKAvoidNoDataMultiBand<std::uint32_t>(poWK, iDstOffset);
    1614          64 :             break;
    1615             : 
    1616          64 :         case GDT_Int64:
    1617          64 :             GWKAvoidNoDataMultiBand<std::int64_t>(poWK, iDstOffset);
    1618          64 :             break;
    1619             : 
    1620          64 :         case GDT_UInt64:
    1621          64 :             GWKAvoidNoDataMultiBand<std::uint64_t>(poWK, iDstOffset);
    1622          64 :             break;
    1623             : 
    1624           0 :         case GDT_Float16:
    1625           0 :             GWKAvoidNoDataMultiBand<GFloat16>(poWK, iDstOffset);
    1626           0 :             break;
    1627             : 
    1628          64 :         case GDT_Float32:
    1629          64 :             GWKAvoidNoDataMultiBand<float>(poWK, iDstOffset);
    1630          64 :             break;
    1631             : 
    1632          64 :         case GDT_Float64:
    1633          64 :             GWKAvoidNoDataMultiBand<double>(poWK, iDstOffset);
    1634          64 :             break;
    1635             : 
    1636           0 :         case GDT_CInt16:
    1637             :         case GDT_CInt32:
    1638             :         case GDT_CFloat16:
    1639             :         case GDT_CFloat32:
    1640             :         case GDT_CFloat64:
    1641             :         case GDT_Unknown:
    1642             :         case GDT_TypeCount:
    1643           0 :             break;  // complex, unknown and count enumerators: no action is taken
    1644             :     }
    1645      524573 : }
    1646             : 
    1647             : /************************************************************************/
    1648             : /*                         GWKSetPixelValueRealT()                      */
    1649             : /************************************************************************/
    1650             : 
                       // Writes one real sample of type T into band iBand of the destination
                       // image, at element index iDstOffset.
                       //
                       //   poWK        warp kernel; papabyDstImage, pafDstDensity and
                       //               panDstValid are read from it.
                       //   iBand       index into poWK->papabyDstImage.
                       //   iDstOffset  element (not byte) index; the band buffer is accessed
                       //               as an array of T.
                       //   dfDensity   weight of the incoming value. Below 0.0001 nothing is
                       //               written; below 0.9999 the value is blended with the
                       //               sample already stored at iDstOffset; otherwise the
                       //               value is stored as is.
                       //   value       sample to store.
                       //   bAvoidNoDataSingleBand
                       //               when true, AvoidNoData<T>() is called for the sample
                       //               that was just written.
                       //
                       // Every return statement in this function returns true.
    1651             : template <class T>
    1652     9992427 : static bool GWKSetPixelValueRealT(const GDALWarpKernel *poWK, int iBand,
    1653             :                                   GPtrDiff_t iDstOffset, double dfDensity,
    1654             :                                   T value, bool bAvoidNoDataSingleBand)
    1655             : {
    1656     9992427 :     T *pDst = reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
    1657             : 
    1658             :     /* -------------------------------------------------------------------- */
    1659             :     /*      If the source density is less than 100% we need to fetch the    */
    1660             :     /*      existing destination value, and mix it with the source to       */
    1661             :     /*      get the new "to apply" value.  Also compute composite           */
    1662             :     /*      density.                                                        */
    1663             :     /*                                                                      */
    1664             :     /*      We avoid mixing if density is very near one or risk mixing      */
    1665             :     /*      in very extreme nodata values and causing odd results (#1610)   */
    1666             :     /* -------------------------------------------------------------------- */
    1667     9992427 :     if (dfDensity < 0.9999)
    1668             :     {
                               // Negligible contribution: leave the destination sample
                               // untouched (AvoidNoData is not run on this path either).
    1669      945508 :         if (dfDensity < 0.0001)
    1670           0 :             return true;
    1671             : 
                               // Weight of the sample already in the destination: the per-pixel
                               // density when that array exists, otherwise 0 for a pixel whose
                               // bit is not set in panDstValid, otherwise fully dense (1.0).
    1672      945508 :         double dfDstDensity = 1.0;
    1673             : 
    1674      945508 :         if (poWK->pafDstDensity != nullptr)
    1675      944036 :             dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
    1676        1472 :         else if (poWK->panDstValid != nullptr &&
    1677           0 :                  !CPLMaskGet(poWK->panDstValid, iDstOffset))
    1678           0 :             dfDstDensity = 0.0;
    1679             : 
    1680             :         // It seems like we also ought to be testing panDstValid[] here!
    1681             : 
    1682      945508 :         const double dfDstReal = static_cast<double>(pDst[iDstOffset]);
    1683             : 
    1684             :         // The destination density is really only relative to the portion
    1685             :         // not occluded by the overlay.
    1686      945508 :         const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
    1687             : 
                               // Density-weighted average of the incoming and the existing
                               // sample. dfDensity is at least 0.0001 here, so the divisor is
                               // non-zero provided the stored destination density is not
                               // negative - NOTE(review): confirm that invariant with callers.
    1688      945508 :         const double dfReal =
    1689      945508 :             (double(value) * dfDensity + dfDstReal * dfDstInfluence) /
    1690      945508 :             (dfDensity + dfDstInfluence);
    1691             : 
    1692             :         /* --------------------------------------------------------------------
    1693             :          */
    1694             :         /*      Actually apply the destination value. */
    1695             :         /*                                                                      */
    1696             :         /*      Avoid using the destination nodata value for integer datatypes
    1697             :          */
    1698             :         /*      if by chance it is equal to the computed pixel value. */
    1699             :         /* --------------------------------------------------------------------
    1700             :          */
    1701      945508 :         pDst[iDstOffset] = GWKClampValueT<T>(dfReal);
    1702             :     }
    1703             :     else
    1704             :     {
                               // Source counts as opaque: overwrite without reading the
                               // destination sample.
    1705     9046916 :         pDst[iDstOffset] = value;
    1706             :     }
    1707             : 
    1708     9992427 :     if (bAvoidNoDataSingleBand)
    1709     8719761 :         AvoidNoData<T>(poWK, iBand, iDstOffset);
    1710             : 
    1711     9992427 :     return true;
    1712             : }
    1713             : 
    1714             : /************************************************************************/
    1715             : /*                       ClampRoundAndAvoidNoData()                     */
    1716             : /************************************************************************/
    1717             : 
                       // Converts dfReal to T and stores it at element iDstOffset of band iBand
                       // of the destination image (poWK->papabyDstImage[iBand] viewed as T *).
                       //
                       // Integer T: values below lowest() or above max() saturate to that bound;
                       // anything else is rounded by adding 0.5, using floor() for signed types
                       // and a truncating cast for unsigned types.
                       // Non-integer T: a plain static_cast from double.
                       //
                       // When bAvoidNoDataSingleBand is true, AvoidNoData<T>() is then called
                       // for the stored sample.
                       //
                       // NOTE(review): a NaN dfReal fails both range comparisons and reaches the
                       // integer cast; for 64-bit integer types max() converted to double may
                       // round up, so a dfReal equal to that rounded bound is not saturated.
                       // Confirm that callers cannot produce such inputs.
    1718             : template <class T>
    1719     5107725 : inline void ClampRoundAndAvoidNoData(const GDALWarpKernel *poWK, int iBand,
    1720             :                                      GPtrDiff_t iDstOffset, double dfReal,
    1721             :                                      bool bAvoidNoDataSingleBand)
    1722             : {
    1723     5107725 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    1724     5107725 :     T *pDst = reinterpret_cast<T *>(pabyDst);
    1725             : 
    1726             :     if constexpr (cpl::NumericLimits<T>::is_integer)
    1727             :     {
    1728             :         using std::floor;
                               // Saturate out-of-range values before rounding.
    1729     4610595 :         if (dfReal < static_cast<double>(cpl::NumericLimits<T>::lowest()))
    1730        5308 :             pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::lowest());
    1731     4605285 :         else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
    1732       23628 :             pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::max());
                               // Signed: floor() keeps negative values rounding to nearest
                               // instead of toward zero.
    1733             :         else if constexpr (cpl::NumericLimits<T>::is_signed)
    1734       10410 :             pDst[iDstOffset] = static_cast<T>(floor(dfReal + 0.5));
                               // Unsigned: dfReal is not below lowest() here, so the truncating
                               // cast of dfReal + 0.5 rounds to nearest.
    1735             :         else
    1736     4571245 :             pDst[iDstOffset] = static_cast<T>(dfReal + 0.5);
    1737             :     }
    1738             :     else
    1739             :     {
    1740      497130 :         pDst[iDstOffset] = static_cast<T>(dfReal);
    1741             :     }
    1742             : 
    1743     5107725 :     if (bAvoidNoDataSingleBand)
    1744     4807319 :         AvoidNoData<T>(poWK, iBand, iDstOffset);
    1745     5107725 : }
    1746             : 
    1747             : /************************************************************************/
    1748             : /*                          GWKSetPixelValue()                          */
    1749             : /************************************************************************/
    1750             : 
                       // Stores a possibly complex value (dfReal, dfImag) at element iDstOffset
                       // of band iBand of the destination image, interpreting the band buffer
                       // according to poWK->eWorkingDataType.
                       //
                       //   poWK        warp kernel; papabyDstImage, pafDstDensity, panDstValid
                       //               and eWorkingDataType are read from it.
                       //   iBand       index into poWK->papabyDstImage.
                       //   iDstOffset  pixel index. Complex types keep the real part at
                       //               iDstOffset * 2 and the imaginary part at
                       //               iDstOffset * 2 + 1.
                       //   dfDensity   weight of the incoming value. Below 0.0001 nothing is
                       //               written; below 0.9999 the value is blended with the
                       //               sample already stored in the destination.
                       //   dfReal      real part to store.
                       //   dfImag      imaginary part to store; only written for the complex
                       //               data types.
                       //   bAvoidNoDataSingleBand
                       //               forwarded to ClampRoundAndAvoidNoData<>() for the real
                       //               data types; the complex cases do not use it.
                       //
                       // Returns false when eWorkingDataType is GDT_Unknown or GDT_TypeCount,
                       // true otherwise.
    1751     4012410 : static bool GWKSetPixelValue(const GDALWarpKernel *poWK, int iBand,
    1752             :                              GPtrDiff_t iDstOffset, double dfDensity,
    1753             :                              double dfReal, double dfImag,
    1754             :                              bool bAvoidNoDataSingleBand)
    1755             : 
    1756             : {
    1757     4012410 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    1758             : 
    1759             :     /* -------------------------------------------------------------------- */
    1760             :     /*      If the source density is less than 100% we need to fetch the    */
    1761             :     /*      existing destination value, and mix it with the source to       */
    1762             :     /*      get the new "to apply" value.  Also compute composite           */
    1763             :     /*      density.                                                        */
    1764             :     /*                                                                      */
    1765             :     /*      We avoid mixing if density is very near one or risk mixing      */
    1766             :     /*      in very extreme nodata values and causing odd results (#1610)   */
    1767             :     /* -------------------------------------------------------------------- */
    1768     4012410 :     if (dfDensity < 0.9999)
    1769             :     {
                               // Negligible contribution: leave the destination untouched.
    1770         800 :         if (dfDensity < 0.0001)
    1771           0 :             return true;
    1772             : 
                               // Weight of the sample already in the destination: the per-pixel
                               // density when that array exists, otherwise 0 for a pixel whose
                               // bit is not set in panDstValid, otherwise fully dense (1.0).
    1773         800 :         double dfDstDensity = 1.0;
    1774         800 :         if (poWK->pafDstDensity != nullptr)
    1775         800 :             dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
    1776           0 :         else if (poWK->panDstValid != nullptr &&
    1777           0 :                  !CPLMaskGet(poWK->panDstValid, iDstOffset))
    1778           0 :             dfDstDensity = 0.0;
    1779             : 
    1780         800 :         double dfDstReal = 0.0;
    1781         800 :         double dfDstImag = 0.0;
    1782             :         // It seems like we also ought to be testing panDstValid[] here!
    1783             : 
                               // Fetch the current destination sample as doubles. Real types
                               // have no imaginary part; complex types are read as interleaved
                               // (real, imaginary) pairs.
    1784             :         // TODO(schwehr): Factor out this repeated type of set.
    1785         800 :         switch (poWK->eWorkingDataType)
    1786             :         {
    1787           0 :             case GDT_UInt8:
    1788           0 :                 dfDstReal = pabyDst[iDstOffset];
    1789           0 :                 dfDstImag = 0.0;
    1790           0 :                 break;
    1791             : 
    1792           0 :             case GDT_Int8:
    1793           0 :                 dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
    1794           0 :                 dfDstImag = 0.0;
    1795           0 :                 break;
    1796             : 
    1797         400 :             case GDT_Int16:
    1798         400 :                 dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
    1799         400 :                 dfDstImag = 0.0;
    1800         400 :                 break;
    1801             : 
    1802         400 :             case GDT_UInt16:
    1803         400 :                 dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
    1804         400 :                 dfDstImag = 0.0;
    1805         400 :                 break;
    1806             : 
    1807           0 :             case GDT_Int32:
    1808           0 :                 dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
    1809           0 :                 dfDstImag = 0.0;
    1810           0 :                 break;
    1811             : 
    1812           0 :             case GDT_UInt32:
    1813           0 :                 dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
    1814           0 :                 dfDstImag = 0.0;
    1815           0 :                 break;
    1816             : 
    1817           0 :             case GDT_Int64:
    1818           0 :                 dfDstReal = static_cast<double>(
    1819           0 :                     reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
    1820           0 :                 dfDstImag = 0.0;
    1821           0 :                 break;
    1822             : 
    1823           0 :             case GDT_UInt64:
    1824           0 :                 dfDstReal = static_cast<double>(
    1825           0 :                     reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
    1826           0 :                 dfDstImag = 0.0;
    1827           0 :                 break;
    1828             : 
    1829           0 :             case GDT_Float16:
    1830           0 :                 dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
    1831           0 :                 dfDstImag = 0.0;
    1832           0 :                 break;
    1833             : 
    1834           0 :             case GDT_Float32:
    1835           0 :                 dfDstReal =
    1836           0 :                     double(reinterpret_cast<float *>(pabyDst)[iDstOffset]);
    1837           0 :                 dfDstImag = 0.0;
    1838           0 :                 break;
    1839             : 
    1840           0 :             case GDT_Float64:
    1841           0 :                 dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
    1842           0 :                 dfDstImag = 0.0;
    1843           0 :                 break;
    1844             : 
    1845           0 :             case GDT_CInt16:
    1846           0 :                 dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2];
    1847           0 :                 dfDstImag =
    1848           0 :                     reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2 + 1];
    1849           0 :                 break;
    1850             : 
    1851           0 :             case GDT_CInt32:
    1852           0 :                 dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2];
    1853           0 :                 dfDstImag =
    1854           0 :                     reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2 + 1];
    1855           0 :                 break;
    1856             : 
    1857           0 :             case GDT_CFloat16:
    1858             :                 dfDstReal =
    1859           0 :                     reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2];
    1860             :                 dfDstImag =
    1861           0 :                     reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1];
    1862           0 :                 break;
    1863             : 
    1864           0 :             case GDT_CFloat32:
    1865           0 :                 dfDstReal =
    1866           0 :                     double(reinterpret_cast<float *>(pabyDst)[iDstOffset * 2]);
    1867           0 :                 dfDstImag = double(
    1868           0 :                     reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1]);
    1869           0 :                 break;
    1870             : 
    1871           0 :             case GDT_CFloat64:
    1872           0 :                 dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset * 2];
    1873           0 :                 dfDstImag =
    1874           0 :                     reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1];
    1875           0 :                 break;
    1876             : 
    1877           0 :             case GDT_Unknown:
    1878             :             case GDT_TypeCount:
    1879           0 :                 CPLAssert(false);
    1880             :                 return false;
    1881             :         }
    1882             : 
    1883             :         // The destination density is really only relative to the portion
    1884             :         // not occluded by the overlay.
    1885         800 :         const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
    1886             : 
                               // Density-weighted average, applied to each component with the
                               // same weights.
    1887         800 :         dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
    1888         800 :                  (dfDensity + dfDstInfluence);
    1889             : 
    1890         800 :         dfImag = (dfImag * dfDensity + dfDstImag * dfDstInfluence) /
    1891         800 :                  (dfDensity + dfDstInfluence);
    1892             :     }
    1893             : 
    1894             :     /* -------------------------------------------------------------------- */
    1895             :     /*      Actually apply the destination value.                           */
    1896             :     /*                                                                      */
    1897             :     /*      Avoid using the destination nodata value for integer datatypes  */
    1898             :     /*      if by chance it is equal to the computed pixel value.           */
    1899             :     /* -------------------------------------------------------------------- */
    1900             : 
                           // Real types delegate to ClampRoundAndAvoidNoData<>() and drop dfImag.
                           // Complex types are written inline below, one component at a time,
                           // and never go through AvoidNoData.
    1901     4012410 :     switch (poWK->eWorkingDataType)
    1902             :     {
    1903     3290010 :         case GDT_UInt8:
    1904     3290010 :             ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal,
    1905             :                                             bAvoidNoDataSingleBand);
    1906     3290010 :             break;
    1907             : 
    1908           0 :         case GDT_Int8:
    1909           0 :             ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal,
    1910             :                                             bAvoidNoDataSingleBand);
    1911           0 :             break;
    1912             : 
    1913        7472 :         case GDT_Int16:
    1914        7472 :             ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal,
    1915             :                                              bAvoidNoDataSingleBand);
    1916        7472 :             break;
    1917             : 
    1918         464 :         case GDT_UInt16:
    1919         464 :             ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal,
    1920             :                                               bAvoidNoDataSingleBand);
    1921         464 :             break;
    1922             : 
    1923          63 :         case GDT_UInt32:
    1924          63 :             ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal,
    1925             :                                               bAvoidNoDataSingleBand);
    1926          63 :             break;
    1927             : 
    1928          63 :         case GDT_Int32:
    1929          63 :             ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal,
    1930             :                                              bAvoidNoDataSingleBand);
    1931          63 :             break;
    1932             : 
    1933           0 :         case GDT_UInt64:
    1934           0 :             ClampRoundAndAvoidNoData<std::uint64_t>(
    1935             :                 poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
    1936           0 :             break;
    1937             : 
    1938           0 :         case GDT_Int64:
    1939           0 :             ClampRoundAndAvoidNoData<std::int64_t>(
    1940             :                 poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
    1941           0 :             break;
    1942             : 
    1943           0 :         case GDT_Float16:
    1944           0 :             ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal,
    1945             :                                                bAvoidNoDataSingleBand);
    1946           0 :             break;
    1947             : 
    1948      478957 :         case GDT_Float32:
    1949      478957 :             ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal,
    1950             :                                             bAvoidNoDataSingleBand);
    1951      478957 :             break;
    1952             : 
    1953         149 :         case GDT_Float64:
    1954         149 :             ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal,
    1955             :                                              bAvoidNoDataSingleBand);
    1956         149 :             break;
    1957             : 
                               // Complex 16-bit integers: saturate each component to the GInt16
                               // range, otherwise round to nearest with floor(x + 0.5).
    1958      234079 :         case GDT_CInt16:
    1959             :         {
    1960             :             typedef GInt16 T;
    1961      234079 :             if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
    1962           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1963           0 :                     cpl::NumericLimits<T>::min();
    1964      234079 :             else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
    1965           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1966           0 :                     cpl::NumericLimits<T>::max();
    1967             :             else
    1968      234079 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1969      234079 :                     static_cast<T>(floor(dfReal + 0.5));
    1970      234079 :             if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
    1971           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1972           0 :                     cpl::NumericLimits<T>::min();
    1973      234079 :             else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
    1974           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1975           0 :                     cpl::NumericLimits<T>::max();
    1976             :             else
    1977      234079 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1978      234079 :                     static_cast<T>(floor(dfImag + 0.5));
    1979      234079 :             break;
    1980             :         }
    1981             : 
                               // Complex 32-bit integers: same saturate-then-round handling as
                               // the GDT_CInt16 case, with GInt32 limits.
    1982         379 :         case GDT_CInt32:
    1983             :         {
    1984             :             typedef GInt32 T;
    1985         379 :             if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
    1986           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1987           0 :                     cpl::NumericLimits<T>::min();
    1988         379 :             else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
    1989           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1990           0 :                     cpl::NumericLimits<T>::max();
    1991             :             else
    1992         379 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1993         379 :                     static_cast<T>(floor(dfReal + 0.5));
    1994         379 :             if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
    1995           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1996           0 :                     cpl::NumericLimits<T>::min();
    1997         379 :             else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
    1998           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1999           0 :                     cpl::NumericLimits<T>::max();
    2000             :             else
    2001         379 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    2002         379 :                     static_cast<T>(floor(dfImag + 0.5));
    2003         379 :             break;
    2004             :         }
    2005             : 
                               // Complex floating-point types: each component is converted with
                               // a plain cast, with no range check.
    2006           0 :         case GDT_CFloat16:
    2007           0 :             reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2] =
    2008           0 :                 static_cast<GFloat16>(dfReal);
    2009           0 :             reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1] =
    2010           0 :                 static_cast<GFloat16>(dfImag);
    2011           0 :             break;
    2012             : 
    2013         394 :         case GDT_CFloat32:
    2014         394 :             reinterpret_cast<float *>(pabyDst)[iDstOffset * 2] =
    2015         394 :                 static_cast<float>(dfReal);
    2016         394 :             reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1] =
    2017         394 :                 static_cast<float>(dfImag);
    2018         394 :             break;
    2019             : 
    2020         380 :         case GDT_CFloat64:
    2021         380 :             reinterpret_cast<double *>(pabyDst)[iDstOffset * 2] = dfReal;
    2022         380 :             reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1] = dfImag;
    2023         380 :             break;
    2024             : 
    2025           0 :         case GDT_Unknown:
    2026             :         case GDT_TypeCount:
    2027           0 :             return false;
    2028             :     }
    2029             : 
    2030     4012410 :     return true;
    2031             : }
    2032             : 
    2033             : /************************************************************************/
    2034             : /*                       GWKSetPixelValueReal()                         */
    2035             : /************************************************************************/
    2036             : 
    2037     1330540 : static bool GWKSetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
    2038             :                                  GPtrDiff_t iDstOffset, double dfDensity,
    2039             :                                  double dfReal, bool bAvoidNoDataSingleBand)
    2040             : 
    2041             : {
    2042     1330540 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    2043             : 
    2044             :     /* -------------------------------------------------------------------- */
    2045             :     /*      If the source density is less than 100% we need to fetch the    */
    2046             :     /*      existing destination value, and mix it with the source to       */
    2047             :     /*      get the new "to apply" value.  Also compute composite           */
    2048             :     /*      density.                                                        */
    2049             :     /*                                                                      */
    2050             :     /*      We avoid mixing if density is very near one or risk mixing      */
    2051             :     /*      in very extreme nodata values and causing odd results (#1610)   */
    2052             :     /* -------------------------------------------------------------------- */
    2053     1330540 :     if (dfDensity < 0.9999)
    2054             :     {
    2055         600 :         if (dfDensity < 0.0001)
    2056           0 :             return true;
    2057             : 
    2058         600 :         double dfDstReal = 0.0;
    2059         600 :         double dfDstDensity = 1.0;
    2060             : 
    2061         600 :         if (poWK->pafDstDensity != nullptr)
    2062         600 :             dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
    2063           0 :         else if (poWK->panDstValid != nullptr &&
    2064           0 :                  !CPLMaskGet(poWK->panDstValid, iDstOffset))
    2065           0 :             dfDstDensity = 0.0;
    2066             : 
    2067             :         // It seems like we also ought to be testing panDstValid[] here!
    2068             : 
    2069         600 :         switch (poWK->eWorkingDataType)
    2070             :         {
    2071           0 :             case GDT_UInt8:
    2072           0 :                 dfDstReal = pabyDst[iDstOffset];
    2073           0 :                 break;
    2074             : 
    2075           0 :             case GDT_Int8:
    2076           0 :                 dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
    2077           0 :                 break;
    2078             : 
    2079         300 :             case GDT_Int16:
    2080         300 :                 dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
    2081         300 :                 break;
    2082             : 
    2083         300 :             case GDT_UInt16:
    2084         300 :                 dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
    2085         300 :                 break;
    2086             : 
    2087           0 :             case GDT_Int32:
    2088           0 :                 dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
    2089           0 :                 break;
    2090             : 
    2091           0 :             case GDT_UInt32:
    2092           0 :                 dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
    2093           0 :                 break;
    2094             : 
    2095           0 :             case GDT_Int64:
    2096           0 :                 dfDstReal = static_cast<double>(
    2097           0 :                     reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
    2098           0 :                 break;
    2099             : 
    2100           0 :             case GDT_UInt64:
    2101           0 :                 dfDstReal = static_cast<double>(
    2102           0 :                     reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
    2103           0 :                 break;
    2104             : 
    2105           0 :             case GDT_Float16:
    2106           0 :                 dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
    2107           0 :                 break;
    2108             : 
    2109           0 :             case GDT_Float32:
    2110           0 :                 dfDstReal =
    2111           0 :                     double(reinterpret_cast<float *>(pabyDst)[iDstOffset]);
    2112           0 :                 break;
    2113             : 
    2114           0 :             case GDT_Float64:
    2115           0 :                 dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
    2116           0 :                 break;
    2117             : 
    2118           0 :             case GDT_CInt16:
    2119             :             case GDT_CInt32:
    2120             :             case GDT_CFloat16:
    2121             :             case GDT_CFloat32:
    2122             :             case GDT_CFloat64:
    2123             :             case GDT_Unknown:
    2124             :             case GDT_TypeCount:
    2125           0 :                 CPLAssert(false);
    2126             :                 return false;
    2127             :         }
    2128             : 
    2129             :         // The destination density is really only relative to the portion
    2130             :         // not occluded by the overlay.
    2131         600 :         const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
    2132             : 
    2133         600 :         dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
    2134         600 :                  (dfDensity + dfDstInfluence);
    2135             :     }
    2136             : 
    2137             :     /* -------------------------------------------------------------------- */
    2138             :     /*      Actually apply the destination value.                           */
    2139             :     /*                                                                      */
    2140             :     /*      Avoid using the destination nodata value for integer datatypes  */
    2141             :     /*      if by chance it is equal to the computed pixel value.           */
    2142             :     /* -------------------------------------------------------------------- */
    2143             : 
    2144     1330540 :     switch (poWK->eWorkingDataType)
    2145             :     {
    2146     1308410 :         case GDT_UInt8:
    2147     1308410 :             ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal,
    2148             :                                             bAvoidNoDataSingleBand);
    2149     1308410 :             break;
    2150             : 
    2151           0 :         case GDT_Int8:
    2152           0 :             ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal,
    2153             :                                             bAvoidNoDataSingleBand);
    2154           0 :             break;
    2155             : 
    2156        1309 :         case GDT_Int16:
    2157        1309 :             ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal,
    2158             :                                              bAvoidNoDataSingleBand);
    2159        1309 :             break;
    2160             : 
    2161         475 :         case GDT_UInt16:
    2162         475 :             ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal,
    2163             :                                               bAvoidNoDataSingleBand);
    2164         475 :             break;
    2165             : 
    2166         539 :         case GDT_UInt32:
    2167         539 :             ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal,
    2168             :                                               bAvoidNoDataSingleBand);
    2169         539 :             break;
    2170             : 
    2171        1342 :         case GDT_Int32:
    2172        1342 :             ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal,
    2173             :                                              bAvoidNoDataSingleBand);
    2174        1342 :             break;
    2175             : 
    2176         224 :         case GDT_UInt64:
    2177         224 :             ClampRoundAndAvoidNoData<std::uint64_t>(
    2178             :                 poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
    2179         224 :             break;
    2180             : 
    2181         224 :         case GDT_Int64:
    2182         224 :             ClampRoundAndAvoidNoData<std::int64_t>(
    2183             :                 poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
    2184         224 :             break;
    2185             : 
    2186           0 :         case GDT_Float16:
    2187           0 :             ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal,
    2188             :                                                bAvoidNoDataSingleBand);
    2189           0 :             break;
    2190             : 
    2191        3538 :         case GDT_Float32:
    2192        3538 :             ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal,
    2193             :                                             bAvoidNoDataSingleBand);
    2194        3538 :             break;
    2195             : 
    2196       14486 :         case GDT_Float64:
    2197       14486 :             ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal,
    2198             :                                              bAvoidNoDataSingleBand);
    2199       14486 :             break;
    2200             : 
    2201           0 :         case GDT_CInt16:
    2202             :         case GDT_CInt32:
    2203             :         case GDT_CFloat16:
    2204             :         case GDT_CFloat32:
    2205             :         case GDT_CFloat64:
    2206           0 :             return false;
    2207             : 
    2208           0 :         case GDT_Unknown:
    2209             :         case GDT_TypeCount:
    2210           0 :             CPLAssert(false);
    2211             :             return false;
    2212             :     }
    2213             : 
    2214     1330540 :     return true;
    2215             : }
    2216             : 
    2217             : /************************************************************************/
    2218             : /*                          GWKGetPixelValue()                          */
    2219             : /************************************************************************/
    2220             : 
    2221             : /* Fetch one source pixel of band iBand as a (real, imaginary) pair of
                     :  * doubles, together with its density.
                     :  *
                     :  * poWK        warp kernel; this function reads papabySrcImage,
                     :  *             papanBandSrcValid, eWorkingDataType and
                     :  *             pafUnifiedSrcDensity from it.
                     :  * iBand       index used into papabySrcImage[] and papanBandSrcValid[].
                     :  * iSrcOffset  pixel index into the band buffer; complex types are read
                     :  *             as interleaved (real, imaginary) pairs at
                     :  *             2 * iSrcOffset and 2 * iSrcOffset + 1.
                     :  * pdfDensity  out: 0.0 when the per-band mask rejects the pixel or the
                     :  *             data type is GDT_Unknown/GDT_TypeCount; otherwise
                     :  *             pafUnifiedSrcDensity[iSrcOffset] when that array is set,
                     :  *             else 1.0.
                     :  * pdfReal     out: real part (left untouched when the per-band mask
                     :  *             rejects the pixel).
                     :  * pdfImag     out: imaginary part, 0.0 for non-complex types (left
                     :  *             untouched when the per-band mask rejects the pixel).
                     :  *
                     :  * Returns true when the resulting density is non-zero.
                     :  *
                     :  * It is assumed that panUnifiedSrcValid has been checked before: only
                     :  * the per-band mask is consulted here.
                     :  */
    2222             : 
    2223    30506400 : static bool GWKGetPixelValue(const GDALWarpKernel *poWK, int iBand,
    2224             :                              GPtrDiff_t iSrcOffset, double *pdfDensity,
    2225             :                              double *pdfReal, double *pdfImag)
    2226             : 
    2227             : {
    2228    30506400 :     GByte *pabySrc = poWK->papabySrcImage[iBand];
    2229             :     // Let the per-band validity mask, when there is one, veto the pixel.
    2230    61012700 :     if (poWK->papanBandSrcValid != nullptr &&
    2231    30506400 :         poWK->papanBandSrcValid[iBand] != nullptr &&
    2232           0 :         !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
    2233             :     {
    2234           0 :         *pdfDensity = 0.0;
    2235           0 :         return false;
    2236             :     }
    2237             :     // Default both outputs before the per-type read below.
    2238    30506400 :     *pdfReal = 0.0;
    2239    30506400 :     *pdfImag = 0.0;
    2240             : 
    2241             :     // TODO(schwehr): Fix casting.
    2242    30506400 :     switch (poWK->eWorkingDataType)
    2243             :     {
    2244    29429400 :         case GDT_UInt8:
    2245    29429400 :             *pdfReal = pabySrc[iSrcOffset];
    2246    29429400 :             *pdfImag = 0.0;
    2247    29429400 :             break;
    2248             : 
    2249           0 :         case GDT_Int8:
    2250           0 :             *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
    2251           0 :             *pdfImag = 0.0;
    2252           0 :             break;
    2253             : 
    2254       28232 :         case GDT_Int16:
    2255       28232 :             *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
    2256       28232 :             *pdfImag = 0.0;
    2257       28232 :             break;
    2258             : 
    2259         166 :         case GDT_UInt16:
    2260         166 :             *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
    2261         166 :             *pdfImag = 0.0;
    2262         166 :             break;
    2263             : 
    2264          63 :         case GDT_Int32:
    2265          63 :             *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
    2266          63 :             *pdfImag = 0.0;
    2267          63 :             break;
    2268             : 
    2269          63 :         case GDT_UInt32:
    2270          63 :             *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
    2271          63 :             *pdfImag = 0.0;
    2272          63 :             break;
    2273             : 
    2274           0 :         case GDT_Int64:
    2275           0 :             *pdfReal = static_cast<double>(
    2276           0 :                 reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
    2277           0 :             *pdfImag = 0.0;
    2278           0 :             break;
    2279             : 
    2280           0 :         case GDT_UInt64:
    2281           0 :             *pdfReal = static_cast<double>(
    2282           0 :                 reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
    2283           0 :             *pdfImag = 0.0;
    2284           0 :             break;
    2285             : 
    2286           0 :         case GDT_Float16:
    2287           0 :             *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
    2288           0 :             *pdfImag = 0.0;
    2289           0 :             break;
    2290             : 
    2291     1047220 :         case GDT_Float32:
    2292     1047220 :             *pdfReal = double(reinterpret_cast<float *>(pabySrc)[iSrcOffset]);
    2293     1047220 :             *pdfImag = 0.0;
    2294     1047220 :             break;
    2295             : 
    2296         587 :         case GDT_Float64:
    2297         587 :             *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
    2298         587 :             *pdfImag = 0.0;
    2299         587 :             break;
    2300             :         // Complex types: element 2 * iSrcOffset is real, the next is imaginary.
    2301         133 :         case GDT_CInt16:
    2302         133 :             *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2];
    2303         133 :             *pdfImag = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2 + 1];
    2304         133 :             break;
    2305             : 
    2306         133 :         case GDT_CInt32:
    2307         133 :             *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2];
    2308         133 :             *pdfImag = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2 + 1];
    2309         133 :             break;
    2310             : 
    2311           0 :         case GDT_CFloat16:
    2312           0 :             *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2];
    2313           0 :             *pdfImag =
    2314           0 :                 reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2 + 1];
    2315           0 :             break;
    2316             : 
    2317         194 :         case GDT_CFloat32:
    2318         194 :             *pdfReal =
    2319         194 :                 double(reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2]);
    2320         194 :             *pdfImag =
    2321         194 :                 double(reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2 + 1]);
    2322         194 :             break;
    2323             : 
    2324         138 :         case GDT_CFloat64:
    2325         138 :             *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2];
    2326         138 :             *pdfImag = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2 + 1];
    2327         138 :             break;
    2328             :         // Not a readable pixel type: report zero density and fail.
    2329           0 :         case GDT_Unknown:
    2330             :         case GDT_TypeCount:
    2331           0 :             CPLAssert(false);
    2332             :             *pdfDensity = 0.0;
    2333             :             return false;
    2334             :     }
    2335             :     // Density comes from the unified density array when there is one.
    2336    30506400 :     if (poWK->pafUnifiedSrcDensity != nullptr)
    2337     4194800 :         *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    2338             :     else
    2339    26311600 :         *pdfDensity = 1.0;
    2340             : 
    2341    30506400 :     return *pdfDensity != 0.0;
    2342             : }
    2343             : 
    2344             : /************************************************************************/
    2345             : /*                       GWKGetPixelValueReal()                         */
    2346             : /************************************************************************/
    2347             : /* Real-only single-pixel fetch: reads the pixel at iSrcOffset of band
                     :  * iBand into *pdfReal as a double.
                     :  *
                     :  * poWK        warp kernel; this function reads papabySrcImage,
                     :  *             papanBandSrcValid, eWorkingDataType and
                     :  *             pafUnifiedSrcDensity from it. panUnifiedSrcValid is not
                     :  *             consulted here.
                     :  * pdfDensity  out: 0.0 when the per-band validity mask rejects the
                     :  *             pixel; otherwise pafUnifiedSrcDensity[iSrcOffset] when
                     :  *             that array is set, else 1.0.
                     :  * pdfReal     out: pixel value; not written when the function returns
                     :  *             early (mask rejection or unsupported type).
                     :  *
                     :  * Returns true when the resulting density is non-zero.
                     :  *
                     :  * Complex working types are not handled: together with GDT_Unknown and
                     :  * GDT_TypeCount they reach CPLAssert(false) and return false.
                     :  * NOTE(review): on that path *pdfDensity is left unwritten, whereas
                     :  * GWKGetPixelValue() zeroes it before returning false - confirm that
                     :  * callers do not read the density after a false return.
                     :  */
    2348       15516 : static bool GWKGetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
    2349             :                                  GPtrDiff_t iSrcOffset, double *pdfDensity,
    2350             :                                  double *pdfReal)
    2351             : 
    2352             : {
    2353       15516 :     GByte *pabySrc = poWK->papabySrcImage[iBand];
    2354             :     // Let the per-band validity mask, when there is one, veto the pixel.
    2355       31034 :     if (poWK->papanBandSrcValid != nullptr &&
    2356       15518 :         poWK->papanBandSrcValid[iBand] != nullptr &&
    2357           2 :         !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
    2358             :     {
    2359           0 :         *pdfDensity = 0.0;
    2360           0 :         return false;
    2361             :     }
    2362             :     // Reinterpret the byte buffer as the working type and widen to double.
    2363       15516 :     switch (poWK->eWorkingDataType)
    2364             :     {
    2365           1 :         case GDT_UInt8:
    2366           1 :             *pdfReal = pabySrc[iSrcOffset];
    2367           1 :             break;
    2368             : 
    2369           0 :         case GDT_Int8:
    2370           0 :             *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
    2371           0 :             break;
    2372             : 
    2373           1 :         case GDT_Int16:
    2374           1 :             *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
    2375           1 :             break;
    2376             : 
    2377           1 :         case GDT_UInt16:
    2378           1 :             *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
    2379           1 :             break;
    2380             : 
    2381         982 :         case GDT_Int32:
    2382         982 :             *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
    2383         982 :             break;
    2384             : 
    2385         179 :         case GDT_UInt32:
    2386         179 :             *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
    2387         179 :             break;
    2388             : 
    2389         112 :         case GDT_Int64:
    2390         112 :             *pdfReal = static_cast<double>(
    2391         112 :                 reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
    2392         112 :             break;
    2393             : 
    2394         112 :         case GDT_UInt64:
    2395         112 :             *pdfReal = static_cast<double>(
    2396         112 :                 reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
    2397         112 :             break;
    2398             : 
    2399           0 :         case GDT_Float16:
    2400           0 :             *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
    2401           0 :             break;
    2402             : 
    2403           2 :         case GDT_Float32:
    2404           2 :             *pdfReal = double(reinterpret_cast<float *>(pabySrc)[iSrcOffset]);
    2405           2 :             break;
    2406             : 
    2407       14126 :         case GDT_Float64:
    2408       14126 :             *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
    2409       14126 :             break;
    2410             :         // Unsupported here: neither *pdfReal nor *pdfDensity is written.
    2411           0 :         case GDT_CInt16:
    2412             :         case GDT_CInt32:
    2413             :         case GDT_CFloat16:
    2414             :         case GDT_CFloat32:
    2415             :         case GDT_CFloat64:
    2416             :         case GDT_Unknown:
    2417             :         case GDT_TypeCount:
    2418           0 :             CPLAssert(false);
    2419             :             return false;
    2420             :     }
    2421             :     // Density comes from the unified density array when there is one.
    2422       15516 :     if (poWK->pafUnifiedSrcDensity != nullptr)
    2423           0 :         *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    2424             :     else
    2425       15516 :         *pdfDensity = 1.0;
    2426             : 
    2427       15516 :     return *pdfDensity != 0.0;
    2428             : }
    2429             : 
    2430             : /************************************************************************/
    2431             : /*                          GWKGetPixelRow()                            */
    2432             : /************************************************************************/
    2433             : 
    2434             : /* Fetch 2 * nHalfSrcLen consecutive source pixels of band iBand, starting
                     :  * at pixel index iSrcOffset, into adfReal[] (and padfImag[] for complex
                     :  * working types), optionally computing a per-pixel density.
                     :  *
                     :  * padfDensity may be nullptr. In that case no validity mask or density
                     :  * array is consulted and the function returns true once the values have
                     :  * been fetched.
                     :  *
                     :  * When padfDensity is not nullptr:
                     :  *  1. every density starts at 1.0;
                     :  *  2. pixels rejected by panUnifiedSrcValid (if set) get density 0.0, and
                     :  *     the function returns false, before fetching any value, when no
                     :  *     pixel of the row passes that mask;
                     :  *  3. the same is done with papanBandSrcValid[iBand] (if set);
                     :  *  4. after the fetch, each density still above
                     :  *     SRC_DENSITY_THRESHOLD_DOUBLE is set to 1.0 when
                     :  *     pafUnifiedSrcDensity is nullptr, or replaced by
                     :  *     pafUnifiedSrcDensity[iSrcOffset + i] otherwise.
                     :  * The return value is then true when at least one final density exceeds
                     :  * SRC_DENSITY_THRESHOLD_DOUBLE.
                     :  *
                     :  * For GDT_Unknown/GDT_TypeCount the function asserts, zeroes padfDensity
                     :  * (when provided) and returns false.
                     :  *
                     :  * padfImag[] is written without a null check for complex types, and is
                     :  * not written at all for the other types:
                     :  */
    2435             : /* it is assumed that padfImag[] is set to 0 by caller code for non-complex data-types. */
    2436             : 
    2437     2369710 : static bool GWKGetPixelRow(const GDALWarpKernel *poWK, int iBand,
    2438             :                            GPtrDiff_t iSrcOffset, int nHalfSrcLen,
    2439             :                            double *padfDensity, double adfReal[],
    2440             :                            double *padfImag)
    2441             : {
    2442             :     // We know that nSrcLen is even, so we can *always* unroll loops 2x.
    2443     2369710 :     const int nSrcLen = nHalfSrcLen * 2;
    2444     2369710 :     bool bHasValid = false;
    2445             : 
    2446     2369710 :     if (padfDensity != nullptr)
    2447             :     {
    2448             :         // Init the density.
    2449     3384030 :         for (int i = 0; i < nSrcLen; i += 2)
    2450             :         {
    2451     2211910 :             padfDensity[i] = 1.0;
    2452     2211910 :             padfDensity[i + 1] = 1.0;
    2453             :         }
    2454             :         // Stage 1: dataset-wide validity mask.
    2455     1172120 :         if (poWK->panUnifiedSrcValid != nullptr)
    2456             :         {
    2457     3281460 :             for (int i = 0; i < nSrcLen; i += 2)
    2458             :             {
    2459     2142070 :                 if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i))
    2460     2067740 :                     bHasValid = true;
    2461             :                 else
    2462       74323 :                     padfDensity[i] = 0.0;
    2463             : 
    2464     2142070 :                 if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i + 1))
    2465     2068400 :                     bHasValid = true;
    2466             :                 else
    2467       73668 :                     padfDensity[i + 1] = 0.0;
    2468             :             }
    2469             : 
    2470             :             // Reset or fail as needed.
    2471     1139400 :             if (bHasValid)
    2472     1116590 :                 bHasValid = false;
    2473             :             else
    2474       22806 :                 return false;
    2475             :         }
    2476             :         // Stage 2: per-band validity mask.
    2477     1149320 :         if (poWK->papanBandSrcValid != nullptr &&
    2478           0 :             poWK->papanBandSrcValid[iBand] != nullptr)
    2479             :         {
    2480           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2481             :             {
    2482           0 :                 if (CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset + i))
    2483           0 :                     bHasValid = true;
    2484             :                 else
    2485           0 :                     padfDensity[i] = 0.0;
    2486             : 
    2487           0 :                 if (CPLMaskGet(poWK->papanBandSrcValid[iBand],
    2488           0 :                                iSrcOffset + i + 1))
    2489           0 :                     bHasValid = true;
    2490             :                 else
    2491           0 :                     padfDensity[i + 1] = 0.0;
    2492             :             }
    2493             : 
    2494             :             // Reset or fail as needed.
    2495           0 :             if (bHasValid)
    2496           0 :                 bHasValid = false;
    2497             :             else
    2498           0 :                 return false;
    2499             :         }
    2500             :     }
    2501             : 
    2502             :     // TODO(schwehr): Fix casting.
    2503             :     // Fetch data.
    2504     2346910 :     switch (poWK->eWorkingDataType)
    2505             :     {
    2506     1136680 :         case GDT_UInt8:
    2507             :         {
    2508     1136680 :             GByte *pSrc =
    2509     1136680 :                 reinterpret_cast<GByte *>(poWK->papabySrcImage[iBand]);
    2510     1136680 :             pSrc += iSrcOffset;
    2511     3281570 :             for (int i = 0; i < nSrcLen; i += 2)
    2512             :             {
    2513     2144890 :                 adfReal[i] = pSrc[i];
    2514     2144890 :                 adfReal[i + 1] = pSrc[i + 1];
    2515             :             }
    2516     1136680 :             break;
    2517             :         }
    2518             : 
    2519           0 :         case GDT_Int8:
    2520             :         {
    2521           0 :             GInt8 *pSrc =
    2522           0 :                 reinterpret_cast<GInt8 *>(poWK->papabySrcImage[iBand]);
    2523           0 :             pSrc += iSrcOffset;
    2524           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2525             :             {
    2526           0 :                 adfReal[i] = pSrc[i];
    2527           0 :                 adfReal[i + 1] = pSrc[i + 1];
    2528             :             }
    2529           0 :             break;
    2530             :         }
    2531             : 
    2532        5950 :         case GDT_Int16:
    2533             :         {
    2534        5950 :             GInt16 *pSrc =
    2535        5950 :                 reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
    2536        5950 :             pSrc += iSrcOffset;
    2537       22164 :             for (int i = 0; i < nSrcLen; i += 2)
    2538             :             {
    2539       16214 :                 adfReal[i] = pSrc[i];
    2540       16214 :                 adfReal[i + 1] = pSrc[i + 1];
    2541             :             }
    2542        5950 :             break;
    2543             :         }
    2544             : 
    2545        4310 :         case GDT_UInt16:
    2546             :         {
    2547        4310 :             GUInt16 *pSrc =
    2548        4310 :                 reinterpret_cast<GUInt16 *>(poWK->papabySrcImage[iBand]);
    2549        4310 :             pSrc += iSrcOffset;
    2550       18884 :             for (int i = 0; i < nSrcLen; i += 2)
    2551             :             {
    2552       14574 :                 adfReal[i] = pSrc[i];
    2553       14574 :                 adfReal[i + 1] = pSrc[i + 1];
    2554             :             }
    2555        4310 :             break;
    2556             :         }
    2557             : 
    2558         946 :         case GDT_Int32:
    2559             :         {
    2560         946 :             GInt32 *pSrc =
    2561         946 :                 reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
    2562         946 :             pSrc += iSrcOffset;
    2563        2624 :             for (int i = 0; i < nSrcLen; i += 2)
    2564             :             {
    2565        1678 :                 adfReal[i] = pSrc[i];
    2566        1678 :                 adfReal[i + 1] = pSrc[i + 1];
    2567             :             }
    2568         946 :             break;
    2569             :         }
    2570             : 
    2571         946 :         case GDT_UInt32:
    2572             :         {
    2573         946 :             GUInt32 *pSrc =
    2574         946 :                 reinterpret_cast<GUInt32 *>(poWK->papabySrcImage[iBand]);
    2575         946 :             pSrc += iSrcOffset;
    2576        2624 :             for (int i = 0; i < nSrcLen; i += 2)
    2577             :             {
    2578        1678 :                 adfReal[i] = pSrc[i];
    2579        1678 :                 adfReal[i + 1] = pSrc[i + 1];
    2580             :             }
    2581         946 :             break;
    2582             :         }
    2583             : 
    2584         196 :         case GDT_Int64:
    2585             :         {
    2586         196 :             auto pSrc =
    2587         196 :                 reinterpret_cast<std::int64_t *>(poWK->papabySrcImage[iBand]);
    2588         196 :             pSrc += iSrcOffset;
    2589         392 :             for (int i = 0; i < nSrcLen; i += 2)
    2590             :             {
    2591         196 :                 adfReal[i] = static_cast<double>(pSrc[i]);
    2592         196 :                 adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
    2593             :             }
    2594         196 :             break;
    2595             :         }
    2596             : 
    2597         196 :         case GDT_UInt64:
    2598             :         {
    2599         196 :             auto pSrc =
    2600         196 :                 reinterpret_cast<std::uint64_t *>(poWK->papabySrcImage[iBand]);
    2601         196 :             pSrc += iSrcOffset;
    2602         392 :             for (int i = 0; i < nSrcLen; i += 2)
    2603             :             {
    2604         196 :                 adfReal[i] = static_cast<double>(pSrc[i]);
    2605         196 :                 adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
    2606             :             }
    2607         196 :             break;
    2608             :         }
    2609             : 
    2610           0 :         case GDT_Float16:
    2611             :         {
    2612           0 :             GFloat16 *pSrc =
    2613           0 :                 reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
    2614           0 :             pSrc += iSrcOffset;
    2615           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2616             :             {
    2617           0 :                 adfReal[i] = pSrc[i];
    2618           0 :                 adfReal[i + 1] = pSrc[i + 1];
    2619             :             }
    2620           0 :             break;
    2621             :         }
    2622             : 
    2623       25270 :         case GDT_Float32:
    2624             :         {
    2625       25270 :             float *pSrc =
    2626       25270 :                 reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
    2627       25270 :             pSrc += iSrcOffset;
    2628      121739 :             for (int i = 0; i < nSrcLen; i += 2)
    2629             :             {
    2630       96469 :                 adfReal[i] = double(pSrc[i]);
    2631       96469 :                 adfReal[i + 1] = double(pSrc[i + 1]);
    2632             :             }
    2633       25270 :             break;
    2634             :         }
    2635             : 
    2636         946 :         case GDT_Float64:
    2637             :         {
    2638         946 :             double *pSrc =
    2639         946 :                 reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
    2640         946 :             pSrc += iSrcOffset;
    2641        2624 :             for (int i = 0; i < nSrcLen; i += 2)
    2642             :             {
    2643        1678 :                 adfReal[i] = pSrc[i];
    2644        1678 :                 adfReal[i + 1] = pSrc[i + 1];
    2645             :             }
    2646         946 :             break;
    2647             :         }
    2648             :         // Complex types: pixel k occupies elements 2k (real) and 2k + 1 (imaginary).
    2649     1169220 :         case GDT_CInt16:
    2650             :         {
    2651     1169220 :             GInt16 *pSrc =
    2652     1169220 :                 reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
    2653     1169220 :             pSrc += 2 * iSrcOffset;
    2654     4676020 :             for (int i = 0; i < nSrcLen; i += 2)
    2655             :             {
    2656     3506800 :                 adfReal[i] = pSrc[2 * i];
    2657     3506800 :                 padfImag[i] = pSrc[2 * i + 1];
    2658             : 
    2659     3506800 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2660     3506800 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2661             :             }
    2662     1169220 :             break;
    2663             :         }
    2664             : 
    2665         750 :         case GDT_CInt32:
    2666             :         {
    2667         750 :             GInt32 *pSrc =
    2668         750 :                 reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
    2669         750 :             pSrc += 2 * iSrcOffset;
    2670        2232 :             for (int i = 0; i < nSrcLen; i += 2)
    2671             :             {
    2672        1482 :                 adfReal[i] = pSrc[2 * i];
    2673        1482 :                 padfImag[i] = pSrc[2 * i + 1];
    2674             : 
    2675        1482 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2676        1482 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2677             :             }
    2678         750 :             break;
    2679             :         }
    2680             : 
    2681           0 :         case GDT_CFloat16:
    2682             :         {
    2683           0 :             GFloat16 *pSrc =
    2684           0 :                 reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
    2685           0 :             pSrc += 2 * iSrcOffset;
    2686           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2687             :             {
    2688           0 :                 adfReal[i] = pSrc[2 * i];
    2689           0 :                 padfImag[i] = pSrc[2 * i + 1];
    2690             : 
    2691           0 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2692           0 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2693             :             }
    2694           0 :             break;
    2695             :         }
    2696             : 
    2697         750 :         case GDT_CFloat32:
    2698             :         {
    2699         750 :             float *pSrc =
    2700         750 :                 reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
    2701         750 :             pSrc += 2 * iSrcOffset;
    2702        2232 :             for (int i = 0; i < nSrcLen; i += 2)
    2703             :             {
    2704        1482 :                 adfReal[i] = double(pSrc[2 * i]);
    2705        1482 :                 padfImag[i] = double(pSrc[2 * i + 1]);
    2706             : 
    2707        1482 :                 adfReal[i + 1] = double(pSrc[2 * i + 2]);
    2708        1482 :                 padfImag[i + 1] = double(pSrc[2 * i + 3]);
    2709             :             }
    2710         750 :             break;
    2711             :         }
    2712             : 
    2713         750 :         case GDT_CFloat64:
    2714             :         {
    2715         750 :             double *pSrc =
    2716         750 :                 reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
    2717         750 :             pSrc += 2 * iSrcOffset;
    2718        2232 :             for (int i = 0; i < nSrcLen; i += 2)
    2719             :             {
    2720        1482 :                 adfReal[i] = pSrc[2 * i];
    2721        1482 :                 padfImag[i] = pSrc[2 * i + 1];
    2722             : 
    2723        1482 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2724        1482 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2725             :             }
    2726         750 :             break;
    2727             :         }
    2728             :         // Not a readable pixel type: zero any requested densities and fail.
    2729           0 :         case GDT_Unknown:
    2730             :         case GDT_TypeCount:
    2731           0 :             CPLAssert(false);
    2732             :             if (padfDensity)
    2733             :                 memset(padfDensity, 0, nSrcLen * sizeof(double));
    2734             :             return false;
    2735             :     }
    2736             :     // Without a density buffer there is nothing left to compute.
    2737     2346910 :     if (padfDensity == nullptr)
    2738     1197590 :         return true;
    2739             :     // Finalise densities; bHasValid was reset to false by the mask stages above.
    2740     1149320 :     if (poWK->pafUnifiedSrcDensity == nullptr)
    2741             :     {
    2742     3256740 :         for (int i = 0; i < nSrcLen; i += 2)
    2743             :         {
    2744             :             // Take into account earlier calcs.
    2745     2127390 :             if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2746             :             {
    2747     2087480 :                 padfDensity[i] = 1.0;
    2748     2087480 :                 bHasValid = true;
    2749             :             }
    2750             : 
    2751     2127390 :             if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2752             :             {
    2753     2088140 :                 padfDensity[i + 1] = 1.0;
    2754     2088140 :                 bHasValid = true;
    2755             :             }
    2756             :         }
    2757             :     }
    2758             :     else
    2759             :     {
    2760       70068 :         for (int i = 0; i < nSrcLen; i += 2)
    2761             :         {
    2762       50103 :             if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2763       50103 :                 padfDensity[i] =
    2764       50103 :                     double(poWK->pafUnifiedSrcDensity[iSrcOffset + i]);
    2765       50103 :             if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2766       49252 :                 bHasValid = true;
    2767             : 
    2768       50103 :             if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2769       50103 :                 padfDensity[i + 1] =
    2770       50103 :                     double(poWK->pafUnifiedSrcDensity[iSrcOffset + i + 1]);
    2771       50103 :             if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2772       49186 :                 bHasValid = true;
    2773             :         }
    2774             :     }
    2775             : 
    2776     1149320 :     return bHasValid;
    2777             : }
    2778             : 
    2779             : /************************************************************************/
    2780             : /*                          GWKGetPixelT()                              */
    2781             : /************************************************************************/
    2782             : // Read one pixel of source band iBand at linear offset iSrcOffset.
                       //
                       // If the unified validity mask, or the per-band validity mask of iBand,
                       // is present and flags the pixel as invalid, *pdfDensity is set to 0.0,
                       // *pValue is left untouched and false is returned.  Otherwise the pixel
                       // is copied to *pValue and *pdfDensity is set to 1.0 when there is no
                       // unified density buffer, or to that buffer's value at iSrcOffset.
                       // Returns true only when the resulting density is non-zero.
                       // iSrcOffset is not bounds-checked here.
    2783             : template <class T>
    2784    10002719 : static bool GWKGetPixelT(const GDALWarpKernel *poWK, int iBand,
    2785             :                          GPtrDiff_t iSrcOffset, double *pdfDensity, T *pValue)
    2786             : 
    2787             : {
    2788    10002719 :     T *pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
    2789             : // Reject the pixel if either validity mask (when allocated) says it is invalid.
    2790    22733143 :     if ((poWK->panUnifiedSrcValid != nullptr &&
    2791    20005418 :          !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset)) ||
    2792    10002719 :         (poWK->papanBandSrcValid != nullptr &&
    2793      589836 :          poWK->papanBandSrcValid[iBand] != nullptr &&
    2794      589836 :          !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset)))
    2795             :     {
    2796           9 :         *pdfDensity = 0.0;
    2797           9 :         return false;
    2798             :     }
    2799             : 
    2800    10002709 :     *pValue = pSrc[iSrcOffset];
    2801             : // No density buffer means every valid pixel is fully dense.
    2802    10002709 :     if (poWK->pafUnifiedSrcDensity == nullptr)
    2803     8880346 :         *pdfDensity = 1.0;
    2804             :     else
    2805     1122362 :         *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    2806             : 
    2807    10002709 :     return *pdfDensity != 0.0;
    2808             : }
    2809             : 
    2810             : /************************************************************************/
    2811             : /*                        GWKBilinearResample()                         */
    2812             : /*     Set of bilinear interpolators                                    */
    2813             : /************************************************************************/
    2814             : // Bilinear interpolation of source band iBand at (dfSrcX, dfSrcY), taking
                       // per-pixel density into account.
                       //
                       // The (up to) four neighbouring pixels are fetched one row at a time with
                       // GWKGetPixelRow().  A neighbour contributes only if it lies inside the
                       // image and its density exceeds SRC_DENSITY_THRESHOLD_DOUBLE; the weighted
                       // sums are then divided by the total weight actually used.
                       //
                       // Outputs: *pdfReal and *pdfImag receive the interpolated value and
                       // *pdfDensity the interpolated density.
                       //
                       // Returns true when a value was produced, false when the usable weight is
                       // below 0.00001 (the three outputs are then set to 0.0).
    2815       77448 : static bool GWKBilinearResample4Sample(const GDALWarpKernel *poWK, int iBand,
    2816             :                                        double dfSrcX, double dfSrcY,
    2817             :                                        double *pdfDensity, double *pdfReal,
    2818             :                                        double *pdfImag)
    2819             : 
    2820             : {
    2821             :     // Save as local variables to avoid following pointers.
    2822       77448 :     const int nSrcXSize = poWK->nSrcXSize;
    2823       77448 :     const int nSrcYSize = poWK->nSrcYSize;
    2824             : // (iSrcX, iSrcY) is the upper-left neighbour; dfRatioX / dfRatioY are the weights of its column / row.
    2825       77448 :     int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    2826       77448 :     int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    2827       77448 :     double dfRatioX = 1.5 - (dfSrcX - iSrcX);
    2828       77448 :     double dfRatioY = 1.5 - (dfSrcY - iSrcY);
    2829       77448 :     bool bShifted = false;
    2830             : // Position before the centre of the first column / row: use that column / row alone, with weight 1.
    2831       77448 :     if (iSrcX == -1)
    2832             :     {
    2833        1534 :         iSrcX = 0;
    2834        1534 :         dfRatioX = 1;
    2835             :     }
    2836       77448 :     if (iSrcY == -1)
    2837             :     {
    2838        7734 :         iSrcY = 0;
    2839        7734 :         dfRatioY = 1;
    2840             :     }
    2841       77448 :     GPtrDiff_t iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    2842             : 
    2843             :     // Shift one pixel back when a row read would start on the very last pixel, so we don't overrun the array.
    2844       77448 :     if (static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize == iSrcOffset + 1 ||
    2845       77330 :         static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize ==
    2846       77330 :             iSrcOffset + nSrcXSize + 1)
    2847             :     {
    2848         230 :         bShifted = true;
    2849         230 :         --iSrcOffset;
    2850             :     }
    2851             : 
    2852       77448 :     double adfDensity[2] = {0.0, 0.0};
    2853       77448 :     double adfReal[2] = {0.0, 0.0};
    2854       77448 :     double adfImag[2] = {0.0, 0.0};
    2855       77448 :     double dfAccumulatorReal = 0.0;
    2856       77448 :     double dfAccumulatorImag = 0.0;
    2857       77448 :     double dfAccumulatorDensity = 0.0;
    2858       77448 :     double dfAccumulatorDivisor = 0.0;
    2859             : 
    2860       77448 :     const GPtrDiff_t nSrcPixels =
    2861       77448 :         static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize;
    2862             :     // Get upper pixel row (slot 0 = left pixel, slot 1 = right pixel).
    2863       77448 :     if (iSrcY >= 0 && iSrcY < nSrcYSize && iSrcOffset >= 0 &&
    2864      154896 :         iSrcOffset < nSrcPixels &&
    2865       77448 :         GWKGetPixelRow(poWK, iBand, iSrcOffset, 1, adfDensity, adfReal,
    2866             :                        adfImag))
    2867             :     {
    2868       71504 :         double dfMult1 = dfRatioX * dfRatioY;
    2869       71504 :         double dfMult2 = (1.0 - dfRatioX) * dfRatioY;
    2870             : 
    2871             :         // Shifting corrected: the wanted left pixel was read into slot 1.
    2872       71504 :         if (bShifted)
    2873             :         {
    2874         230 :             adfReal[0] = adfReal[1];
    2875         230 :             adfImag[0] = adfImag[1];
    2876         230 :             adfDensity[0] = adfDensity[1];
    2877             :         }
    2878             : 
    2879             :         // Upper Left Pixel.
    2880       71504 :         if (iSrcX >= 0 && iSrcX < nSrcXSize &&
    2881       71504 :             adfDensity[0] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2882             :         {
    2883       66050 :             dfAccumulatorDivisor += dfMult1;
    2884             : 
    2885       66050 :             dfAccumulatorReal += adfReal[0] * dfMult1;
    2886       66050 :             dfAccumulatorImag += adfImag[0] * dfMult1;
    2887       66050 :             dfAccumulatorDensity += adfDensity[0] * dfMult1;
    2888             :         }
    2889             : 
    2890             :         // Upper Right Pixel.
    2891       71504 :         if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
    2892       70609 :             adfDensity[1] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2893             :         {
    2894       65335 :             dfAccumulatorDivisor += dfMult2;
    2895             : 
    2896       65335 :             dfAccumulatorReal += adfReal[1] * dfMult2;
    2897       65335 :             dfAccumulatorImag += adfImag[1] * dfMult2;
    2898       65335 :             dfAccumulatorDensity += adfDensity[1] * dfMult2;
    2899             :         }
    2900             :     }
    2901             : 
    2902             :     // Get lower pixel row (one image row further, same slots).
    2903       77448 :     if (iSrcY + 1 >= 0 && iSrcY + 1 < nSrcYSize &&
    2904      228032 :         iSrcOffset + nSrcXSize >= 0 && iSrcOffset + nSrcXSize < nSrcPixels &&
    2905       73136 :         GWKGetPixelRow(poWK, iBand, iSrcOffset + nSrcXSize, 1, adfDensity,
    2906             :                        adfReal, adfImag))
    2907             :     {
    2908       67577 :         double dfMult1 = dfRatioX * (1.0 - dfRatioY);
    2909       67577 :         double dfMult2 = (1.0 - dfRatioX) * (1.0 - dfRatioY);
    2910             : 
    2911             :         // Shifting corrected
    2912       67577 :         if (bShifted)
    2913             :         {
    2914         112 :             adfReal[0] = adfReal[1];
    2915         112 :             adfImag[0] = adfImag[1];
    2916         112 :             adfDensity[0] = adfDensity[1];
    2917             :         }
    2918             : 
    2919             :         // Lower Left Pixel
    2920       67577 :         if (iSrcX >= 0 && iSrcX < nSrcXSize &&
    2921       67577 :             adfDensity[0] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2922             :         {
    2923       62298 :             dfAccumulatorDivisor += dfMult1;
    2924             : 
    2925       62298 :             dfAccumulatorReal += adfReal[0] * dfMult1;
    2926       62298 :             dfAccumulatorImag += adfImag[0] * dfMult1;
    2927       62298 :             dfAccumulatorDensity += adfDensity[0] * dfMult1;
    2928             :         }
    2929             : 
    2930             :         // Lower Right Pixel.
    2931       67577 :         if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
    2932       66800 :             adfDensity[1] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2933             :         {
    2934       61823 :             dfAccumulatorDivisor += dfMult2;
    2935             : 
    2936       61823 :             dfAccumulatorReal += adfReal[1] * dfMult2;
    2937       61823 :             dfAccumulatorImag += adfImag[1] * dfMult2;
    2938       61823 :             dfAccumulatorDensity += adfDensity[1] * dfMult2;
    2939             :         }
    2940             :     }
    2941             : 
    2942             :     /* -------------------------------------------------------------------- */
    2943             :     /*      Return result.                                                  */
    2944             :     /* -------------------------------------------------------------------- */
    2945       77448 :     if (dfAccumulatorDivisor == 1.0)
    2946             :     {  // Weights sum to exactly 1: no renormalisation needed, and a value was produced.
    2947       45929 :         *pdfReal = dfAccumulatorReal;
    2948       45929 :         *pdfImag = dfAccumulatorImag;
    2949       45929 :         *pdfDensity = dfAccumulatorDensity;
    2950       45929 :         return true;
    2951             :     }
    2952       31519 :     else if (dfAccumulatorDivisor < 0.00001)
    2953             :     {  // No usable neighbour: report failure with zeroed outputs.
    2954           0 :         *pdfReal = 0.0;
    2955           0 :         *pdfImag = 0.0;
    2956           0 :         *pdfDensity = 0.0;
    2957           0 :         return false;
    2958             :     }
    2959             :     else
    2960             :     {  // Some neighbours were skipped: renormalise by the weight actually used.
    2961       31519 :         *pdfReal = dfAccumulatorReal / dfAccumulatorDivisor;
    2962       31519 :         *pdfImag = dfAccumulatorImag / dfAccumulatorDivisor;
    2963       31519 :         *pdfDensity = dfAccumulatorDensity / dfAccumulatorDivisor;
    2964       31519 :         return true;
    2965             :     }
    2966             : }
    2967             : // Bilinear interpolation of source band iBand at (dfSrcX, dfSrcY) that
                       // consults neither validity masks nor density: pixels are read directly
                       // from papabySrcImage[iBand], viewed as an array of T.
                       //
                       // When the four neighbours are all inside the image they are blended
                       // directly.  Otherwise only the neighbours inside the image are used and
                       // the sum is divided by the total weight of those neighbours.  The result
                       // is converted with GWKRoundValueT<T>() and stored in *pValue.
                       //
                       // Returns false (with *pValue = 0) when the usable weight is below
                       // 0.00001, true otherwise.
    2968             : template <class T>
    2969     6765770 : static bool GWKBilinearResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
    2970             :                                                int iBand, double dfSrcX,
    2971             :                                                double dfSrcY, T *pValue)
    2972             : 
    2973             : {
    2974             : // (iSrcX, iSrcY) is the upper-left neighbour; dfRatioX / dfRatioY are the weights of its column / row.
    2975     6765770 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    2976     6765770 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    2977     6765770 :     GPtrDiff_t iSrcOffset =
    2978     6765770 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    2979     6765770 :     const double dfRatioX = 1.5 - (dfSrcX - iSrcX);
    2980     6765770 :     const double dfRatioY = 1.5 - (dfSrcY - iSrcY);
    2981             : 
    2982     6765770 :     const T *const pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
    2983             : // Fast path: all four neighbours are inside the image, so the weights sum to 1.
    2984     6765770 :     if (iSrcX >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
    2985     4482638 :         iSrcY + 1 < poWK->nSrcYSize)
    2986             :     {
    2987     4439120 :         const double dfAccumulator =
    2988     4439120 :             (double(pSrc[iSrcOffset]) * dfRatioX +
    2989     4439120 :              double(pSrc[iSrcOffset + 1]) * (1.0 - dfRatioX)) *
    2990             :                 dfRatioY +
    2991     4439120 :             (double(pSrc[iSrcOffset + poWK->nSrcXSize]) * dfRatioX +
    2992     4439120 :              double(pSrc[iSrcOffset + 1 + poWK->nSrcXSize]) *
    2993     4439120 :                  (1.0 - dfRatioX)) *
    2994     4439120 :                 (1.0 - dfRatioY);
    2995             : 
    2996     4439120 :         *pValue = GWKRoundValueT<T>(dfAccumulator);
    2997             : 
    2998     4439120 :         return true;
    2999             :     }
    3000             : // Border case: accumulate only the neighbours that lie inside the image.
    3001     2326650 :     double dfAccumulatorDivisor = 0.0;
    3002     2326650 :     double dfAccumulator = 0.0;
    3003             : 
    3004             :     // Upper Left Pixel.
    3005     2326650 :     if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY >= 0 &&
    3006      272257 :         iSrcY < poWK->nSrcYSize)
    3007             :     {
    3008      272257 :         const double dfMult = dfRatioX * dfRatioY;
    3009             : 
    3010      272257 :         dfAccumulatorDivisor += dfMult;
    3011             : 
    3012      272257 :         dfAccumulator += double(pSrc[iSrcOffset]) * dfMult;
    3013             :     }
    3014             : 
    3015             :     // Upper Right Pixel.
    3016     2326650 :     if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
    3017     1973090 :         iSrcY < poWK->nSrcYSize)
    3018             :     {
    3019     1973090 :         const double dfMult = (1.0 - dfRatioX) * dfRatioY;
    3020             : 
    3021     1973090 :         dfAccumulatorDivisor += dfMult;
    3022             : 
    3023     1973090 :         dfAccumulator += double(pSrc[iSrcOffset + 1]) * dfMult;
    3024             :     }
    3025             : 
    3026             :     // Lower Right Pixel.
    3027     2326650 :     if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
    3028     2064364 :         iSrcY + 1 < poWK->nSrcYSize)
    3029             :     {
    3030     1987572 :         const double dfMult = (1.0 - dfRatioX) * (1.0 - dfRatioY);
    3031             : 
    3032     1987572 :         dfAccumulatorDivisor += dfMult;
    3033             : 
    3034     1987572 :         dfAccumulator +=
    3035     1987572 :             double(pSrc[iSrcOffset + 1 + poWK->nSrcXSize]) * dfMult;
    3036             :     }
    3037             : 
    3038             :     // Lower Left Pixel.
    3039     2326650 :     if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
    3040      363504 :         iSrcY + 1 < poWK->nSrcYSize)
    3041             :     {
    3042      286487 :         const double dfMult = dfRatioX * (1.0 - dfRatioY);
    3043             : 
    3044      286487 :         dfAccumulatorDivisor += dfMult;
    3045             : 
    3046      286487 :         dfAccumulator += double(pSrc[iSrcOffset + poWK->nSrcXSize]) * dfMult;
    3047             :     }
    3048             : 
    3049             :     /* -------------------------------------------------------------------- */
    3050             :     /*      Return result.                                                  */
    3051             :     /* -------------------------------------------------------------------- */
    3052     2326650 :     double dfValue = 0.0;
    3053             : 
    3054     2326650 :     if (dfAccumulatorDivisor < 0.00001)
    3055             :     {  // No neighbour inside the image.
    3056           0 :         *pValue = 0;
    3057           0 :         return false;
    3058             :     }
    3059     2326650 :     else if (dfAccumulatorDivisor == 1.0)
    3060             :     {  // Already normalised.
    3061        7320 :         dfValue = dfAccumulator;
    3062             :     }
    3063             :     else
    3064             :     {  // Renormalise by the weight actually used.
    3065     2319328 :         dfValue = dfAccumulator / dfAccumulatorDivisor;
    3066             :     }
    3067             : 
    3068     2326650 :     *pValue = GWKRoundValueT<T>(dfValue);
    3069             : 
    3070     2326650 :     return true;
    3071             : }
    3072             : 
    3073             : /************************************************************************/
    3074             : /*                        GWKCubicResample()                            */
    3075             : /*     Set of bicubic interpolators using cubic convolution.            */
    3076             : /************************************************************************/
    3077             : 
    3078             : // http://verona.fi-p.unam.mx/boris/practicas/CubConvInterp.pdf Formula 18
    3079             : // or http://en.wikipedia.org/wiki/Cubic_Hermite_spline : CINTx(p_1,p0,p1,p2)
    3080             : // http://en.wikipedia.org/wiki/Bicubic_interpolation: matrix notation
    3081             : // Evaluate the cubic convolution polynomial
                       //   f1 + 0.5 * (distance1 * (f2 - f0)
                       //               + distance2 * (2 f0 - 5 f1 + 4 f2 - f3)
                       //               + distance3 * (3 (f1 - f2) + f3 - f0)).
                       // The result is f1 when the three distances are 0 and f2 when they are 1.
                       // NOTE(review): distance1/2/3 are presumably t, t^2 and t^3 for a
                       // fractional offset t between samples f1 and f2 - confirm against callers.
    3082             : template <typename T>
    3083     1742940 : static inline T CubicConvolution(T distance1, T distance2, T distance3, T f0,
    3084             :                                  T f1, T f2, T f3)
    3085             : {
    3086     1742940 :     return (f1 + T(0.5) * (distance1 * (f2 - f0) +
    3087     1742940 :                            distance2 * (2 * f0 - 5 * f1 + 4 * f2 - f3) +
    3088     1742940 :                            distance3 * (3 * (f1 - f2) + f3 - f0)));
    3089             : }
    3090             : 
    3091             : /************************************************************************/
    3092             : /*                       GWKCubicComputeWeights()                       */
    3093             : /************************************************************************/
    3094             : 
    3095             : // coeffs[2] = 1.0 - (coeffs[0] + coeffs[1] + coeffs[3]), since the weights sum to 1.
    3096             : // Fill coeffs[0..3] with the cubic convolution weights for offset x:
                       //   coeffs[0] = (-x + 2 x^2 - x^3) / 2
                       //   coeffs[1] = 1 + (-5 x^2 + 3 x^3) / 2
                       //   coeffs[2] = (x + 4 x^2 - 3 x^3) / 2
                       //   coeffs[3] = (-x^2 + x^3) / 2
                       // The four weights sum to 1 for every x; for x == 0 they are {0, 1, 0, 0}.
    3097             : template <typename T>
    3098    75432480 : static inline void GWKCubicComputeWeights(T x, T coeffs[4])
    3099             : {
    3100    75432480 :     const T halfX = T(0.5) * x;
    3101    75432480 :     const T threeX = T(3.0) * x;
    3102    75432480 :     const T halfX2 = halfX * x;
    3103             : // Each polynomial is written in nested (Horner-like) form using the shared products above.
    3104    75432480 :     coeffs[0] = halfX * (-1 + x * (2 - x));
    3105    75432480 :     coeffs[1] = 1 + halfX2 * (-5 + threeX);
    3106    75432480 :     coeffs[2] = halfX * (1 + x * (4 - threeX));
    3107    75432480 :     coeffs[3] = halfX2 * (-1 + x);
    3108    75432480 : }
    3109             : // Dot product of the 4 weights v1 with the 4 samples v2; each sample is converted to double first.
    3110    14411416 : template <typename T> inline double CONVOL4(const double v1[4], const T v2[4])
    3111             : {
    3112    14411416 :     return v1[0] * double(v2[0]) + v1[1] * double(v2[1]) +
    3113    14411416 :            v1[2] * double(v2[2]) + v1[3] * double(v2[3]);
    3114             : }
    3115             : 
    3116             : #if 0  // Disabled: nothing up to the matching #endif is compiled.
    3117             : // Optimal (in theory...) for max 2 convolutions: 14 multiplications
    3118             : // instead of 17.
    3119             : // TODO(schwehr): Use an inline function.
    3120             : #define GWKCubicComputeWeights_Optim2MAX(dfX_, adfCoeffs, dfHalfX)             \
    3121             :     {                                                                          \
    3122             :         const double dfX = dfX_;                                               \
    3123             :         dfHalfX = 0.5 * dfX;                                                   \
    3124             :         const double dfThreeX = 3.0 * dfX;                                     \
    3125             :         const double dfXMinus1 = dfX - 1;                                      \
    3126             :                                                                                \
    3127             :         adfCoeffs[0] = -1 + dfX * (2 - dfX);                                   \
    3128             :         adfCoeffs[1] = dfX * (-5 + dfThreeX);                                  \
    3129             :         /*adfCoeffs[2] = 1 + dfX * (4 - dfThreeX);*/                           \
    3130             :         adfCoeffs[2] = -dfXMinus1 - adfCoeffs[1];                              \
    3131             :         /*adfCoeffs[3] = dfX * (-1 + dfX); */                                  \
    3132             :         adfCoeffs[3] = dfXMinus1 - adfCoeffs[0];                               \
    3133             :     }
    3134             : 
    3135             : // TODO(schwehr): Use an inline function.
    3136             : #define CONVOL4_Optim2MAX(adfCoeffs, v, dfHalfX)                               \
    3137             :     ((v)[1] + (dfHalfX) * ((adfCoeffs)[0] * (v)[0] + (adfCoeffs)[1] * (v)[1] + \
    3138             :                            (adfCoeffs)[2] * (v)[2] + (adfCoeffs)[3] * (v)[3]))
    3139             : #endif  // 0
    3140             : // Bicubic (cubic convolution) interpolation of source band iBand at
                       // (dfSrcX, dfSrcY) over a 4x4 pixel neighbourhood.
                       //
                       // Each of the 4 rows is fetched with GWKGetPixelRow() and convolved with
                       // the horizontal weights; the 4 row results are then convolved with the
                       // vertical weights.  Density, real and imaginary parts are processed the
                       // same way and written to *pdfDensity, *pdfReal and *pdfImag.
                       //
                       // The function hands over to GWKBilinearResample4Sample() (and returns its
                       // result) when the 4x4 window is not entirely inside the image, when a row
                       // fetch fails, or when any of the 16 densities is below
                       // SRC_DENSITY_THRESHOLD_DOUBLE.  Returns true on the cubic path.
    3141      302045 : static bool GWKCubicResample4Sample(const GDALWarpKernel *poWK, int iBand,
    3142             :                                     double dfSrcX, double dfSrcY,
    3143             :                                     double *pdfDensity, double *pdfReal,
    3144             :                                     double *pdfImag)
    3145             : 
    3146             : {
    3147      302045 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3148      302045 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3149      302045 :     GPtrDiff_t iSrcOffset =
    3150      302045 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3151      302045 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3152      302045 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3153      302045 :     double adfDensity[4] = {};
    3154      302045 :     double adfReal[4] = {};
    3155      302045 :     double adfImag[4] = {};
    3156             : 
    3157             :     // Get the bilinear interpolation at the image borders.
    3158      302045 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3159      286140 :         iSrcY + 2 >= poWK->nSrcYSize)
    3160       24670 :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3161       24670 :                                           pdfDensity, pdfReal, pdfImag);
    3162             : 
    3163      277375 :     double adfValueDens[4] = {};
    3164      277375 :     double adfValueReal[4] = {};
    3165      277375 :     double adfValueImag[4] = {};
    3166             : 
    3167      277375 :     double adfCoeffsX[4] = {};
    3168      277375 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
    3169             : // Horizontal pass over rows iSrcY-1 .. iSrcY+2; each row read starts one column left of iSrcX.
    3170     1240570 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3171             :     {
    3172     1009640 :         if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
    3173      998035 :                             2, adfDensity, adfReal, adfImag) ||
    3174      998035 :             adfDensity[0] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3175      980395 :             adfDensity[1] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3176     2979770 :             adfDensity[2] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3177      972094 :             adfDensity[3] < SRC_DENSITY_THRESHOLD_DOUBLE)
    3178             :         {
    3179       46449 :             return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3180       46449 :                                               pdfDensity, pdfReal, pdfImag);
    3181             :         }
    3182             : 
    3183      963196 :         adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
    3184      963196 :         adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
    3185      963196 :         adfValueImag[i + 1] = CONVOL4(adfCoeffsX, adfImag);
    3186             :     }
    3187             : 
    3188             :     /* -------------------------------------------------------------------- */
    3189             :     /*      For now, if we have any pixels missing in the kernel area,      */
    3190             :     /*      we fallback on using bilinear interpolation.  Ideally we        */
    3191             :     /*      should do "weight adjustment" of our results similarly to       */
    3192             :     /*      what is done for the cubic spline and lanc. interpolators.      */
    3193             :     /* -------------------------------------------------------------------- */
    3194             : // (The note above is about the early return inside the row loop.)  Vertical pass over the 4 row results.
    3195      230926 :     double adfCoeffsY[4] = {};
    3196      230926 :     GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
    3197             : 
    3198      230926 :     *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
    3199      230926 :     *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
    3200      230926 :     *pdfImag = CONVOL4(adfCoeffsY, adfValueImag);
    3201             : 
    3202      230926 :     return true;
    3203             : }
    3204             : 
    3205             : #ifdef USE_SSE2
    3206             : 
    3207             : /************************************************************************/
    3208             : /*                           XMMLoad4Values()                           */
    3209             : /*                                                                      */
    3210             : /*  Load 4 packed byte or uint16, cast them to float and put them in a  */
    3211             : /*  m128 register.                                                      */
    3212             : /************************************************************************/
    3213             : // GByte overload: copies 4 bytes from ptr and returns them as 4 packed floats.
    3214   433649000 : static CPL_INLINE __m128 XMMLoad4Values(const GByte *ptr)
    3215             : {
    3216             :     unsigned int i;
    3217   433649000 :     memcpy(&i, ptr, 4);  // The 4 source bytes are copied into one 32-bit integer.
    3218   867297000 :     __m128i xmm_i = _mm_cvtsi32_si128(i);
    3219             :     // Zero extend 4 packed unsigned 8-bit integers in a to packed
    3220             :     // 32-bit integers.
    3221             : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
    3222             :     xmm_i = _mm_cvtepu8_epi32(xmm_i);
    3223             : #else  // Same widening in two steps: 8 -> 16 bits, then 16 -> 32 bits, interleaving with zeros.
    3224   867297000 :     xmm_i = _mm_unpacklo_epi8(xmm_i, _mm_setzero_si128());
    3225   867297000 :     xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
    3226             : #endif
    3227   867297000 :     return _mm_cvtepi32_ps(xmm_i);
    3228             : }
    3229             : // GUInt16 overload: copies 4 16-bit values (8 bytes) from ptr and returns them as 4 packed floats.
    3230      791724 : static CPL_INLINE __m128 XMMLoad4Values(const GUInt16 *ptr)
    3231             : {
    3232             :     GUInt64 i;
    3233      791724 :     memcpy(&i, ptr, 8);  // The 4 source values are copied into one 64-bit integer.
    3234     1583450 :     __m128i xmm_i = _mm_cvtsi64_si128(i);
    3235             :     // Zero extend 4 packed unsigned 16-bit integers in a to packed
    3236             :     // 32-bit integers.
    3237             : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
    3238             :     xmm_i = _mm_cvtepu16_epi32(xmm_i);
    3239             : #else  // Same widening done by interleaving with zeros.
    3240     1583450 :     xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
    3241             : #endif
    3242     1583450 :     return _mm_cvtepi32_ps(xmm_i);
    3243             : }
    3244             : 
    3245             : /************************************************************************/
    3246             : /*                           XMMHorizontalAdd()                         */
    3247             : /*                                                                      */
    3248             : /*  Return the sum of the 4 floating points of the register.            */
    3249             : /************************************************************************/
    3250             : // Two variants selected at compile time; the lane comments list elements from highest (v3) to lowest (v0).
    3251             : #if defined(__SSE3__) || defined(USE_NEON_OPTIMIZATIONS)
    3252             : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
    3253             : {
    3254             :     __m128 shuf = _mm_movehdup_ps(v);   // (v3   , v3   , v1   , v1)
    3255             :     __m128 sums = _mm_add_ps(v, shuf);  // (v3+v3, v3+v2, v1+v1, v1+v0)
    3256             :     shuf = _mm_movehl_ps(shuf, sums);   // (v3   , v3   , v3+v3, v3+v2)
    3257             :     sums = _mm_add_ss(sums, shuf);      // (v1+v0)+(v3+v2)
    3258             :     return _mm_cvtss_f32(sums);
    3259             : }
    3260             : #else  // Variant built when neither __SSE3__ nor USE_NEON_OPTIMIZATIONS is defined.
    3261   108610000 : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
    3262             : {
    3263   108610000 :     __m128 shuf = _mm_movehl_ps(v, v);     // (v3   , v2   , v3   , v2)
    3264   108610000 :     __m128 sums = _mm_add_ps(v, shuf);     // (v3+v3, v2+v2, v3+v1, v2+v0)
    3265   108610000 :     shuf = _mm_shuffle_ps(sums, sums, 1);  // (v2+v0, v2+v0, v2+v0, v3+v1)
    3266   108610000 :     sums = _mm_add_ss(sums, shuf);         // (v2+v0)+(v3+v1)
    3267   108610000 :     return _mm_cvtss_f32(sums);  // Only the lowest lane holds the full sum.
    3268             : }
    3269             : #endif
    3270             : 
    3271             : #endif  // define USE_SSE2
    3272             : 
    3273             : /************************************************************************/
    3274             : /*            GWKCubicResampleSrcMaskIsDensity4SampleRealT()            */
    3275             : /************************************************************************/
    3276             : 
    3277             : // Note: if USE_SSE_CUBIC_IMPL, only instantiate that for Byte and UInt16,
    3278             : // because there are a few assumptions about those types.
    3279             : // We do not define USE_SSE_CUBIC_IMPL since in practice, it gives zero
    3280             : // perf benefit.
    3281             : 
    3282             : template <class T>
    3283      389755 : static CPL_INLINE bool GWKCubicResampleSrcMaskIsDensity4SampleRealT(
    3284             :     const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
    3285             :     double *pdfDensity, double *pdfReal)
    3286             : {
    3287      389755 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3288      389755 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3289      389755 :     const GPtrDiff_t iSrcOffset =
    3290      389755 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3291             : 
    3292             :     // Get the bilinear interpolation at the image borders.
    3293      389755 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3294      387271 :         iSrcY + 2 >= poWK->nSrcYSize)
    3295             :     {
    3296        2484 :         double adfImagIgnored[4] = {};
    3297        2484 :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3298        2484 :                                           pdfDensity, pdfReal, adfImagIgnored);
    3299             :     }
    3300             : 
    3301             : #if defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
    3302             :     const float fDeltaX = static_cast<float>(dfSrcX) - 0.5f - iSrcX;
    3303             :     const float fDeltaY = static_cast<float>(dfSrcY) - 0.5f - iSrcY;
    3304             : 
    3305             :     // TODO(schwehr): Explain the magic numbers.
    3306             :     float afTemp[4 + 4 + 4 + 1];
    3307             :     float *pafAligned =
    3308             :         reinterpret_cast<float *>(afTemp + ((size_t)afTemp & 0xf));
    3309             :     float *pafCoeffs = pafAligned;
    3310             :     float *pafDensity = pafAligned + 4;
    3311             :     float *pafValue = pafAligned + 8;
    3312             : 
    3313             :     const float fHalfDeltaX = 0.5f * fDeltaX;
    3314             :     const float fThreeDeltaX = 3.0f * fDeltaX;
    3315             :     const float fHalfDeltaX2 = fHalfDeltaX * fDeltaX;
    3316             : 
    3317             :     pafCoeffs[0] = fHalfDeltaX * (-1 + fDeltaX * (2 - fDeltaX));
    3318             :     pafCoeffs[1] = 1 + fHalfDeltaX2 * (-5 + fThreeDeltaX);
    3319             :     pafCoeffs[2] = fHalfDeltaX * (1 + fDeltaX * (4 - fThreeDeltaX));
    3320             :     pafCoeffs[3] = fHalfDeltaX2 * (-1 + fDeltaX);
    3321             :     __m128 xmmCoeffs = _mm_load_ps(pafCoeffs);
    3322             :     const __m128 xmmThreshold = _mm_load1_ps(&SRC_DENSITY_THRESHOLD_FLOAT);
    3323             : 
    3324             :     __m128 xmmMaskLowDensity = _mm_setzero_ps();
    3325             :     for (GPtrDiff_t i = -1, iOffset = iSrcOffset - poWK->nSrcXSize - 1; i < 3;
    3326             :          i++, iOffset += poWK->nSrcXSize)
    3327             :     {
    3328             :         const __m128 xmmDensity =
    3329             :             _mm_loadu_ps(poWK->pafUnifiedSrcDensity + iOffset);
    3330             :         xmmMaskLowDensity = _mm_or_ps(xmmMaskLowDensity,
    3331             :                                       _mm_cmplt_ps(xmmDensity, xmmThreshold));
    3332             :         pafDensity[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
    3333             : 
    3334             :         const __m128 xmmValues =
    3335             :             XMMLoad4Values(((T *)poWK->papabySrcImage[iBand]) + iOffset);
    3336             :         pafValue[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValues));
    3337             :     }
    3338             :     if (_mm_movemask_ps(xmmMaskLowDensity))
    3339             :     {
    3340             :         double adfImagIgnored[4] = {};
    3341             :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3342             :                                           pdfDensity, pdfReal, adfImagIgnored);
    3343             :     }
    3344             : 
    3345             :     const float fHalfDeltaY = 0.5f * fDeltaY;
    3346             :     const float fThreeDeltaY = 3.0f * fDeltaY;
    3347             :     const float fHalfDeltaY2 = fHalfDeltaY * fDeltaY;
    3348             : 
    3349             :     pafCoeffs[0] = fHalfDeltaY * (-1 + fDeltaY * (2 - fDeltaY));
    3350             :     pafCoeffs[1] = 1 + fHalfDeltaY2 * (-5 + fThreeDeltaY);
    3351             :     pafCoeffs[2] = fHalfDeltaY * (1 + fDeltaY * (4 - fThreeDeltaY));
    3352             :     pafCoeffs[3] = fHalfDeltaY2 * (-1 + fDeltaY);
    3353             : 
    3354             :     xmmCoeffs = _mm_load_ps(pafCoeffs);
    3355             : 
    3356             :     const __m128 xmmDensity = _mm_load_ps(pafDensity);
    3357             :     const __m128 xmmValue = _mm_load_ps(pafValue);
    3358             :     *pdfDensity = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
    3359             :     *pdfReal = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValue));
    3360             : 
    3361             :     // We did all above computations on float32 whereas the general case is
    3362             :     // float64. Not sure if one is fundamentally more correct than the other
    3363             :     // one, but we want our optimization to give the same result as the
    3364             :     // general case as much as possible, so if the resulting value is
    3365             :     // close to some_int_value + 0.5, redo the computation with the general
    3366             :     // case.
    3367             :     // Note: Types other than Byte or UInt16 will need changes.
    3368             :     if (fabs(*pdfReal - static_cast<int>(*pdfReal) - 0.5) > .007)
    3369             :         return true;
    3370             : 
    3371             : #endif  // defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
    3372             : 
    3373      387271 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3374      387271 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3375             : 
    3376      387271 :     double adfValueDens[4] = {};
    3377      387271 :     double adfValueReal[4] = {};
    3378             : 
    3379      387271 :     double adfCoeffsX[4] = {};
    3380      387271 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
    3381             : 
    3382      387271 :     double adfCoeffsY[4] = {};
    3383      387271 :     GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
    3384             : 
    3385     1930200 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3386             :     {
    3387     1544480 :         const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
    3388             : #if !(defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2))
    3389     1544480 :         if (poWK->pafUnifiedSrcDensity[iOffset + 0] <
    3390     1542940 :                 SRC_DENSITY_THRESHOLD_FLOAT ||
    3391     1542940 :             poWK->pafUnifiedSrcDensity[iOffset + 1] <
    3392     1542940 :                 SRC_DENSITY_THRESHOLD_FLOAT ||
    3393     1542940 :             poWK->pafUnifiedSrcDensity[iOffset + 2] <
    3394     1542940 :                 SRC_DENSITY_THRESHOLD_FLOAT ||
    3395     1542940 :             poWK->pafUnifiedSrcDensity[iOffset + 3] <
    3396             :                 SRC_DENSITY_THRESHOLD_FLOAT)
    3397             :         {
    3398        1551 :             double adfImagIgnored[4] = {};
    3399        1551 :             return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3400             :                                               pdfDensity, pdfReal,
    3401        1551 :                                               adfImagIgnored);
    3402             :         }
    3403             : #endif
    3404             : 
    3405     3085860 :         adfValueDens[i + 1] =
    3406     1542930 :             CONVOL4(adfCoeffsX, poWK->pafUnifiedSrcDensity + iOffset);
    3407             : 
    3408     1542930 :         adfValueReal[i + 1] = CONVOL4(
    3409             :             adfCoeffsX,
    3410     1542930 :             reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
    3411             :     }
    3412             : 
    3413      385720 :     *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
    3414      385720 :     *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
    3415             : 
    3416      385720 :     return true;
    3417             : }
    3418             : 
    3419             : /************************************************************************/
    3420             : /*              GWKCubicResampleSrcMaskIsDensity4SampleReal()             */
    3421             : /*     Bi-cubic when source has and only has pafUnifiedSrcDensity.      */
    3422             : /************************************************************************/
    3423             : 
    3424           0 : static bool GWKCubicResampleSrcMaskIsDensity4SampleReal(
    3425             :     const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
    3426             :     double *pdfDensity, double *pdfReal)
    3427             : 
    3428             : {
    3429           0 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3430           0 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3431           0 :     const GPtrDiff_t iSrcOffset =
    3432           0 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3433           0 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3434           0 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3435             : 
    3436             :     // Get the bilinear interpolation at the image borders.
    3437           0 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3438           0 :         iSrcY + 2 >= poWK->nSrcYSize)
    3439             :     {
    3440           0 :         double adfImagIgnored[4] = {};
    3441           0 :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3442           0 :                                           pdfDensity, pdfReal, adfImagIgnored);
    3443             :     }
    3444             : 
    3445           0 :     double adfCoeffsX[4] = {};
    3446           0 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
    3447             : 
    3448           0 :     double adfCoeffsY[4] = {};
    3449           0 :     GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
    3450             : 
    3451           0 :     double adfValueDens[4] = {};
    3452           0 :     double adfValueReal[4] = {};
    3453           0 :     double adfDensity[4] = {};
    3454           0 :     double adfReal[4] = {};
    3455           0 :     double adfImagIgnored[4] = {};
    3456             : 
    3457           0 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3458             :     {
    3459           0 :         if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
    3460           0 :                             2, adfDensity, adfReal, adfImagIgnored) ||
    3461           0 :             adfDensity[0] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3462           0 :             adfDensity[1] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3463           0 :             adfDensity[2] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3464           0 :             adfDensity[3] < SRC_DENSITY_THRESHOLD_DOUBLE)
    3465             :         {
    3466           0 :             return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3467             :                                               pdfDensity, pdfReal,
    3468           0 :                                               adfImagIgnored);
    3469             :         }
    3470             : 
    3471           0 :         adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
    3472           0 :         adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
    3473             :     }
    3474             : 
    3475           0 :     *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
    3476           0 :     *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
    3477             : 
    3478           0 :     return true;
    3479             : }
    3480             : 
    3481             : template <class T>
    3482     2231485 : static bool GWKCubicResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
    3483             :                                             int iBand, double dfSrcX,
    3484             :                                             double dfSrcY, T *pValue)
    3485             : 
    3486             : {
    3487     2231485 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3488     2231485 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3489     2231485 :     const GPtrDiff_t iSrcOffset =
    3490     2231485 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3491     2231485 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3492     2231485 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3493     2231485 :     const double dfDeltaY2 = dfDeltaY * dfDeltaY;
    3494     2231485 :     const double dfDeltaY3 = dfDeltaY2 * dfDeltaY;
    3495             : 
    3496             :     // Get the bilinear interpolation at the image borders.
    3497     2231485 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3498     1814944 :         iSrcY + 2 >= poWK->nSrcYSize)
    3499      488548 :         return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    3500      488548 :                                                   pValue);
    3501             : 
    3502     1742937 :     double adfCoeffs[4] = {};
    3503     1742937 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffs);
    3504             : 
    3505     1742937 :     double adfValue[4] = {};
    3506             : 
    3507     8714670 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3508             :     {
    3509     6971746 :         const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
    3510             : 
    3511     6971746 :         adfValue[i + 1] = CONVOL4(
    3512             :             adfCoeffs,
    3513     6971746 :             reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
    3514             :     }
    3515             : 
    3516             :     const double dfValue =
    3517     1742937 :         CubicConvolution(dfDeltaY, dfDeltaY2, dfDeltaY3, adfValue[0],
    3518             :                          adfValue[1], adfValue[2], adfValue[3]);
    3519             : 
    3520     1742937 :     *pValue = GWKClampValueT<T>(dfValue);
    3521             : 
    3522     1742937 :     return true;
    3523             : }
    3524             : 
    3525             : /************************************************************************/
    3526             : /*                          GWKLanczosSinc()                            */
    3527             : /************************************************************************/
    3528             : 
    3529             : /*
    3530             :  * Lanczos windowed sinc interpolation kernel with radius r (r = 3 below).
    3531             :  *        /
    3532             :  *        | sinc(x) * sinc(x/r), if |x| < r
    3533             :  * L(x) = | 1, if x = 0                     ,
    3534             :  *        | 0, otherwise
    3535             :  *        \
    3536             :  *
    3537             :  * where sinc(x) = sin(PI * x) / (PI * x).
    3538             :  */
    3539             : 
    3540        1632 : static double GWKLanczosSinc(double dfX)
    3541             : {
    3542        1632 :     if (dfX == 0.0)
    3543           0 :         return 1.0;
    3544             : 
    3545        1632 :     const double dfPIX = M_PI * dfX;
    3546        1632 :     const double dfPIXoverR = dfPIX / 3;
    3547        1632 :     const double dfPIX2overR = dfPIX * dfPIXoverR;
    3548             :     // Given that sin(3x) = 3 sin(x) - 4 sin^3 (x)
    3549             :     // we can compute sin(dfPIX) from sin(dfPIXoverR)
    3550        1632 :     const double dfSinPIXoverR = sin(dfPIXoverR);
    3551        1632 :     const double dfSinPIXoverRSquared = dfSinPIXoverR * dfSinPIXoverR;
    3552        1632 :     const double dfSinPIXMulSinPIXoverR =
    3553        1632 :         (3 - 4 * dfSinPIXoverRSquared) * dfSinPIXoverRSquared;
    3554        1632 :     return dfSinPIXMulSinPIXoverR / dfPIX2overR;
    3555             : }
    3556             : 
    3557      106692 : static double GWKLanczosSinc4Values(double *padfValues)
    3558             : {
    3559      533460 :     for (int i = 0; i < 4; i++)
    3560             :     {
    3561      426768 :         if (padfValues[i] == 0.0)
    3562             :         {
    3563           0 :             padfValues[i] = 1.0;
    3564             :         }
    3565             :         else
    3566             :         {
    3567      426768 :             const double dfPIX = M_PI * padfValues[i];
    3568      426768 :             const double dfPIXoverR = dfPIX / 3;
    3569      426768 :             const double dfPIX2overR = dfPIX * dfPIXoverR;
    3570             :             // Given that sin(3x) = 3 sin(x) - 4 sin^3 (x)
    3571             :             // we can compute sin(dfPIX) from sin(dfPIXoverR)
    3572      426768 :             const double dfSinPIXoverR = sin(dfPIXoverR);
    3573      426768 :             const double dfSinPIXoverRSquared = dfSinPIXoverR * dfSinPIXoverR;
    3574      426768 :             const double dfSinPIXMulSinPIXoverR =
    3575      426768 :                 (3 - 4 * dfSinPIXoverRSquared) * dfSinPIXoverRSquared;
    3576      426768 :             padfValues[i] = dfSinPIXMulSinPIXoverR / dfPIX2overR;
    3577             :         }
    3578             :     }
    3579      106692 :     return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
    3580             : }
    3581             : 
    3582             : /************************************************************************/
    3583             : /*                           GWKBilinear()                              */
    3584             : /************************************************************************/
    3585             : 
    3586     6670070 : static double GWKBilinear(double dfX)
    3587             : {
    3588     6670070 :     double dfAbsX = fabs(dfX);
    3589     6670070 :     if (dfAbsX <= 1.0)
    3590     6198950 :         return 1 - dfAbsX;
    3591             :     else
    3592      471127 :         return 0.0;
    3593             : }
    3594             : 
    3595      401592 : static double GWKBilinear4Values(double *padfValues)
    3596             : {
    3597      401592 :     double dfAbsX0 = fabs(padfValues[0]);
    3598      401592 :     double dfAbsX1 = fabs(padfValues[1]);
    3599      401592 :     double dfAbsX2 = fabs(padfValues[2]);
    3600      401592 :     double dfAbsX3 = fabs(padfValues[3]);
    3601      401592 :     if (dfAbsX0 <= 1.0)
    3602      295634 :         padfValues[0] = 1 - dfAbsX0;
    3603             :     else
    3604      105958 :         padfValues[0] = 0.0;
    3605      401592 :     if (dfAbsX1 <= 1.0)
    3606      401592 :         padfValues[1] = 1 - dfAbsX1;
    3607             :     else
    3608           0 :         padfValues[1] = 0.0;
    3609      401592 :     if (dfAbsX2 <= 1.0)
    3610      401592 :         padfValues[2] = 1 - dfAbsX2;
    3611             :     else
    3612           0 :         padfValues[2] = 0.0;
    3613      401592 :     if (dfAbsX3 <= 1.0)
    3614      295510 :         padfValues[3] = 1 - dfAbsX3;
    3615             :     else
    3616      106082 :         padfValues[3] = 0.0;
    3617      401592 :     return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
    3618             : }
    3619             : 
    3620             : /************************************************************************/
    3621             : /*                            GWKCubic()                                */
    3622             : /************************************************************************/
    3623             : 
    3624     4383010 : static double GWKCubic(double dfX)
    3625             : {
    3626     4383010 :     return CubicKernel(dfX);
    3627             : }
    3628             : 
    3629     8384070 : static double GWKCubic4Values(double *padfValues)
    3630             : {
    3631     8384070 :     const double dfAbsX_0 = fabs(padfValues[0]);
    3632     8384070 :     const double dfAbsX_1 = fabs(padfValues[1]);
    3633     8384070 :     const double dfAbsX_2 = fabs(padfValues[2]);
    3634     8384070 :     const double dfAbsX_3 = fabs(padfValues[3]);
    3635     8384070 :     const double dfX2_0 = padfValues[0] * padfValues[0];
    3636     8384070 :     const double dfX2_1 = padfValues[1] * padfValues[1];
    3637     8384070 :     const double dfX2_2 = padfValues[2] * padfValues[2];
    3638     8384070 :     const double dfX2_3 = padfValues[3] * padfValues[3];
    3639             : 
    3640     8384070 :     double dfVal0 = 0.0;
    3641     8384070 :     if (dfAbsX_0 <= 1.0)
    3642     1562300 :         dfVal0 = dfX2_0 * (1.5 * dfAbsX_0 - 2.5) + 1.0;
    3643     6821770 :     else if (dfAbsX_0 <= 2.0)
    3644     4951580 :         dfVal0 = dfX2_0 * (-0.5 * dfAbsX_0 + 2.5) - 4.0 * dfAbsX_0 + 2.0;
    3645             : 
    3646     8384070 :     double dfVal1 = 0.0;
    3647     8384070 :     if (dfAbsX_1 <= 1.0)
    3648     4822010 :         dfVal1 = dfX2_1 * (1.5 * dfAbsX_1 - 2.5) + 1.0;
    3649     3562060 :     else if (dfAbsX_1 <= 2.0)
    3650     3562060 :         dfVal1 = dfX2_1 * (-0.5 * dfAbsX_1 + 2.5) - 4.0 * dfAbsX_1 + 2.0;
    3651             : 
    3652     8384070 :     double dfVal2 = 0.0;
    3653     8384070 :     if (dfAbsX_2 <= 1.0)
    3654     6644980 :         dfVal2 = dfX2_2 * (1.5 * dfAbsX_2 - 2.5) + 1.0;
    3655     1739090 :     else if (dfAbsX_2 <= 2.0)
    3656     1739090 :         dfVal2 = dfX2_2 * (-0.5 * dfAbsX_2 + 2.5) - 4.0 * dfAbsX_2 + 2.0;
    3657             : 
    3658     8384070 :     double dfVal3 = 0.0;
    3659     8384070 :     if (dfAbsX_3 <= 1.0)
    3660     3706150 :         dfVal3 = dfX2_3 * (1.5 * dfAbsX_3 - 2.5) + 1.0;
    3661     4677910 :     else if (dfAbsX_3 <= 2.0)
    3662     4304680 :         dfVal3 = dfX2_3 * (-0.5 * dfAbsX_3 + 2.5) - 4.0 * dfAbsX_3 + 2.0;
    3663             : 
    3664     8384070 :     padfValues[0] = dfVal0;
    3665     8384070 :     padfValues[1] = dfVal1;
    3666     8384070 :     padfValues[2] = dfVal2;
    3667     8384070 :     padfValues[3] = dfVal3;
    3668     8384070 :     return dfVal0 + dfVal1 + dfVal2 + dfVal3;
    3669             : }
    3670             : 
    3671             : /************************************************************************/
    3672             : /*                           GWKBSpline()                               */
    3673             : /************************************************************************/
    3674             : 
    3675             : // https://www.cs.utexas.edu/~fussell/courses/cs384g-fall2013/lectures/mitchell/Mitchell.pdf
    3676             : // Equation 8 with (B,C)=(1,0)
    3677             : // 1/6 * (3 * |x|^3 - 6 * |x|^2 + 4)           if |x| < 1
    3678             : // 1/6 * (-|x|^3 + 6 * |x|^2 - 12 * |x| + 8)   if 1 <= |x| < 2
    3679             : 
    3680      139200 : static double GWKBSpline(double x)
    3681             : {
    3682      139200 :     const double xp2 = x + 2.0;
    3683      139200 :     const double xp1 = x + 1.0;
    3684      139200 :     const double xm1 = x - 1.0;
    3685             : 
    3686             :     // This will most likely be used, so we'll compute it ahead of time to
    3687             :     // avoid stalling the processor.
    3688      139200 :     const double xp2c = xp2 * xp2 * xp2;
    3689             : 
    3690             :     // Note that the test is computed only if it is needed.
    3691             :     // TODO(schwehr): Make this easier to follow.
    3692             :     return xp2 > 0.0
    3693      278400 :                ? ((xp1 > 0.0)
    3694      139200 :                       ? ((x > 0.0)
    3695      124806 :                              ? ((xm1 > 0.0) ? -4.0 * xm1 * xm1 * xm1 : 0.0) +
    3696       90308 :                                    6.0 * x * x * x
    3697             :                              : 0.0) +
    3698      124806 :                             -4.0 * xp1 * xp1 * xp1
    3699             :                       : 0.0) +
    3700             :                      xp2c
    3701      139200 :                : 0.0;  // * 0.166666666666666666666
    3702             : }
    3703             : 
    3704     2220680 : static double GWKBSpline4Values(double *padfValues)
    3705             : {
    3706    11103400 :     for (int i = 0; i < 4; i++)
    3707             :     {
    3708     8882740 :         const double x = padfValues[i];
    3709     8882740 :         const double xp2 = x + 2.0;
    3710     8882740 :         const double xp1 = x + 1.0;
    3711     8882740 :         const double xm1 = x - 1.0;
    3712             : 
    3713             :         // This will most likely be used, so we'll compute it ahead of time to
    3714             :         // avoid stalling the processor.
    3715     8882740 :         const double xp2c = xp2 * xp2 * xp2;
    3716             : 
    3717             :         // Note that the test is computed only if it is needed.
    3718             :         // TODO(schwehr): Make this easier to follow.
    3719     8882740 :         padfValues[i] =
    3720             :             (xp2 > 0.0)
    3721    17765500 :                 ? ((xp1 > 0.0)
    3722     8882740 :                        ? ((x > 0.0)
    3723     6661820 :                               ? ((xm1 > 0.0) ? -4.0 * xm1 * xm1 * xm1 : 0.0) +
    3724     4438260 :                                     6.0 * x * x * x
    3725             :                               : 0.0) +
    3726     6661820 :                              -4.0 * xp1 * xp1 * xp1
    3727             :                        : 0.0) +
    3728             :                       xp2c
    3729             :                 : 0.0;  // * 0.166666666666666666666
    3730             :     }
    3731     2220680 :     return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
    3732             : }
    3733             : /************************************************************************/
    3734             : /*                       GWKResampleWrkStruct                           */
    3735             : /************************************************************************/
    3736             : 
    3737             : typedef struct _GWKResampleWrkStruct GWKResampleWrkStruct;
    3738             : 
    3739             : typedef bool (*pfnGWKResampleType)(const GDALWarpKernel *poWK, int iBand,
    3740             :                                    double dfSrcX, double dfSrcY,
    3741             :                                    double *pdfDensity, double *pdfReal,
    3742             :                                    double *pdfImag,
    3743             :                                    GWKResampleWrkStruct *psWrkStruct);
    3744             : 
    3745             : struct _GWKResampleWrkStruct
    3746             : {
    3747             :     pfnGWKResampleType pfnGWKResample;
    3748             : 
    3749             :     // Space for saved X weights.
    3750             :     double *padfWeightsX;
    3751             :     bool *pabCalcX;
    3752             : 
    3753             :     double *padfWeightsY;       // Only used by GWKResampleOptimizedLanczos.
    3754             :     int iLastSrcX;              // Only used by GWKResampleOptimizedLanczos.
    3755             :     int iLastSrcY;              // Only used by GWKResampleOptimizedLanczos.
    3756             :     double dfLastDeltaX;        // Only used by GWKResampleOptimizedLanczos.
    3757             :     double dfLastDeltaY;        // Only used by GWKResampleOptimizedLanczos.
    3758             :     double dfCosPiXScale;       // Only used by GWKResampleOptimizedLanczos.
    3759             :     double dfSinPiXScale;       // Only used by GWKResampleOptimizedLanczos.
    3760             :     double dfCosPiXScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3761             :     double dfSinPiXScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3762             :     double dfCosPiYScale;       // Only used by GWKResampleOptimizedLanczos.
    3763             :     double dfSinPiYScale;       // Only used by GWKResampleOptimizedLanczos.
    3764             :     double dfCosPiYScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3765             :     double dfSinPiYScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3766             : 
    3767             :     // Space for saving a row of pixels.
    3768             :     double *padfRowDensity;
    3769             :     double *padfRowReal;
    3770             :     double *padfRowImag;
    3771             : };
    3772             : 
    3773             : /************************************************************************/
    3774             : /*                    GWKResampleCreateWrkStruct()                      */
    3775             : /************************************************************************/
    3776             : 
    3777             : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
    3778             :                         double dfSrcY, double *pdfDensity, double *pdfReal,
    3779             :                         double *pdfImag, GWKResampleWrkStruct *psWrkStruct);
    3780             : 
    3781             : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
    3782             :                                         double dfSrcX, double dfSrcY,
    3783             :                                         double *pdfDensity, double *pdfReal,
    3784             :                                         double *pdfImag,
    3785             :                                         GWKResampleWrkStruct *psWrkStruct);
    3786             : 
    3787         397 : static GWKResampleWrkStruct *GWKResampleCreateWrkStruct(GDALWarpKernel *poWK)
    3788             : {
    3789         397 :     const int nXDist = (poWK->nXRadius + 1) * 2;
    3790         397 :     const int nYDist = (poWK->nYRadius + 1) * 2;
    3791             : 
    3792             :     GWKResampleWrkStruct *psWrkStruct = static_cast<GWKResampleWrkStruct *>(
    3793         397 :         CPLCalloc(1, sizeof(GWKResampleWrkStruct)));
    3794             : 
    3795             :     // Alloc space for saved X weights.
    3796         397 :     psWrkStruct->padfWeightsX =
    3797         397 :         static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3798         397 :     psWrkStruct->pabCalcX =
    3799         397 :         static_cast<bool *>(CPLMalloc(nXDist * sizeof(bool)));
    3800             : 
    3801         397 :     psWrkStruct->padfWeightsY =
    3802         397 :         static_cast<double *>(CPLCalloc(nYDist, sizeof(double)));
    3803         397 :     psWrkStruct->iLastSrcX = -10;
    3804         397 :     psWrkStruct->iLastSrcY = -10;
    3805         397 :     psWrkStruct->dfLastDeltaX = -10;
    3806         397 :     psWrkStruct->dfLastDeltaY = -10;
    3807             : 
    3808             :     // Alloc space for saving a row of pixels.
    3809         397 :     if (poWK->pafUnifiedSrcDensity == nullptr &&
    3810         363 :         poWK->panUnifiedSrcValid == nullptr &&
    3811         340 :         poWK->papanBandSrcValid == nullptr)
    3812             :     {
    3813         340 :         psWrkStruct->padfRowDensity = nullptr;
    3814             :     }
    3815             :     else
    3816             :     {
    3817          57 :         psWrkStruct->padfRowDensity =
    3818          57 :             static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3819             :     }
    3820         397 :     psWrkStruct->padfRowReal =
    3821         397 :         static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3822         397 :     psWrkStruct->padfRowImag =
    3823         397 :         static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3824             : 
    3825         397 :     if (poWK->eResample == GRA_Lanczos)
    3826             :     {
    3827          63 :         psWrkStruct->pfnGWKResample = GWKResampleOptimizedLanczos;
    3828             : 
    3829          63 :         if (poWK->dfXScale < 1)
    3830             :         {
    3831           4 :             psWrkStruct->dfCosPiXScaleOver3 = cos(M_PI / 3 * poWK->dfXScale);
    3832           4 :             psWrkStruct->dfSinPiXScaleOver3 =
    3833           4 :                 sqrt(1 - psWrkStruct->dfCosPiXScaleOver3 *
    3834           4 :                              psWrkStruct->dfCosPiXScaleOver3);
    3835             :             // "Naive":
    3836             :             // const double dfCosPiXScale = cos(  M_PI * dfXScale );
    3837             :             // const double dfSinPiXScale = sin(  M_PI * dfXScale );
    3838             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
    3839           4 :             psWrkStruct->dfCosPiXScale = (4 * psWrkStruct->dfCosPiXScaleOver3 *
    3840           4 :                                               psWrkStruct->dfCosPiXScaleOver3 -
    3841           4 :                                           3) *
    3842           4 :                                          psWrkStruct->dfCosPiXScaleOver3;
    3843           4 :             psWrkStruct->dfSinPiXScale = sqrt(
    3844           4 :                 1 - psWrkStruct->dfCosPiXScale * psWrkStruct->dfCosPiXScale);
    3845             :         }
    3846             : 
    3847          63 :         if (poWK->dfYScale < 1)
    3848             :         {
    3849          11 :             psWrkStruct->dfCosPiYScaleOver3 = cos(M_PI / 3 * poWK->dfYScale);
    3850          11 :             psWrkStruct->dfSinPiYScaleOver3 =
    3851          11 :                 sqrt(1 - psWrkStruct->dfCosPiYScaleOver3 *
    3852          11 :                              psWrkStruct->dfCosPiYScaleOver3);
    3853             :             // "Naive":
    3854             :             // const double dfCosPiYScale = cos(  M_PI * dfYScale );
    3855             :             // const double dfSinPiYScale = sin(  M_PI * dfYScale );
    3856             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
    3857          11 :             psWrkStruct->dfCosPiYScale = (4 * psWrkStruct->dfCosPiYScaleOver3 *
    3858          11 :                                               psWrkStruct->dfCosPiYScaleOver3 -
    3859          11 :                                           3) *
    3860          11 :                                          psWrkStruct->dfCosPiYScaleOver3;
    3861          11 :             psWrkStruct->dfSinPiYScale = sqrt(
    3862          11 :                 1 - psWrkStruct->dfCosPiYScale * psWrkStruct->dfCosPiYScale);
    3863             :         }
    3864             :     }
    3865             :     else
    3866         334 :         psWrkStruct->pfnGWKResample = GWKResample;
    3867             : 
    3868         397 :     return psWrkStruct;
    3869             : }
    3870             : 
    3871             : /************************************************************************/
    3872             : /*                    GWKResampleDeleteWrkStruct()                      */
    3873             : /************************************************************************/
    3874             : 
                       // Releases a resampling work structure: every scratch array it owns
                       // (X/Y weight tables, the "X weight computed" flags and the density /
                       // real / imaginary row buffers) and finally the structure itself.
                       //
                       // psWrkStruct  structure to destroy; it is dereferenced unconditionally,
                       //              so it must not be null.  The pointer is dangling once
                       //              this function returns.
    3875         397 : static void GWKResampleDeleteWrkStruct(GWKResampleWrkStruct *psWrkStruct)
    3876             : {
    3877         397 :     CPLFree(psWrkStruct->padfWeightsX);
    3878         397 :     CPLFree(psWrkStruct->padfWeightsY);
    3879         397 :     CPLFree(psWrkStruct->pabCalcX);
                           // padfRowDensity is left null by GWKResampleCreateWrkStruct() when the
                           // kernel has no density or validity mask, so this call relies on
                           // CPLFree() accepting a null pointer.
    3880         397 :     CPLFree(psWrkStruct->padfRowDensity);
    3881         397 :     CPLFree(psWrkStruct->padfRowReal);
    3882         397 :     CPLFree(psWrkStruct->padfRowImag);
                           // Free the container last, after all members have been read.
    3883         397 :     CPLFree(psWrkStruct);
    3884         397 : }
    3885             : 
    3886             : /************************************************************************/
    3887             : /*                           GWKResample()                              */
    3888             : /************************************************************************/
    3889             : 
                       // Resamples one source location of one band with the separable filter
                       // looked up in apfGWKFilter[poWK->eResample].
                       //
                       // The kernel window covers rows nFiltInitY..nYRadius and columns
                       // nFiltInitX..nXRadius relative to the pixel
                       // (iSrcX, iSrcY) = floor(dfSrc{X,Y} - 0.5), clipped to the source raster
                       // extent.  Each row is fetched with GWKGetPixelRow(), its pixels are
                       // weighted by the filter in X, and the per-row sums are then weighted by
                       // the filter in Y.
                       //
                       // poWK         warp kernel supplying raster size, scales, filter radii and
                       //              the resampling algorithm.
                       // iBand        band index, forwarded to GWKGetPixelRow().
                       // dfSrcX/Y     source location to sample.
                       // pdfDensity   out: accumulated density; 1.0 when no row density buffer
                       //              is in use, 0.0 when the function returns false.
                       // pdfReal      out: weighted real value; not written on failure.
                       // pdfImag      out: weighted imaginary value; not written on failure.
                       // psWrkStruct  scratch buffers; padfWeightsX, pabCalcX and the row
                       //              arrays are overwritten by this call.
                       //
                       // Returns false when the accumulated weight (or, when density is
                       // tracked, the accumulated density) is below 0.000001, true otherwise.
    3890      239383 : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
    3891             :                         double dfSrcY, double *pdfDensity, double *pdfReal,
    3892             :                         double *pdfImag, GWKResampleWrkStruct *psWrkStruct)
    3893             : 
    3894             : {
    3895             :     // Save as local variables to avoid following pointers in loops.
    3896      239383 :     const int nSrcXSize = poWK->nSrcXSize;
    3897      239383 :     const int nSrcYSize = poWK->nSrcYSize;
    3898             : 
    3899      239383 :     double dfAccumulatorReal = 0.0;
    3900      239383 :     double dfAccumulatorImag = 0.0;
    3901      239383 :     double dfAccumulatorDensity = 0.0;
    3902      239383 :     double dfAccumulatorWeight = 0.0;
                           // Reference pixel of the window and its linear offset in an array
                           // that is nSrcXSize elements wide.
    3903      239383 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    3904      239383 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    3905      239383 :     const GPtrDiff_t iSrcOffset =
    3906      239383 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
                           // Offset of the sample from the reference pixel; the filter is
                           // evaluated at (kernel index - delta).
    3907      239383 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3908      239383 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3909             : 
    3910      239383 :     const double dfXScale = poWK->dfXScale;
    3911      239383 :     const double dfYScale = poWK->dfYScale;
    3912             : 
                           // Number of pabCalcX flags reset below.
                           // NOTE(review): expected to match the element count used when the
                           // work structure was allocated - confirm against
                           // GWKResampleCreateWrkStruct().
    3913      239383 :     const int nXDist = (poWK->nXRadius + 1) * 2;
    3914             : 
    3915             :     // Space for saved X weights.
    3916      239383 :     double *padfWeightsX = psWrkStruct->padfWeightsX;
    3917      239383 :     bool *pabCalcX = psWrkStruct->pabCalcX;
    3918             : 
    3919             :     // Space for saving a row of pixels.
                           // padfRowDensity is null when density is not tracked; every use
                           // below checks for that.
    3920      239383 :     double *padfRowDensity = psWrkStruct->padfRowDensity;
    3921      239383 :     double *padfRowReal = psWrkStruct->padfRowReal;
    3922      239383 :     double *padfRowImag = psWrkStruct->padfRowImag;
    3923             : 
    3924             :     // Mark as needing calculation (don't calculate the weights yet,
    3925             :     // because a mask may render it unnecessary).
                           // The flags are cleared on every call, so X weights are only shared
                           // between the rows of this one sample.
    3926      239383 :     memset(pabCalcX, false, nXDist * sizeof(bool));
    3927             : 
    3928      239383 :     FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample];
    3929      239383 :     CPLAssert(pfnGetWeight);
    3930             : 
    3931             :     // Skip sampling over edge of image.
                           // Rows: clamp so that 0 <= iSrcY + j <= nSrcYSize - 1.
    3932      239383 :     int j = poWK->nFiltInitY;
    3933      239383 :     int jMax = poWK->nYRadius;
    3934      239383 :     if (iSrcY + j < 0)
    3935         566 :         j = -iSrcY;
    3936      239383 :     if (iSrcY + jMax >= nSrcYSize)
    3937         662 :         jMax = nSrcYSize - iSrcY - 1;
    3938             : 
                           // Columns: clamp so that 0 <= iSrcX + i <= nSrcXSize - 1.
    3939      239383 :     int iMin = poWK->nFiltInitX;
    3940      239383 :     int iMax = poWK->nXRadius;
    3941      239383 :     if (iSrcX + iMin < 0)
    3942         566 :         iMin = -iSrcX;
    3943      239383 :     if (iSrcX + iMax >= nSrcXSize)
    3944         659 :         iMax = nSrcXSize - iSrcX - 1;
    3945             : 
    3946      239383 :     const int bXScaleBelow1 = (dfXScale < 1.0);
    3947      239383 :     const int bYScaleBelow1 = (dfYScale < 1.0);
    3948             : 
                           // Offset of the first column of the row *before* the first kernel
                           // row: the loop adds nSrcXSize before each fetch.
    3949      239383 :     GPtrDiff_t iRowOffset =
    3950      239383 :         iSrcOffset + static_cast<GPtrDiff_t>(j - 1) * nSrcXSize + iMin;
    3951             : 
    3952             :     // Loop over pixel rows in the kernel.
    3953     1445930 :     for (; j <= jMax; ++j)
    3954             :     {
    3955     1206540 :         iRowOffset += nSrcXSize;
    3956             : 
    3957             :         // Get pixel values.
    3958             :         // We can potentially read extra elements after the "normal" end of the
    3959             :         // source arrays, but the contract of papabySrcImage[iBand],
    3960             :         // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
    3961             :         // is to have WARP_EXTRA_ELTS reserved at their end.
                               // NOTE(review): the count passed is (iMax - iMin + 2) / 2, i.e. half
                               // the number of columns rounded up - presumably GWKGetPixelRow()
                               // works on pairs of pixels; confirm against its definition.
                               // A row for which it returns false contributes neither values nor
                               // weight.
    3962     1206540 :         if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
    3963             :                             padfRowDensity, padfRowReal, padfRowImag))
    3964          72 :             continue;
    3965             : 
    3966             :         // Calculate the Y weight.
                               // When the Y scale is below 1 the distance is multiplied by the
                               // scale before the filter is evaluated.
    3967             :         double dfWeight1 = (bYScaleBelow1)
    3968     1206470 :                                ? pfnGetWeight((j - dfDeltaY) * dfYScale)
    3969        1600 :                                : pfnGetWeight(j - dfDeltaY);
    3970             : 
    3971             :         // Iterate over pixels in row.
                               // Per-row sums, weighted in X only; the Y weight is applied once
                               // per row after the inner loop.
    3972     1206470 :         double dfAccumulatorRealLocal = 0.0;
    3973     1206470 :         double dfAccumulatorImagLocal = 0.0;
    3974     1206470 :         double dfAccumulatorDensityLocal = 0.0;
    3975     1206470 :         double dfAccumulatorWeightLocal = 0.0;
    3976             : 
                               // Row buffers and the weight cache are indexed by (i - iMin).
    3977     7317420 :         for (int i = iMin; i <= iMax; ++i)
    3978             :         {
    3979             :             // Skip sampling if pixel has zero density.
    3980     6110940 :             if (padfRowDensity != nullptr &&
    3981       77277 :                 padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD_DOUBLE)
    3982         546 :                 continue;
    3983             : 
    3984     6110400 :             double dfWeight2 = 0.0;
    3985             : 
    3986             :             // Make or use a cached set of weights for this row.
    3987     6110400 :             if (pabCalcX[i - iMin])
    3988             :             {
    3989             :                 // Use saved weight value instead of recomputing it.
    3990     4903920 :                 dfWeight2 = padfWeightsX[i - iMin];
    3991             :             }
    3992             :             else
    3993             :             {
    3994             :                 // Calculate & save the X weight.
                                       // Same scaling rule as for the Y weight above.
    3995     1206480 :                 padfWeightsX[i - iMin] = dfWeight2 =
    3996     1206480 :                     (bXScaleBelow1) ? pfnGetWeight((i - dfDeltaX) * dfXScale)
    3997        1600 :                                     : pfnGetWeight(i - dfDeltaX);
    3998             : 
    3999     1206480 :                 pabCalcX[i - iMin] = true;
    4000             :             }
    4001             : 
    4002             :             // Accumulate!
    4003     6110400 :             dfAccumulatorRealLocal += padfRowReal[i - iMin] * dfWeight2;
    4004     6110400 :             dfAccumulatorImagLocal += padfRowImag[i - iMin] * dfWeight2;
    4005     6110400 :             if (padfRowDensity != nullptr)
    4006       76731 :                 dfAccumulatorDensityLocal +=
    4007       76731 :                     padfRowDensity[i - iMin] * dfWeight2;
    4008     6110400 :             dfAccumulatorWeightLocal += dfWeight2;
    4009             :         }
    4010             : 
                               // Fold the row into the totals, scaled by the row's Y weight.
    4011     1206470 :         dfAccumulatorReal += dfAccumulatorRealLocal * dfWeight1;
    4012     1206470 :         dfAccumulatorImag += dfAccumulatorImagLocal * dfWeight1;
    4013     1206470 :         dfAccumulatorDensity += dfAccumulatorDensityLocal * dfWeight1;
    4014     1206470 :         dfAccumulatorWeight += dfAccumulatorWeightLocal * dfWeight1;
    4015             :     }
    4016             : 
                           // Nothing usable was accumulated: report zero density and leave
                           // *pdfReal / *pdfImag untouched.
    4017      239383 :     if (dfAccumulatorWeight < 0.000001 ||
    4018        1887 :         (padfRowDensity != nullptr && dfAccumulatorDensity < 0.000001))
    4019             :     {
    4020           0 :         *pdfDensity = 0.0;
    4021           0 :         return false;
    4022             :     }
    4023             : 
    4024             :     // Calculate the output taking into account weighting.
                           // The sums are divided by the total weight unless that total is
                           // already within 0.00001 of 1.
    4025      239383 :     if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
    4026             :     {
    4027      239380 :         *pdfReal = dfAccumulatorReal / dfAccumulatorWeight;
    4028      239380 :         *pdfImag = dfAccumulatorImag / dfAccumulatorWeight;
    4029      239380 :         if (padfRowDensity != nullptr)
    4030        1884 :             *pdfDensity = dfAccumulatorDensity / dfAccumulatorWeight;
    4031             :         else
    4032      237496 :             *pdfDensity = 1.0;
    4033             :     }
    4034             :     else
    4035             :     {
    4036           3 :         *pdfReal = dfAccumulatorReal;
    4037           3 :         *pdfImag = dfAccumulatorImag;
    4038           3 :         if (padfRowDensity != nullptr)
    4039           3 :             *pdfDensity = dfAccumulatorDensity;
    4040             :         else
    4041           0 :             *pdfDensity = 1.0;
    4042             :     }
    4043             : 
    4044      239383 :     return true;
    4045             : }
    4046             : 
    4047             : /************************************************************************/
    4048             : /*                      GWKResampleOptimizedLanczos()                   */
    4049             : /************************************************************************/
    4050             : 
    4051      617144 : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
    4052             :                                         double dfSrcX, double dfSrcY,
    4053             :                                         double *pdfDensity, double *pdfReal,
    4054             :                                         double *pdfImag,
    4055             :                                         GWKResampleWrkStruct *psWrkStruct)
    4056             : 
    4057             : {
    4058             :     // Save as local variables to avoid following pointers in loops.
    4059      617144 :     const int nSrcXSize = poWK->nSrcXSize;
    4060      617144 :     const int nSrcYSize = poWK->nSrcYSize;
    4061             : 
    4062      617144 :     double dfAccumulatorReal = 0.0;
    4063      617144 :     double dfAccumulatorImag = 0.0;
    4064      617144 :     double dfAccumulatorDensity = 0.0;
    4065      617144 :     double dfAccumulatorWeight = 0.0;
    4066      617144 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    4067      617144 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    4068      617144 :     const GPtrDiff_t iSrcOffset =
    4069      617144 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    4070      617144 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    4071      617144 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    4072             : 
    4073      617144 :     const double dfXScale = poWK->dfXScale;
    4074      617144 :     const double dfYScale = poWK->dfYScale;
    4075             : 
    4076             :     // Space for saved X weights.
    4077      617144 :     double *const padfWeightsXShifted =
    4078      617144 :         psWrkStruct->padfWeightsX - poWK->nFiltInitX;
    4079      617144 :     double *const padfWeightsYShifted =
    4080      617144 :         psWrkStruct->padfWeightsY - poWK->nFiltInitY;
    4081             : 
    4082             :     // Space for saving a row of pixels.
    4083      617144 :     double *const padfRowDensity = psWrkStruct->padfRowDensity;
    4084      617144 :     double *const padfRowReal = psWrkStruct->padfRowReal;
    4085      617144 :     double *const padfRowImag = psWrkStruct->padfRowImag;
    4086             : 
    4087             :     // Skip sampling over edge of image.
    4088      617144 :     int jMin = poWK->nFiltInitY;
    4089      617144 :     int jMax = poWK->nYRadius;
    4090      617144 :     if (iSrcY + jMin < 0)
    4091       16522 :         jMin = -iSrcY;
    4092      617144 :     if (iSrcY + jMax >= nSrcYSize)
    4093        5782 :         jMax = nSrcYSize - iSrcY - 1;
    4094             : 
    4095      617144 :     int iMin = poWK->nFiltInitX;
    4096      617144 :     int iMax = poWK->nXRadius;
    4097      617144 :     if (iSrcX + iMin < 0)
    4098       15797 :         iMin = -iSrcX;
    4099      617144 :     if (iSrcX + iMax >= nSrcXSize)
    4100        4657 :         iMax = nSrcXSize - iSrcX - 1;
    4101             : 
    4102      617144 :     if (dfXScale < 1.0)
    4103             :     {
    4104      403041 :         while ((iMin - dfDeltaX) * dfXScale < -3.0)
    4105      200179 :             iMin++;
    4106      202862 :         while ((iMax - dfDeltaX) * dfXScale > 3.0)
    4107           0 :             iMax--;
    4108             : 
    4109             :         // clang-format off
    4110             :         /*
    4111             :         Naive version:
    4112             :         for (int i = iMin; i <= iMax; ++i)
    4113             :         {
    4114             :             psWrkStruct->padfWeightsXShifted[i] =
    4115             :                 GWKLanczosSinc((i - dfDeltaX) * dfXScale);
    4116             :         }
    4117             : 
    4118             :         but given that:
    4119             : 
    4120             :         GWKLanczosSinc(x):
    4121             :             if (dfX == 0.0)
    4122             :                 return 1.0;
    4123             : 
    4124             :             const double dfPIX = M_PI * dfX;
    4125             :             const double dfPIXoverR = dfPIX / 3;
    4126             :             const double dfPIX2overR = dfPIX * dfPIXoverR;
    4127             :             return sin(dfPIX) * sin(dfPIXoverR) / dfPIX2overR;
    4128             : 
    4129             :         and
    4130             :             sin (a + b) = sin a cos b + cos a sin b.
    4131             :             cos (a + b) = cos a cos b - sin a sin b.
    4132             : 
    4133             :         we can skip any sin() computation within the loop
    4134             :         */
    4135             :         // clang-format on
    4136             : 
    4137      202862 :         if (iSrcX != psWrkStruct->iLastSrcX ||
    4138      131072 :             dfDeltaX != psWrkStruct->dfLastDeltaX)
    4139             :         {
    4140       71790 :             double dfX = (iMin - dfDeltaX) * dfXScale;
    4141             : 
    4142       71790 :             double dfPIXover3 = M_PI / 3 * dfX;
    4143       71790 :             double dfCosOver3 = cos(dfPIXover3);
    4144       71790 :             double dfSinOver3 = sin(dfPIXover3);
    4145             : 
    4146             :             // "Naive":
    4147             :             // double dfSin = sin( M_PI * dfX );
    4148             :             // double dfCos = cos( M_PI * dfX );
    4149             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
    4150       71790 :             double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
    4151       71790 :             double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
    4152             : 
    4153       71790 :             const double dfCosPiXScaleOver3 = psWrkStruct->dfCosPiXScaleOver3;
    4154       71790 :             const double dfSinPiXScaleOver3 = psWrkStruct->dfSinPiXScaleOver3;
    4155       71790 :             const double dfCosPiXScale = psWrkStruct->dfCosPiXScale;
    4156       71790 :             const double dfSinPiXScale = psWrkStruct->dfSinPiXScale;
    4157       71790 :             constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
    4158       71790 :             padfWeightsXShifted[iMin] =
    4159       71790 :                 dfX == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfX * dfX);
    4160     1636480 :             for (int i = iMin + 1; i <= iMax; ++i)
    4161             :             {
    4162     1564690 :                 dfX += dfXScale;
    4163     1564690 :                 const double dfNewSin =
    4164     1564690 :                     dfSin * dfCosPiXScale + dfCos * dfSinPiXScale;
    4165     1564690 :                 const double dfNewSinOver3 = dfSinOver3 * dfCosPiXScaleOver3 +
    4166     1564690 :                                              dfCosOver3 * dfSinPiXScaleOver3;
    4167     1564690 :                 padfWeightsXShifted[i] =
    4168             :                     dfX == 0
    4169     1564690 :                         ? 1.0
    4170     1564690 :                         : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfX * dfX);
    4171     1564690 :                 const double dfNewCos =
    4172     1564690 :                     dfCos * dfCosPiXScale - dfSin * dfSinPiXScale;
    4173     1564690 :                 const double dfNewCosOver3 = dfCosOver3 * dfCosPiXScaleOver3 -
    4174     1564690 :                                              dfSinOver3 * dfSinPiXScaleOver3;
    4175     1564690 :                 dfSin = dfNewSin;
    4176     1564690 :                 dfCos = dfNewCos;
    4177     1564690 :                 dfSinOver3 = dfNewSinOver3;
    4178     1564690 :                 dfCosOver3 = dfNewCosOver3;
    4179             :             }
    4180             : 
    4181       71790 :             psWrkStruct->iLastSrcX = iSrcX;
    4182       71790 :             psWrkStruct->dfLastDeltaX = dfDeltaX;
    4183             :         }
    4184             :     }
    4185             :     else
    4186             :     {
    4187      757542 :         while (iMin - dfDeltaX < -3.0)
    4188      343260 :             iMin++;
    4189      414282 :         while (iMax - dfDeltaX > 3.0)
    4190           0 :             iMax--;
    4191             : 
    4192      414282 :         if (iSrcX != psWrkStruct->iLastSrcX ||
    4193      209580 :             dfDeltaX != psWrkStruct->dfLastDeltaX)
    4194             :         {
    4195             :             // Optimisation of GWKLanczosSinc(i - dfDeltaX) based on the
    4196             :             // following trigonometric formulas.
    4197             : 
    4198             :             // TODO(schwehr): Move this somewhere where it can be rendered at
    4199             :             // LaTeX.
    4200             :             // clang-format off
    4201             :             // sin(M_PI * (dfBase + k)) = sin(M_PI * dfBase) * cos(M_PI * k) +
    4202             :             //                            cos(M_PI * dfBase) * sin(M_PI * k)
    4203             :             // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k) + dfCosPIBase * sin(M_PI * k)
    4204             :             // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k)
    4205             :             // sin(M_PI * (dfBase + k)) = dfSinPIBase * (((k % 2) == 0) ? 1 : -1)
    4206             : 
    4207             :             // sin(M_PI / dfR * (dfBase + k)) = sin(M_PI / dfR * dfBase) * cos(M_PI / dfR * k) +
    4208             :             //                                  cos(M_PI / dfR * dfBase) * sin(M_PI / dfR * k)
    4209             :             // sin(M_PI / dfR * (dfBase + k)) = dfSinPIBaseOverR * cos(M_PI / dfR * k) + dfCosPIBaseOverR * sin(M_PI / dfR * k)
    4210             :             // clang-format on
    4211             : 
    4212      414282 :             const double dfSinPIDeltaXOver3 = sin((-M_PI / 3.0) * dfDeltaX);
    4213      414282 :             const double dfSin2PIDeltaXOver3 =
    4214             :                 dfSinPIDeltaXOver3 * dfSinPIDeltaXOver3;
    4215             :             // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaX < PI/2.
    4216      414282 :             const double dfCosPIDeltaXOver3 = sqrt(1.0 - dfSin2PIDeltaXOver3);
    4217      414282 :             const double dfSinPIDeltaX =
    4218      414282 :                 (3.0 - 4 * dfSin2PIDeltaXOver3) * dfSinPIDeltaXOver3;
    4219      414282 :             const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
    4220      414282 :             const double dfInvPI2Over3xSinPIDeltaX =
    4221             :                 dfInvPI2Over3 * dfSinPIDeltaX;
    4222      414282 :             const double dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 =
    4223      414282 :                 -0.5 * dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3;
    4224      414282 :             const double dfSinPIOver3 = 0.8660254037844386;
    4225      414282 :             const double dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3 =
    4226      414282 :                 dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaX * dfCosPIDeltaXOver3;
    4227             :             const double padfCst[] = {
    4228      414282 :                 dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3,
    4229      414282 :                 dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 -
    4230             :                     dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3,
    4231      414282 :                 dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 +
    4232      414282 :                     dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3};
    4233             : 
    4234     2936860 :             for (int i = iMin; i <= iMax; ++i)
    4235             :             {
    4236     2522570 :                 const double dfX = i - dfDeltaX;
    4237     2522570 :                 if (dfX == 0.0)
    4238       58282 :                     padfWeightsXShifted[i] = 1.0;
    4239             :                 else
    4240     2464290 :                     padfWeightsXShifted[i] = padfCst[(i + 3) % 3] / (dfX * dfX);
    4241             : #if DEBUG_VERBOSE
    4242             :                     // TODO(schwehr): AlmostEqual.
    4243             :                     // CPLAssert(fabs(padfWeightsX[i-poWK->nFiltInitX] -
    4244             :                     //               GWKLanczosSinc(dfX, 3.0)) < 1e-10);
    4245             : #endif
    4246             :             }
    4247             : 
    4248      414282 :             psWrkStruct->iLastSrcX = iSrcX;
    4249      414282 :             psWrkStruct->dfLastDeltaX = dfDeltaX;
    4250             :         }
    4251             :     }
    4252             : 
    4253      617144 :     if (dfYScale < 1.0)
    4254             :     {
    4255      403116 :         while ((jMin - dfDeltaY) * dfYScale < -3.0)
    4256      200254 :             jMin++;
    4257      202862 :         while ((jMax - dfDeltaY) * dfYScale > 3.0)
    4258           0 :             jMax--;
    4259             : 
    4260             :         // clang-format off
    4261             :         /*
    4262             :         Naive version:
    4263             :         for (int j = jMin; j <= jMax; ++j)
    4264             :         {
    4265             :             padfWeightsYShifted[j] =
    4266             :                 GWKLanczosSinc((j - dfDeltaY) * dfYScale);
    4267             :         }
    4268             :         */
    4269             :         // clang-format on
    4270             : 
    4271      202862 :         if (iSrcY != psWrkStruct->iLastSrcY ||
    4272      202479 :             dfDeltaY != psWrkStruct->dfLastDeltaY)
    4273             :         {
    4274         383 :             double dfY = (jMin - dfDeltaY) * dfYScale;
    4275             : 
    4276         383 :             double dfPIYover3 = M_PI / 3 * dfY;
    4277         383 :             double dfCosOver3 = cos(dfPIYover3);
    4278         383 :             double dfSinOver3 = sin(dfPIYover3);
    4279             : 
    4280             :             // "Naive":
    4281             :             // double dfSin = sin( M_PI * dfY );
    4282             :             // double dfCos = cos( M_PI * dfY );
    4283             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
    4284         383 :             double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
    4285         383 :             double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
    4286             : 
    4287         383 :             const double dfCosPiYScaleOver3 = psWrkStruct->dfCosPiYScaleOver3;
    4288         383 :             const double dfSinPiYScaleOver3 = psWrkStruct->dfSinPiYScaleOver3;
    4289         383 :             const double dfCosPiYScale = psWrkStruct->dfCosPiYScale;
    4290         383 :             const double dfSinPiYScale = psWrkStruct->dfSinPiYScale;
    4291         383 :             constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
    4292         383 :             padfWeightsYShifted[jMin] =
    4293         383 :                 dfY == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfY * dfY);
    4294        7318 :             for (int j = jMin + 1; j <= jMax; ++j)
    4295             :             {
    4296        6935 :                 dfY += dfYScale;
    4297        6935 :                 const double dfNewSin =
    4298        6935 :                     dfSin * dfCosPiYScale + dfCos * dfSinPiYScale;
    4299        6935 :                 const double dfNewSinOver3 = dfSinOver3 * dfCosPiYScaleOver3 +
    4300        6935 :                                              dfCosOver3 * dfSinPiYScaleOver3;
    4301        6935 :                 padfWeightsYShifted[j] =
    4302             :                     dfY == 0
    4303        6935 :                         ? 1.0
    4304        6935 :                         : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfY * dfY);
    4305        6935 :                 const double dfNewCos =
    4306        6935 :                     dfCos * dfCosPiYScale - dfSin * dfSinPiYScale;
    4307        6935 :                 const double dfNewCosOver3 = dfCosOver3 * dfCosPiYScaleOver3 -
    4308        6935 :                                              dfSinOver3 * dfSinPiYScaleOver3;
    4309        6935 :                 dfSin = dfNewSin;
    4310        6935 :                 dfCos = dfNewCos;
    4311        6935 :                 dfSinOver3 = dfNewSinOver3;
    4312        6935 :                 dfCosOver3 = dfNewCosOver3;
    4313             :             }
    4314             : 
    4315         383 :             psWrkStruct->iLastSrcY = iSrcY;
    4316         383 :             psWrkStruct->dfLastDeltaY = dfDeltaY;
    4317             :         }
    4318             :     }
    4319             :     else
    4320             :     {
    4321      684742 :         while (jMin - dfDeltaY < -3.0)
    4322      270460 :             jMin++;
    4323      414282 :         while (jMax - dfDeltaY > 3.0)
    4324           0 :             jMax--;
    4325             : 
    4326      414282 :         if (iSrcY != psWrkStruct->iLastSrcY ||
    4327      413663 :             dfDeltaY != psWrkStruct->dfLastDeltaY)
    4328             :         {
    4329        1132 :             const double dfSinPIDeltaYOver3 = sin((-M_PI / 3.0) * dfDeltaY);
    4330        1132 :             const double dfSin2PIDeltaYOver3 =
    4331             :                 dfSinPIDeltaYOver3 * dfSinPIDeltaYOver3;
    4332             :             // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaY < PI/2.
    4333        1132 :             const double dfCosPIDeltaYOver3 = sqrt(1.0 - dfSin2PIDeltaYOver3);
    4334        1132 :             const double dfSinPIDeltaY =
    4335        1132 :                 (3.0 - 4.0 * dfSin2PIDeltaYOver3) * dfSinPIDeltaYOver3;
    4336        1132 :             const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
    4337        1132 :             const double dfInvPI2Over3xSinPIDeltaY =
    4338             :                 dfInvPI2Over3 * dfSinPIDeltaY;
    4339        1132 :             const double dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 =
    4340        1132 :                 -0.5 * dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3;
    4341        1132 :             const double dfSinPIOver3 = 0.8660254037844386;
    4342        1132 :             const double dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3 =
    4343        1132 :                 dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaY * dfCosPIDeltaYOver3;
    4344             :             const double padfCst[] = {
    4345        1132 :                 dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3,
    4346        1132 :                 dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 -
    4347             :                     dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3,
    4348        1132 :                 dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 +
    4349        1132 :                     dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3};
    4350             : 
    4351        7917 :             for (int j = jMin; j <= jMax; ++j)
    4352             :             {
    4353        6785 :                 const double dfY = j - dfDeltaY;
    4354        6785 :                 if (dfY == 0.0)
    4355         460 :                     padfWeightsYShifted[j] = 1.0;
    4356             :                 else
    4357        6325 :                     padfWeightsYShifted[j] = padfCst[(j + 3) % 3] / (dfY * dfY);
    4358             : #if DEBUG_VERBOSE
    4359             :                     // TODO(schwehr): AlmostEqual.
    4360             :                     // CPLAssert(fabs(padfWeightsYShifted[j] -
    4361             :                     //               GWKLanczosSinc(dfY, 3.0)) < 1e-10);
    4362             : #endif
    4363             :             }
    4364             : 
    4365        1132 :             psWrkStruct->iLastSrcY = iSrcY;
    4366        1132 :             psWrkStruct->dfLastDeltaY = dfDeltaY;
    4367             :         }
    4368             :     }
    4369             : 
    4370             :     // If we have no density information, we can simply compute the
    4371             :     // accumulated weight.
    4372      617144 :     if (padfRowDensity == nullptr)
    4373             :     {
    4374      617144 :         double dfRowAccWeight = 0.0;
    4375     7903490 :         for (int i = iMin; i <= iMax; ++i)
    4376             :         {
    4377     7286350 :             dfRowAccWeight += padfWeightsXShifted[i];
    4378             :         }
    4379      617144 :         double dfColAccWeight = 0.0;
    4380     7958040 :         for (int j = jMin; j <= jMax; ++j)
    4381             :         {
    4382     7340900 :             dfColAccWeight += padfWeightsYShifted[j];
    4383             :         }
    4384      617144 :         dfAccumulatorWeight = dfRowAccWeight * dfColAccWeight;
    4385             :     }
    4386             : 
    4387             :     // Loop over pixel rows in the kernel.
    4388             : 
    4389      617144 :     if (poWK->eWorkingDataType == GDT_UInt8 && !poWK->panUnifiedSrcValid &&
    4390      616524 :         !poWK->papanBandSrcValid && !poWK->pafUnifiedSrcDensity &&
    4391             :         !padfRowDensity)
    4392             :     {
    4393             :         // Optimization for Byte case without any masking/alpha
    4394             : 
    4395      616524 :         if (dfAccumulatorWeight < 0.000001)
    4396             :         {
    4397           0 :             *pdfDensity = 0.0;
    4398           0 :             return false;
    4399             :         }
    4400             : 
    4401      616524 :         const GByte *pSrc =
    4402      616524 :             reinterpret_cast<const GByte *>(poWK->papabySrcImage[iBand]);
    4403      616524 :         pSrc += iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
    4404             : 
    4405             : #if defined(USE_SSE2)
    4406      616524 :         if (iMax - iMin + 1 == 6)
    4407             :         {
    4408             :             // This is just an optimized version of the general case in
    4409             :             // the else clause.
    4410             : 
    4411      346854 :             pSrc += iMin;
    4412      346854 :             int j = jMin;
    4413             :             const auto fourXWeights =
    4414      346854 :                 XMMReg4Double::Load4Val(padfWeightsXShifted + iMin);
    4415             : 
    4416             :             // Process 2 lines at the same time.
    4417     1375860 :             for (; j < jMax; j += 2)
    4418             :             {
    4419             :                 const XMMReg4Double v_acc =
    4420     1029000 :                     XMMReg4Double::Load4Val(pSrc) * fourXWeights;
    4421             :                 const XMMReg4Double v_acc2 =
    4422     1029000 :                     XMMReg4Double::Load4Val(pSrc + nSrcXSize) * fourXWeights;
    4423     1029000 :                 const double dfRowAcc = v_acc.GetHorizSum();
    4424     1029000 :                 const double dfRowAccEnd =
    4425     1029000 :                     pSrc[4] * padfWeightsXShifted[iMin + 4] +
    4426     1029000 :                     pSrc[5] * padfWeightsXShifted[iMin + 5];
    4427     1029000 :                 dfAccumulatorReal +=
    4428     1029000 :                     (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
    4429     1029000 :                 const double dfRowAcc2 = v_acc2.GetHorizSum();
    4430     1029000 :                 const double dfRowAcc2End =
    4431     1029000 :                     pSrc[nSrcXSize + 4] * padfWeightsXShifted[iMin + 4] +
    4432     1029000 :                     pSrc[nSrcXSize + 5] * padfWeightsXShifted[iMin + 5];
    4433     1029000 :                 dfAccumulatorReal +=
    4434     1029000 :                     (dfRowAcc2 + dfRowAcc2End) * padfWeightsYShifted[j + 1];
    4435     1029000 :                 pSrc += 2 * nSrcXSize;
    4436             :             }
    4437      346854 :             if (j == jMax)
    4438             :             {
    4439             :                 // Process last line if there's an odd number of them.
    4440             : 
    4441             :                 const XMMReg4Double v_acc =
    4442       86045 :                     XMMReg4Double::Load4Val(pSrc) * fourXWeights;
    4443       86045 :                 const double dfRowAcc = v_acc.GetHorizSum();
    4444       86045 :                 const double dfRowAccEnd =
    4445       86045 :                     pSrc[4] * padfWeightsXShifted[iMin + 4] +
    4446       86045 :                     pSrc[5] * padfWeightsXShifted[iMin + 5];
    4447       86045 :                 dfAccumulatorReal +=
    4448       86045 :                     (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
    4449             :             }
    4450             :         }
    4451             :         else
    4452             : #endif
    4453             :         {
    4454     5463580 :             for (int j = jMin; j <= jMax; ++j)
    4455             :             {
    4456     5193900 :                 int i = iMin;
    4457     5193900 :                 double dfRowAcc1 = 0.0;
    4458     5193900 :                 double dfRowAcc2 = 0.0;
    4459             :                 // A bit of loop unrolling
    4460    62750600 :                 for (; i < iMax; i += 2)
    4461             :                 {
    4462    57556700 :                     dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
    4463    57556700 :                     dfRowAcc2 += pSrc[i + 1] * padfWeightsXShifted[i + 1];
    4464             :                 }
    4465     5193900 :                 if (i == iMax)
    4466             :                 {
    4467             :                     // Process last column if there's an odd number of them.
    4468      426183 :                     dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
    4469             :                 }
    4470             : 
    4471     5193900 :                 dfAccumulatorReal +=
    4472     5193900 :                     (dfRowAcc1 + dfRowAcc2) * padfWeightsYShifted[j];
    4473     5193900 :                 pSrc += nSrcXSize;
    4474             :             }
    4475             :         }
    4476             : 
    4477             :         // Calculate the output taking into account weighting.
    4478      616524 :         if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
    4479             :         {
    4480      569230 :             const double dfInvAcc = 1.0 / dfAccumulatorWeight;
    4481      569230 :             *pdfReal = dfAccumulatorReal * dfInvAcc;
    4482      569230 :             *pdfDensity = 1.0;
    4483             :         }
    4484             :         else
    4485             :         {
    4486       47294 :             *pdfReal = dfAccumulatorReal;
    4487       47294 :             *pdfDensity = 1.0;
    4488             :         }
    4489             : 
    4490      616524 :         return true;
    4491             :     }
    4492             : 
    4493         620 :     GPtrDiff_t iRowOffset =
    4494         620 :         iSrcOffset + static_cast<GPtrDiff_t>(jMin - 1) * nSrcXSize + iMin;
    4495             : 
    4496         620 :     int nCountValid = 0;
    4497         620 :     const bool bIsNonComplex = !GDALDataTypeIsComplex(poWK->eWorkingDataType);
    4498             : 
    4499        3560 :     for (int j = jMin; j <= jMax; ++j)
    4500             :     {
    4501        2940 :         iRowOffset += nSrcXSize;
    4502             : 
    4503             :         // Get pixel values.
    4504             :         // We can potentially read extra elements after the "normal" end of the
    4505             :         // source arrays, but the contract of papabySrcImage[iBand],
    4506             :         // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
    4507             :         // is to have WARP_EXTRA_ELTS reserved at their end.
    4508        2940 :         if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
    4509             :                             padfRowDensity, padfRowReal, padfRowImag))
    4510           0 :             continue;
    4511             : 
    4512        2940 :         const double dfWeight1 = padfWeightsYShifted[j];
    4513             : 
    4514             :         // Iterate over pixels in row.
    4515        2940 :         if (padfRowDensity != nullptr)
    4516             :         {
    4517           0 :             for (int i = iMin; i <= iMax; ++i)
    4518             :             {
    4519             :                 // Skip sampling if pixel has zero density.
    4520           0 :                 if (padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD_DOUBLE)
    4521           0 :                     continue;
    4522             : 
    4523           0 :                 nCountValid++;
    4524             : 
    4525             :                 //  Use a cached set of weights for this row.
    4526           0 :                 const double dfWeight2 = dfWeight1 * padfWeightsXShifted[i];
    4527             : 
    4528             :                 // Accumulate!
    4529           0 :                 dfAccumulatorReal += padfRowReal[i - iMin] * dfWeight2;
    4530           0 :                 dfAccumulatorImag += padfRowImag[i - iMin] * dfWeight2;
    4531           0 :                 dfAccumulatorDensity += padfRowDensity[i - iMin] * dfWeight2;
    4532           0 :                 dfAccumulatorWeight += dfWeight2;
    4533             :             }
    4534             :         }
    4535        2940 :         else if (bIsNonComplex)
    4536             :         {
    4537        1764 :             double dfRowAccReal = 0.0;
    4538       10560 :             for (int i = iMin; i <= iMax; ++i)
    4539             :             {
    4540        8796 :                 const double dfWeight2 = padfWeightsXShifted[i];
    4541             : 
    4542             :                 // Accumulate!
    4543        8796 :                 dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
    4544             :             }
    4545             : 
    4546        1764 :             dfAccumulatorReal += dfRowAccReal * dfWeight1;
    4547             :         }
    4548             :         else
    4549             :         {
    4550        1176 :             double dfRowAccReal = 0.0;
    4551        1176 :             double dfRowAccImag = 0.0;
    4552        7040 :             for (int i = iMin; i <= iMax; ++i)
    4553             :             {
    4554        5864 :                 const double dfWeight2 = padfWeightsXShifted[i];
    4555             : 
    4556             :                 // Accumulate!
    4557        5864 :                 dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
    4558        5864 :                 dfRowAccImag += padfRowImag[i - iMin] * dfWeight2;
    4559             :             }
    4560             : 
    4561        1176 :             dfAccumulatorReal += dfRowAccReal * dfWeight1;
    4562        1176 :             dfAccumulatorImag += dfRowAccImag * dfWeight1;
    4563             :         }
    4564             :     }
    4565             : 
    4566         620 :     if (dfAccumulatorWeight < 0.000001 ||
    4567           0 :         (padfRowDensity != nullptr &&
    4568           0 :          (dfAccumulatorDensity < 0.000001 ||
    4569           0 :           nCountValid < (jMax - jMin + 1) * (iMax - iMin + 1) / 2)))
    4570             :     {
    4571           0 :         *pdfDensity = 0.0;
    4572           0 :         return false;
    4573             :     }
    4574             : 
    4575             :     // Calculate the output taking into account weighting.
    4576         620 :     if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
    4577             :     {
    4578           0 :         const double dfInvAcc = 1.0 / dfAccumulatorWeight;
    4579           0 :         *pdfReal = dfAccumulatorReal * dfInvAcc;
    4580           0 :         *pdfImag = dfAccumulatorImag * dfInvAcc;
    4581           0 :         if (padfRowDensity != nullptr)
    4582           0 :             *pdfDensity = dfAccumulatorDensity * dfInvAcc;
    4583             :         else
    4584           0 :             *pdfDensity = 1.0;
    4585             :     }
    4586             :     else
    4587             :     {
    4588         620 :         *pdfReal = dfAccumulatorReal;
    4589         620 :         *pdfImag = dfAccumulatorImag;
    4590         620 :         if (padfRowDensity != nullptr)
    4591           0 :             *pdfDensity = dfAccumulatorDensity;
    4592             :         else
    4593         620 :             *pdfDensity = 1.0;
    4594             :     }
    4595             : 
    4596         620 :     return true;
    4597             : }
    4598             : 
    4599             : /************************************************************************/
    4600             : /*                        GWKComputeWeights()                           */
    4601             : /************************************************************************/
    4602             : 
                       // Fills the horizontal and vertical weight tables for a separable
                       // resampling window and outputs the reciprocal of the product of the
                       // two weight sums.
                       //
                       // eResample              index into apfGWKFilter / apfGWKFilter4Values
                       //                        selecting the filter functions to call.
                       // iMin, iMax             inclusive range of column offsets; entry k of
                       //                        padfWeightsHorizontal is written for offset
                       //                        iMin + k (the table is indexed from 0).
                       // dfDeltaX, dfXScale     the filter is evaluated at
                       //                        (i - dfDeltaX) * dfXScale for each offset i.
                       // jMin, jMax, dfDeltaY,
                       // dfYScale               same as above for row offsets and
                       //                        padfWeightsVertical.
                       // padfWeightsHorizontal  output table, iMax - iMin + 1 entries written.
                       // padfWeightsVertical    output table, jMax - jMin + 1 entries written.
                       // dfInvWeights           output: 1 / (horizontal sum * vertical sum).
    4603     3881980 : static void GWKComputeWeights(GDALResampleAlg eResample, int iMin, int iMax,
    4604             :                               double dfDeltaX, double dfXScale, int jMin,
    4605             :                               int jMax, double dfDeltaY, double dfYScale,
    4606             :                               double *padfWeightsHorizontal,
    4607             :                               double *padfWeightsVertical, double &dfInvWeights)
    4608             : {
    4609             : 
                           // Scalar and 4-values-at-a-time variants of the selected filter; both
                           // are asserted to be non-null.
    4610     3881980 :     const FilterFuncType pfnGetWeight = apfGWKFilter[eResample];
    4611     3881980 :     CPLAssert(pfnGetWeight);
    4612     3881980 :     const FilterFunc4ValuesType pfnGetWeight4Values =
    4613     3881980 :         apfGWKFilter4Values[eResample];
    4614     3881980 :     CPLAssert(pfnGetWeight4Values);
    4615             : 
    4616     3881980 :     int i = iMin;  // Used after for.
    4617     3881980 :     int iC = 0;    // Used after for.
    4618             :     // Not zero, but as close as possible to it, to avoid potential division by
    4619             :     // zero at end of function
                           // NOTE(review): if both the i and j ranges are empty, both sums keep
                           // this seed value and their product presumably underflows to 0 -
                           // confirm that callers never pass two empty ranges.
    4620     3881980 :     double dfAccumulatorWeightHorizontal = cpl::NumericLimits<double>::min();
                           // Handle 4 offsets per iteration while at least 4 remain
                           // (i + 2 < iMax is the same as i + 3 <= iMax for integers).
                           // The 4 filter arguments are stored in the output slots, each one
                           // obtained by adding dfXScale to the previous one, and the slot
                           // address is handed to pfnGetWeight4Values, whose return value is
                           // added to the running sum.
                           // NOTE(review): presumably pfnGetWeight4Values overwrites the 4
                           // arguments in place with their weights and returns their sum -
                           // confirm against the apfGWKFilter4Values implementations.
    4621     8714600 :     for (; i + 2 < iMax; i += 4, iC += 4)
    4622             :     {
    4623     4832620 :         padfWeightsHorizontal[iC] = (i - dfDeltaX) * dfXScale;
    4624     4832620 :         padfWeightsHorizontal[iC + 1] = padfWeightsHorizontal[iC] + dfXScale;
    4625     4832620 :         padfWeightsHorizontal[iC + 2] =
    4626     4832620 :             padfWeightsHorizontal[iC + 1] + dfXScale;
    4627     4832620 :         padfWeightsHorizontal[iC + 3] =
    4628     4832620 :             padfWeightsHorizontal[iC + 2] + dfXScale;
    4629     4832620 :         dfAccumulatorWeightHorizontal +=
    4630     4832620 :             pfnGetWeight4Values(padfWeightsHorizontal + iC);
    4631             :     }
                           // Remaining offsets (at most 3), one at a time with the scalar filter.
    4632     4105160 :     for (; i <= iMax; ++i, ++iC)
    4633             :     {
    4634      223187 :         const double dfWeight = pfnGetWeight((i - dfDeltaX) * dfXScale);
    4635      223187 :         padfWeightsHorizontal[iC] = dfWeight;
    4636      223187 :         dfAccumulatorWeightHorizontal += dfWeight;
    4637             :     }
    4638             : 
                           // Same two-stage scheme for the vertical table.
    4639     3881980 :     int j = jMin;  // Used after for.
    4640     3881980 :     int jC = 0;    // Used after for.
    4641             :     // Not zero, but as close as possible to it, to avoid potential division by
    4642             :     // zero at end of function
    4643     3881980 :     double dfAccumulatorWeightVertical = cpl::NumericLimits<double>::min();
    4644     8166130 :     for (; j + 2 < jMax; j += 4, jC += 4)
    4645             :     {
    4646     4284160 :         padfWeightsVertical[jC] = (j - dfDeltaY) * dfYScale;
    4647     4284160 :         padfWeightsVertical[jC + 1] = padfWeightsVertical[jC] + dfYScale;
    4648     4284160 :         padfWeightsVertical[jC + 2] = padfWeightsVertical[jC + 1] + dfYScale;
    4649     4284160 :         padfWeightsVertical[jC + 3] = padfWeightsVertical[jC + 2] + dfYScale;
    4650     4284160 :         dfAccumulatorWeightVertical +=
    4651     4284160 :             pfnGetWeight4Values(padfWeightsVertical + jC);
    4652             :     }
    4653     8411130 :     for (; j <= jMax; ++j, ++jC)
    4654             :     {
    4655     4529160 :         const double dfWeight = pfnGetWeight((j - dfDeltaY) * dfYScale);
    4656     4529160 :         padfWeightsVertical[jC] = dfWeight;
    4657     4529160 :         dfAccumulatorWeightVertical += dfWeight;
    4658             :     }
    4659             : 
                           // Normalization factor: callers multiply their weighted pixel sum by
                           // this value instead of dividing by the total weight.
    4660     3881980 :     dfInvWeights =
    4661     3881980 :         1. / (dfAccumulatorWeightHorizontal * dfAccumulatorWeightVertical);
    4662     3881980 : }
    4663             : 
    4664             : /************************************************************************/
    4665             : /*                        GWKResampleNoMasksT()                         */
    4666             : /************************************************************************/
    4667             : 
                       // Computes one output value for band iBand at source position
                       // (dfSrcX, dfSrcY) as a separable weighted sum of source pixels of type
                       // T. No validity or density mask is read in this function.
                       //
                       // The window covers column offsets iMin..iMax and row offsets
                       // jMin..jMax around (iSrcX, iSrcY): nominally 1 - radius .. radius,
                       // reduced so that it does not extend past the image edges.
                       //
                       // padfWeightsHorizontal, padfWeightsVertical and dfInvWeights are
                       // filled by GWKComputeWeights() only when iBand == 0; for any other
                       // band the values already stored in them are used as-is.
                       // NOTE(review): this presumably relies on the caller processing band 0
                       // first for each position and passing the same buffers for the other
                       // bands - confirm against the caller.
                       //
                       // Returns the result of GWKBilinearResampleNoMasks4SampleT() when the
                       // position/radius sanity test fails; otherwise stores the result in
                       // *pValue (converted through GWKClampValueT<T>) and returns true.
    4668             : template <class T>
    4669             : static bool
    4670             : GWKResampleNoMasksT(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
    4671             :                     double dfSrcY, T *pValue, double *padfWeightsHorizontal,
    4672             :                     double *padfWeightsVertical, double &dfInvWeights)
    4673             : 
    4674             : {
    4675             :     // Commonly used; save locally.
    4676             :     const int nSrcXSize = poWK->nSrcXSize;
    4677             :     const int nSrcYSize = poWK->nSrcYSize;
    4678             : 
                           // Integer pixel position: floor of the coordinate after a half-pixel
                           // shift, and the matching linear index for rows of nSrcXSize pixels.
    4679             :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    4680             :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    4681             :     const GPtrDiff_t iSrcOffset =
    4682             :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    4683             : 
    4684             :     const int nXRadius = poWK->nXRadius;
    4685             :     const int nYRadius = poWK->nYRadius;
    4686             : 
    4687             :     // Politely refuse to process invalid coordinates or obscenely small image.
                           // Only upper bounds are tested here; for negative iSrcX / iSrcY the
                           // iMin / jMin adjustment below makes iSrcX + iMin and iSrcY + jMin 0.
    4688             :     if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
    4689             :         nYRadius > nSrcYSize)
    4690             :         return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    4691             :                                                   pValue);
    4692             : 
    4693             :     T *pSrcBand = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
                           // Offset of the requested position from (iSrcX, iSrcY) after the
                           // half-pixel shift; passed as the delta to GWKComputeWeights().
    4694             :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    4695             :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    4696             : 
                           // Scale factors larger than 1.0 are replaced by 1.0.
    4697             :     const double dfXScale = std::min(poWK->dfXScale, 1.0);
    4698             :     const double dfYScale = std::min(poWK->dfYScale, 1.0);
    4699             : 
                           // Column window: 1 - nXRadius .. nXRadius, reduced so that
                           // iSrcX + iMin >= 0 and iSrcX + iMax <= nSrcXSize - 1.
    4700             :     int iMin = 1 - nXRadius;
    4701             :     if (iSrcX + iMin < 0)
    4702             :         iMin = -iSrcX;
    4703             :     int iMax = nXRadius;
    4704             :     if (iSrcX + iMax >= nSrcXSize - 1)
    4705             :         iMax = nSrcXSize - 1 - iSrcX;
    4706             : 
                           // Row window, built the same way from nYRadius and nSrcYSize.
    4707             :     int jMin = 1 - nYRadius;
    4708             :     if (iSrcY + jMin < 0)
    4709             :         jMin = -iSrcY;
    4710             :     int jMax = nYRadius;
    4711             :     if (iSrcY + jMax >= nSrcYSize - 1)
    4712             :         jMax = nSrcYSize - 1 - iSrcY;
    4713             : 
                           // Weights and normalization factor are recomputed for band 0 only.
    4714             :     if (iBand == 0)
    4715             :     {
    4716             :         GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
    4717             :                           jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
    4718             :                           padfWeightsVertical, dfInvWeights);
    4719             :     }
    4720             : 
    4721             :     // Loop over all rows in the kernel.
                           // jC / iC index the weight tables from 0 while j / i are the signed
                           // offsets relative to (iSrcX, iSrcY).
    4722             :     double dfAccumulator = 0.0;
    4723             :     for (int jC = 0, j = jMin; j <= jMax; ++j, ++jC)
    4724             :     {
                               // Linear index of column offset 0 in the row at offset j.
    4725             :         const GPtrDiff_t iSampJ =
    4726             :             iSrcOffset + static_cast<GPtrDiff_t>(j) * nSrcXSize;
    4727             : 
    4728             :         // Loop over all pixels in the row.
                               // Two partial sums are kept inside the 4-column loop and merged
                               // right after it.
    4729             :         double dfAccumulatorLocal = 0.0;
    4730             :         double dfAccumulatorLocal2 = 0.0;
    4731             :         int iC = 0;
    4732             :         int i = iMin;
    4733             :         // Process by chunk of 4 cols.
                               // i + 2 < iMax is the same as i + 3 <= iMax: a full group of 4
                               // columns is still available.
    4734             :         for (; i + 2 < iMax; i += 4, iC += 4)
    4735             :         {
    4736             :             // Retrieve the pixel & accumulate.
    4737             :             dfAccumulatorLocal +=
    4738             :                 double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
    4739             :             dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
    4740             :                                   padfWeightsHorizontal[iC + 1];
    4741             :             dfAccumulatorLocal2 += double(pSrcBand[i + 2 + iSampJ]) *
    4742             :                                    padfWeightsHorizontal[iC + 2];
    4743             :             dfAccumulatorLocal2 += double(pSrcBand[i + 3 + iSampJ]) *
    4744             :                                    padfWeightsHorizontal[iC + 3];
    4745             :         }
    4746             :         dfAccumulatorLocal += dfAccumulatorLocal2;
                               // At most 3 columns remain: take a pair if at least 2 are left...
    4747             :         if (i < iMax)
    4748             :         {
    4749             :             dfAccumulatorLocal +=
    4750             :                 double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
    4751             :             dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
    4752             :                                   padfWeightsHorizontal[iC + 1];
    4753             :             i += 2;
    4754             :             iC += 2;
    4755             :         }
                               // ...then the single last column, if any.
    4756             :         if (i == iMax)
    4757             :         {
    4758             :             dfAccumulatorLocal +=
    4759             :                 double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
    4760             :         }
    4761             : 
                               // Apply the row's vertical weight to its horizontal sum.
    4762             :         dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
    4763             :     }
    4764             : 
                           // Normalize by the inverse total weight and convert to T.
    4765             :     *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
    4766             : 
    4767             :     return true;
    4768             : }
    4769             : 
    4770             : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
    4771             : /* Could possibly be used too on 32bit, but we would need to check at runtime */
    4772             : #if defined(USE_SSE2)
    4773             : 
    4774             : /************************************************************************/
    4775             : /*                    GWKResampleNoMasks_SSE2_T()                       */
    4776             : /************************************************************************/
    4777             : 
    4778             : template <class T>
    4779     9589853 : static bool GWKResampleNoMasks_SSE2_T(const GDALWarpKernel *poWK, int iBand,
    4780             :                                       double dfSrcX, double dfSrcY, T *pValue,
    4781             :                                       double *padfWeightsHorizontal,
    4782             :                                       double *padfWeightsVertical,
    4783             :                                       double &dfInvWeights)
    4784             : {
    4785             :     // Commonly used; save locally.
    4786     9589853 :     const int nSrcXSize = poWK->nSrcXSize;
    4787     9589853 :     const int nSrcYSize = poWK->nSrcYSize;
    4788             : 
    4789     9589853 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    4790     9589853 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    4791     9589853 :     const GPtrDiff_t iSrcOffset =
    4792     9589853 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    4793     9589853 :     const int nXRadius = poWK->nXRadius;
    4794     9589853 :     const int nYRadius = poWK->nYRadius;
    4795             : 
    4796             :     // Politely refuse to process invalid coordinates or obscenely small image.
    4797     9589853 :     if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
    4798             :         nYRadius > nSrcYSize)
    4799           3 :         return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    4800           3 :                                                   pValue);
    4801             : 
    4802     9589851 :     const T *pSrcBand =
    4803     9589851 :         reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
    4804             : 
    4805     9589851 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    4806     9589851 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    4807     9589851 :     const double dfXScale = std::min(poWK->dfXScale, 1.0);
    4808     9589851 :     const double dfYScale = std::min(poWK->dfYScale, 1.0);
    4809             : 
    4810     9589851 :     int iMin = 1 - nXRadius;
    4811     9589851 :     if (iSrcX + iMin < 0)
    4812       46218 :         iMin = -iSrcX;
    4813     9589851 :     int iMax = nXRadius;
    4814     9589851 :     if (iSrcX + iMax >= nSrcXSize - 1)
    4815       42714 :         iMax = nSrcXSize - 1 - iSrcX;
    4816             : 
    4817     9589851 :     int jMin = 1 - nYRadius;
    4818     9589851 :     if (iSrcY + jMin < 0)
    4819       49554 :         jMin = -iSrcY;
    4820     9589851 :     int jMax = nYRadius;
    4821     9589851 :     if (iSrcY + jMax >= nSrcYSize - 1)
    4822       35683 :         jMax = nSrcYSize - 1 - iSrcY;
    4823             : 
    4824     9589851 :     if (iBand == 0)
    4825             :     {
    4826     3881981 :         GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
    4827             :                           jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
    4828             :                           padfWeightsVertical, dfInvWeights);
    4829             :     }
    4830             : 
    4831     9589851 :     GPtrDiff_t iSampJ = iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
    4832             :     // Process by chunk of 4 rows.
    4833     9589851 :     int jC = 0;
    4834     9589851 :     int j = jMin;
    4835     9589851 :     double dfAccumulator = 0.0;
    4836    20264593 :     for (; j + 2 < jMax; j += 4, iSampJ += 4 * nSrcXSize, jC += 4)
    4837             :     {
    4838             :         // Loop over all pixels in the row.
    4839    10674692 :         int iC = 0;
    4840    10674692 :         int i = iMin;
    4841             :         // Process by chunk of 4 cols.
    4842    10674692 :         XMMReg4Double v_acc_1 = XMMReg4Double::Zero();
    4843    10674692 :         XMMReg4Double v_acc_2 = XMMReg4Double::Zero();
    4844    10674692 :         XMMReg4Double v_acc_3 = XMMReg4Double::Zero();
    4845    10674692 :         XMMReg4Double v_acc_4 = XMMReg4Double::Zero();
    4846    27984580 :         for (; i + 2 < iMax; i += 4, iC += 4)
    4847             :         {
    4848             :             // Retrieve the pixel & accumulate.
    4849    17309788 :             XMMReg4Double v_pixels_1 =
    4850    17309788 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
    4851    17309788 :             XMMReg4Double v_pixels_2 =
    4852    17309788 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + nSrcXSize);
    4853    17309788 :             XMMReg4Double v_pixels_3 =
    4854    17309788 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
    4855    17309788 :             XMMReg4Double v_pixels_4 =
    4856    17309788 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
    4857             : 
    4858    17309788 :             XMMReg4Double v_padfWeight =
    4859    17309788 :                 XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
    4860             : 
    4861    17309788 :             v_acc_1 += v_pixels_1 * v_padfWeight;
    4862    17309788 :             v_acc_2 += v_pixels_2 * v_padfWeight;
    4863    17309788 :             v_acc_3 += v_pixels_3 * v_padfWeight;
    4864    17309788 :             v_acc_4 += v_pixels_4 * v_padfWeight;
    4865             :         }
    4866             : 
    4867    10674692 :         if (i < iMax)
    4868             :         {
    4869      145982 :             XMMReg2Double v_pixels_1 =
    4870      145982 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ);
    4871      145982 :             XMMReg2Double v_pixels_2 =
    4872      145982 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + nSrcXSize);
    4873      145982 :             XMMReg2Double v_pixels_3 =
    4874      145982 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
    4875      145982 :             XMMReg2Double v_pixels_4 =
    4876      145982 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
    4877             : 
    4878      145982 :             XMMReg2Double v_padfWeight =
    4879      145982 :                 XMMReg2Double::Load2Val(padfWeightsHorizontal + iC);
    4880             : 
    4881      145982 :             v_acc_1.AddToLow(v_pixels_1 * v_padfWeight);
    4882      145982 :             v_acc_2.AddToLow(v_pixels_2 * v_padfWeight);
    4883      145982 :             v_acc_3.AddToLow(v_pixels_3 * v_padfWeight);
    4884      145982 :             v_acc_4.AddToLow(v_pixels_4 * v_padfWeight);
    4885             : 
    4886      145982 :             i += 2;
    4887      145982 :             iC += 2;
    4888             :         }
    4889             : 
    4890    10674692 :         double dfAccumulatorLocal_1 = v_acc_1.GetHorizSum();
    4891    10674692 :         double dfAccumulatorLocal_2 = v_acc_2.GetHorizSum();
    4892    10674692 :         double dfAccumulatorLocal_3 = v_acc_3.GetHorizSum();
    4893    10674692 :         double dfAccumulatorLocal_4 = v_acc_4.GetHorizSum();
    4894             : 
    4895    10674692 :         if (i == iMax)
    4896             :         {
    4897       52267 :             dfAccumulatorLocal_1 += static_cast<double>(pSrcBand[i + iSampJ]) *
    4898       52267 :                                     padfWeightsHorizontal[iC];
    4899       52267 :             dfAccumulatorLocal_2 +=
    4900       52267 :                 static_cast<double>(pSrcBand[i + iSampJ + nSrcXSize]) *
    4901       52267 :                 padfWeightsHorizontal[iC];
    4902       52267 :             dfAccumulatorLocal_3 +=
    4903       52267 :                 static_cast<double>(pSrcBand[i + iSampJ + 2 * nSrcXSize]) *
    4904       52267 :                 padfWeightsHorizontal[iC];
    4905       52267 :             dfAccumulatorLocal_4 +=
    4906       52267 :                 static_cast<double>(pSrcBand[i + iSampJ + 3 * nSrcXSize]) *
    4907       52267 :                 padfWeightsHorizontal[iC];
    4908             :         }
    4909             : 
    4910    10674692 :         dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal_1;
    4911    10674692 :         dfAccumulator += padfWeightsVertical[jC + 1] * dfAccumulatorLocal_2;
    4912    10674692 :         dfAccumulator += padfWeightsVertical[jC + 2] * dfAccumulatorLocal_3;
    4913    10674692 :         dfAccumulator += padfWeightsVertical[jC + 3] * dfAccumulatorLocal_4;
    4914             :     }
    4915    22749841 :     for (; j <= jMax; ++j, iSampJ += nSrcXSize, ++jC)
    4916             :     {
    4917             :         // Loop over all pixels in the row.
    4918    13159940 :         int iC = 0;
    4919    13159940 :         int i = iMin;
    4920             :         // Process by chunk of 4 cols.
    4921    13159940 :         XMMReg4Double v_acc = XMMReg4Double::Zero();
    4922    26355663 :         for (; i + 2 < iMax; i += 4, iC += 4)
    4923             :         {
    4924             :             // Retrieve the pixel & accumulate.
    4925    13195723 :             XMMReg4Double v_pixels =
    4926    13195723 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
    4927    13195723 :             XMMReg4Double v_padfWeight =
    4928    13195723 :                 XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
    4929             : 
    4930    13195723 :             v_acc += v_pixels * v_padfWeight;
    4931             :         }
    4932             : 
    4933    13159940 :         double dfAccumulatorLocal = v_acc.GetHorizSum();
    4934             : 
    4935    13159940 :         if (i < iMax)
    4936             :         {
    4937      173976 :             dfAccumulatorLocal +=
    4938      173976 :                 double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
    4939      173976 :             dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
    4940      173976 :                                   padfWeightsHorizontal[iC + 1];
    4941      173976 :             i += 2;
    4942      173976 :             iC += 2;
    4943             :         }
    4944    13159940 :         if (i == iMax)
    4945             :         {
    4946       33032 :             dfAccumulatorLocal += static_cast<double>(pSrcBand[i + iSampJ]) *
    4947       33032 :                                   padfWeightsHorizontal[iC];
    4948             :         }
    4949             : 
    4950    13159940 :         dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
    4951             :     }
    4952             : 
    4953     9589851 :     *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
    4954             : 
    4955     9589851 :     return true;
    4956             : }
    4957             : 
    4958             : /************************************************************************/
    4959             : /*                     GWKResampleNoMasksT<GByte>()                     */
    4960             : /************************************************************************/
    4961             : 
    4962             : template <>
    4963     8991350 : bool GWKResampleNoMasksT<GByte>(const GDALWarpKernel *poWK, int iBand,
    4964             :                                 double dfSrcX, double dfSrcY, GByte *pValue,
    4965             :                                 double *padfWeightsHorizontal,
    4966             :                                 double *padfWeightsVertical,
    4967             :                                 double &dfInvWeights)
    4968             : {
    4969     8991350 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    4970             :                                      padfWeightsHorizontal, padfWeightsVertical,
    4971     8991350 :                                      dfInvWeights);
    4972             : }
    4973             : 
    4974             : /************************************************************************/
    4975             : /*                     GWKResampleNoMasksT<GInt16>()                    */
    4976             : /************************************************************************/
    4977             : 
    4978             : template <>
    4979      252563 : bool GWKResampleNoMasksT<GInt16>(const GDALWarpKernel *poWK, int iBand,
    4980             :                                  double dfSrcX, double dfSrcY, GInt16 *pValue,
    4981             :                                  double *padfWeightsHorizontal,
    4982             :                                  double *padfWeightsVertical,
    4983             :                                  double &dfInvWeights)
    4984             : {
    4985      252563 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    4986             :                                      padfWeightsHorizontal, padfWeightsVertical,
    4987      252563 :                                      dfInvWeights);
    4988             : }
    4989             : 
    4990             : /************************************************************************/
    4991             : /*                     GWKResampleNoMasksT<GUInt16>()                   */
    4992             : /************************************************************************/
    4993             : 
    4994             : template <>
    4995      343440 : bool GWKResampleNoMasksT<GUInt16>(const GDALWarpKernel *poWK, int iBand,
    4996             :                                   double dfSrcX, double dfSrcY, GUInt16 *pValue,
    4997             :                                   double *padfWeightsHorizontal,
    4998             :                                   double *padfWeightsVertical,
    4999             :                                   double &dfInvWeights)
    5000             : {
    5001      343440 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    5002             :                                      padfWeightsHorizontal, padfWeightsVertical,
    5003      343440 :                                      dfInvWeights);
    5004             : }
    5005             : 
    5006             : /************************************************************************/
    5007             : /*                     GWKResampleNoMasksT<float>()                     */
    5008             : /************************************************************************/
    5009             : 
    5010             : template <>
    5011        2500 : bool GWKResampleNoMasksT<float>(const GDALWarpKernel *poWK, int iBand,
    5012             :                                 double dfSrcX, double dfSrcY, float *pValue,
    5013             :                                 double *padfWeightsHorizontal,
    5014             :                                 double *padfWeightsVertical,
    5015             :                                 double &dfInvWeights)
    5016             : {
    5017        2500 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    5018             :                                      padfWeightsHorizontal, padfWeightsVertical,
    5019        2500 :                                      dfInvWeights);
    5020             : }
    5021             : 
    5022             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    5023             : 
    5024             : /************************************************************************/
    5025             : /*                     GWKResampleNoMasksT<double>()                    */
    5026             : /************************************************************************/
    5027             : 
    5028             : template <>
    5029             : bool GWKResampleNoMasksT<double>(const GDALWarpKernel *poWK, int iBand,
    5030             :                                  double dfSrcX, double dfSrcY, double *pValue,
    5031             :                                  double *padfWeightsHorizontal,
    5032             :                                  double *padfWeightsVertical,
    5033             :                                  double &dfInvWeights)
    5034             : {
    5035             :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    5036             :                                      padfWeightsHorizontal, padfWeightsVertical,
    5037             :                                      dfInvWeights);
    5038             : }
    5039             : 
    5040             : #endif /* INSTANTIATE_FLOAT64_SSE2_IMPL */
    5041             : 
    5042             : #endif /* defined(USE_SSE2) */
    5043             : 
    5044             : /************************************************************************/
    5045             : /*                     GWKRoundSourceCoordinates()                      */
    5046             : /************************************************************************/
    5047             : 
    5048        1000 : static void GWKRoundSourceCoordinates(
    5049             :     int nDstXSize, double *padfX, double *padfY, double *padfZ, int *pabSuccess,
    5050             :     double dfSrcCoordPrecision, double dfErrorThreshold,
    5051             :     GDALTransformerFunc pfnTransformer, void *pTransformerArg, double dfDstXOff,
    5052             :     double dfDstY)
    5053             : {
    5054        1000 :     double dfPct = 0.8;
    5055        1000 :     if (dfErrorThreshold > 0 && dfSrcCoordPrecision / dfErrorThreshold >= 10.0)
    5056             :     {
    5057        1000 :         dfPct = 1.0 - 2 * 1.0 / (dfSrcCoordPrecision / dfErrorThreshold);
    5058             :     }
    5059        1000 :     const double dfExactTransformThreshold = 0.5 * dfPct * dfSrcCoordPrecision;
    5060             : 
    5061      501000 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5062             :     {
    5063      500000 :         const double dfXBefore = padfX[iDstX];
    5064      500000 :         const double dfYBefore = padfY[iDstX];
    5065      500000 :         padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
    5066             :                        dfSrcCoordPrecision;
    5067      500000 :         padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
    5068             :                        dfSrcCoordPrecision;
    5069             : 
    5070             :         // If we are in an uncertainty zone, go to non-approximated
    5071             :         // transformation.
    5072             :         // Due to the 80% of half-precision threshold, dfSrcCoordPrecision must
    5073             :         // be at least 10 times greater than the approximation error.
    5074      500000 :         if (fabs(dfXBefore - padfX[iDstX]) > dfExactTransformThreshold ||
    5075      399914 :             fabs(dfYBefore - padfY[iDstX]) > dfExactTransformThreshold)
    5076             :         {
    5077      180090 :             padfX[iDstX] = iDstX + dfDstXOff;
    5078      180090 :             padfY[iDstX] = dfDstY;
    5079      180090 :             padfZ[iDstX] = 0.0;
    5080      180090 :             pfnTransformer(pTransformerArg, TRUE, 1, padfX + iDstX,
    5081      180090 :                            padfY + iDstX, padfZ + iDstX, pabSuccess + iDstX);
    5082      180090 :             padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
    5083             :                            dfSrcCoordPrecision;
    5084      180090 :             padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
    5085             :                            dfSrcCoordPrecision;
    5086             :         }
    5087             :     }
    5088        1000 : }
    5089             : 
    5090             : /************************************************************************/
    5091             : /*                     GWKCheckAndComputeSrcOffsets()                   */
    5092             : /************************************************************************/
    5093             : static CPL_INLINE bool
    5094   152624000 : GWKCheckAndComputeSrcOffsets(GWKJobStruct *psJob, int *_pabSuccess, int _iDstX,
    5095             :                              int _iDstY, double *_padfX, double *_padfY,
    5096             :                              int _nSrcXSize, int _nSrcYSize,
    5097             :                              GPtrDiff_t &iSrcOffset)
    5098             : {
    5099   152624000 :     const GDALWarpKernel *_poWK = psJob->poWK;
    5100   152829000 :     for (int iTry = 0; iTry < 2; ++iTry)
    5101             :     {
    5102   152829000 :         if (iTry == 1)
    5103             :         {
    5104             :             // If the source coordinate is slightly outside of the source raster
    5105             :             // retry to transform it alone, so that the exact coordinate
    5106             :             // transformer is used.
    5107             : 
    5108      205524 :             _padfX[_iDstX] = _iDstX + 0.5 + _poWK->nDstXOff;
    5109      205524 :             _padfY[_iDstX] = _iDstY + 0.5 + _poWK->nDstYOff;
    5110      205524 :             double dfZ = 0;
    5111      205524 :             _poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1,
    5112      205524 :                                   _padfX + _iDstX, _padfY + _iDstX, &dfZ,
    5113      205524 :                                   _pabSuccess + _iDstX);
    5114             :         }
    5115   152829000 :         if (!_pabSuccess[_iDstX])
    5116     3614790 :             return false;
    5117             : 
    5118             :         // If this happens this is likely the symptom of a bug somewhere.
    5119   149214000 :         if (std::isnan(_padfX[_iDstX]) || std::isnan(_padfY[_iDstX]))
    5120             :         {
    5121             :             static bool bNanCoordFound = false;
    5122           0 :             if (!bNanCoordFound)
    5123             :             {
    5124           0 :                 CPLDebug("WARP",
    5125             :                          "GWKCheckAndComputeSrcOffsets(): "
    5126             :                          "NaN coordinate found on point %d.",
    5127             :                          _iDstX);
    5128           0 :                 bNanCoordFound = true;
    5129             :             }
    5130           0 :             return false;
    5131             :         }
    5132             : 
    5133             :         /* --------------------------------------------------------------------
    5134             :          */
    5135             :         /*      Figure out what pixel we want in our source raster, and skip */
    5136             :         /*      further processing if it is well off the source image. */
    5137             :         /* --------------------------------------------------------------------
    5138             :          */
    5139             :         /* We test against the value before casting to avoid the */
    5140             :         /* problem of asymmetric truncation effects around zero.  That is */
    5141             :         /* -0.5 will be 0 when cast to an int. */
    5142   149214000 :         if (_padfX[_iDstX] < _poWK->nSrcXOff)
    5143             :         {
    5144             :             // If the source coordinate is slightly outside of the source raster
    5145             :             // retry to transform it alone, so that the exact coordinate
    5146             :             // transformer is used.
    5147     6006460 :             if (iTry == 0 && _padfX[_iDstX] > _poWK->nSrcXOff - 1)
    5148       42249 :                 continue;
    5149     5964210 :             return false;
    5150             :         }
    5151             : 
    5152   143208000 :         if (_padfY[_iDstX] < _poWK->nSrcYOff)
    5153             :         {
    5154             :             // If the source coordinate is slightly outside of the source raster
    5155             :             // retry to transform it alone, so that the exact coordinate
    5156             :             // transformer is used.
    5157     6203470 :             if (iTry == 0 && _padfY[_iDstX] > _poWK->nSrcYOff - 1)
    5158       64466 :                 continue;
    5159     6139010 :             return false;
    5160             :         }
    5161             : 
    5162             :         // Check for potential overflow when casting from float to int, (if
    5163             :         // operating outside natural projection area, padfX/Y can be a very huge
    5164             :         // positive number before doing the actual conversion), as such cast is
    5165             :         // undefined behavior that can trigger exception with some compilers
    5166             :         // (see #6753)
    5167   137004000 :         if (_padfX[_iDstX] + 1e-10 > _nSrcXSize + _poWK->nSrcXOff)
    5168             :         {
    5169             :             // If the source coordinate is slightly outside of the source raster
    5170             :             // retry to transform it alone, so that the exact coordinate
    5171             :             // transformer is used.
    5172     3932310 :             if (iTry == 0 && _padfX[_iDstX] < _nSrcXSize + _poWK->nSrcXOff + 1)
    5173       47544 :                 continue;
    5174     3884760 :             return false;
    5175             :         }
    5176   133072000 :         if (_padfY[_iDstX] + 1e-10 > _nSrcYSize + _poWK->nSrcYOff)
    5177             :         {
    5178             :             // If the source coordinate is slightly outside of the source raster
    5179             :             // retry to transform it alone, so that the exact coordinate
    5180             :             // transformer is used.
    5181     4488370 :             if (iTry == 0 && _padfY[_iDstX] < _nSrcYSize + _poWK->nSrcYOff + 1)
    5182       51265 :                 continue;
    5183     4437110 :             return false;
    5184             :         }
    5185             : 
    5186   128584000 :         break;
    5187             :     }
    5188             : 
    5189   128584000 :     int iSrcX = static_cast<int>(_padfX[_iDstX] + 1.0e-10) - _poWK->nSrcXOff;
    5190   128584000 :     int iSrcY = static_cast<int>(_padfY[_iDstX] + 1.0e-10) - _poWK->nSrcYOff;
    5191   128584000 :     if (iSrcX == _nSrcXSize)
    5192           0 :         iSrcX--;
    5193   128584000 :     if (iSrcY == _nSrcYSize)
    5194           0 :         iSrcY--;
    5195             : 
    5196             :     // Those checks should normally be OK given the previous ones.
    5197   128584000 :     CPLAssert(iSrcX >= 0);
    5198   128584000 :     CPLAssert(iSrcY >= 0);
    5199   128584000 :     CPLAssert(iSrcX < _nSrcXSize);
    5200   128584000 :     CPLAssert(iSrcY < _nSrcYSize);
    5201             : 
    5202   128584000 :     iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * _nSrcXSize;
    5203             : 
    5204   128584000 :     return true;
    5205             : }
    5206             : 
    5207             : /************************************************************************/
    5208             : /*                   GWKOneSourceCornerFailsToReproject()               */
    5209             : /************************************************************************/
    5210             : 
    5211         917 : static bool GWKOneSourceCornerFailsToReproject(GWKJobStruct *psJob)
    5212             : {
    5213         917 :     GDALWarpKernel *poWK = psJob->poWK;
    5214        2741 :     for (int iY = 0; iY <= 1; ++iY)
    5215             :     {
    5216        5478 :         for (int iX = 0; iX <= 1; ++iX)
    5217             :         {
    5218        3654 :             double dfXTmp = poWK->nSrcXOff + iX * poWK->nSrcXSize;
    5219        3654 :             double dfYTmp = poWK->nSrcYOff + iY * poWK->nSrcYSize;
    5220        3654 :             double dfZTmp = 0;
    5221        3654 :             int nSuccess = FALSE;
    5222        3654 :             poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp,
    5223             :                                  &dfYTmp, &dfZTmp, &nSuccess);
    5224        3654 :             if (!nSuccess)
    5225           6 :                 return true;
    5226             :         }
    5227             :     }
    5228         911 :     return false;
    5229             : }
    5230             : 
    5231             : /************************************************************************/
    5232             : /*                       GWKAdjustSrcOffsetOnEdge()                     */
    5233             : /************************************************************************/
    5234             : 
    5235        9714 : static bool GWKAdjustSrcOffsetOnEdge(GWKJobStruct *psJob,
    5236             :                                      GPtrDiff_t &iSrcOffset)
    5237             : {
    5238        9714 :     GDALWarpKernel *poWK = psJob->poWK;
    5239        9714 :     const int nSrcXSize = poWK->nSrcXSize;
    5240        9714 :     const int nSrcYSize = poWK->nSrcYSize;
    5241             : 
    5242             :     // Check if the computed source position slightly altered
    5243             :     // fails to reproject. If so, then we are at the edge of
    5244             :     // the validity area, and it is worth checking neighbour
    5245             :     // source pixels for validity.
    5246        9714 :     int nSuccess = FALSE;
    5247             :     {
    5248        9714 :         double dfXTmp =
    5249        9714 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5250        9714 :         double dfYTmp =
    5251        9714 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5252        9714 :         double dfZTmp = 0;
    5253        9714 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5254             :                              &dfZTmp, &nSuccess);
    5255             :     }
    5256        9714 :     if (nSuccess)
    5257             :     {
    5258        6996 :         double dfXTmp =
    5259        6996 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5260        6996 :         double dfYTmp =
    5261        6996 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
    5262        6996 :         double dfZTmp = 0;
    5263        6996 :         nSuccess = FALSE;
    5264        6996 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5265             :                              &dfZTmp, &nSuccess);
    5266             :     }
    5267        9714 :     if (nSuccess)
    5268             :     {
    5269        5624 :         double dfXTmp =
    5270        5624 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
    5271        5624 :         double dfYTmp =
    5272        5624 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5273        5624 :         double dfZTmp = 0;
    5274        5624 :         nSuccess = FALSE;
    5275        5624 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5276             :                              &dfZTmp, &nSuccess);
    5277             :     }
    5278             : 
    5279       14166 :     if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
    5280        4452 :         CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + 1))
    5281             :     {
    5282        1860 :         iSrcOffset++;
    5283        1860 :         return true;
    5284             :     }
    5285       10290 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
    5286        2436 :              CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + nSrcXSize))
    5287             :     {
    5288        1334 :         iSrcOffset += nSrcXSize;
    5289        1334 :         return true;
    5290             :     }
    5291        7838 :     else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
    5292        1318 :              CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - 1))
    5293             :     {
    5294         956 :         iSrcOffset--;
    5295         956 :         return true;
    5296             :     }
    5297        5924 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
    5298         360 :              CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - nSrcXSize))
    5299             :     {
    5300         340 :         iSrcOffset -= nSrcXSize;
    5301         340 :         return true;
    5302             :     }
    5303             : 
    5304        5224 :     return false;
    5305             : }
    5306             : 
    5307             : /************************************************************************/
    5308             : /*                 GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity()          */
    5309             : /************************************************************************/
    5310             : 
    5311           0 : static bool GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(GWKJobStruct *psJob,
    5312             :                                                       GPtrDiff_t &iSrcOffset)
    5313             : {
    5314           0 :     GDALWarpKernel *poWK = psJob->poWK;
    5315           0 :     const int nSrcXSize = poWK->nSrcXSize;
    5316           0 :     const int nSrcYSize = poWK->nSrcYSize;
    5317             : 
    5318             :     // Check if the computed source position slightly altered
    5319             :     // fails to reproject. If so, then we are at the edge of
    5320             :     // the validity area, and it is worth checking neighbour
    5321             :     // source pixels for validity.
    5322           0 :     int nSuccess = FALSE;
    5323             :     {
    5324           0 :         double dfXTmp =
    5325           0 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5326           0 :         double dfYTmp =
    5327           0 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5328           0 :         double dfZTmp = 0;
    5329           0 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5330             :                              &dfZTmp, &nSuccess);
    5331             :     }
    5332           0 :     if (nSuccess)
    5333             :     {
    5334           0 :         double dfXTmp =
    5335           0 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5336           0 :         double dfYTmp =
    5337           0 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
    5338           0 :         double dfZTmp = 0;
    5339           0 :         nSuccess = FALSE;
    5340           0 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5341             :                              &dfZTmp, &nSuccess);
    5342             :     }
    5343           0 :     if (nSuccess)
    5344             :     {
    5345           0 :         double dfXTmp =
    5346           0 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
    5347           0 :         double dfYTmp =
    5348           0 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5349           0 :         double dfZTmp = 0;
    5350           0 :         nSuccess = FALSE;
    5351           0 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5352             :                              &dfZTmp, &nSuccess);
    5353             :     }
    5354             : 
    5355           0 :     if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
    5356           0 :         poWK->pafUnifiedSrcDensity[iSrcOffset + 1] >=
    5357             :             SRC_DENSITY_THRESHOLD_FLOAT)
    5358             :     {
    5359           0 :         iSrcOffset++;
    5360           0 :         return true;
    5361             :     }
    5362           0 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
    5363           0 :              poWK->pafUnifiedSrcDensity[iSrcOffset + nSrcXSize] >=
    5364             :                  SRC_DENSITY_THRESHOLD_FLOAT)
    5365             :     {
    5366           0 :         iSrcOffset += nSrcXSize;
    5367           0 :         return true;
    5368             :     }
    5369           0 :     else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
    5370           0 :              poWK->pafUnifiedSrcDensity[iSrcOffset - 1] >=
    5371             :                  SRC_DENSITY_THRESHOLD_FLOAT)
    5372             :     {
    5373           0 :         iSrcOffset--;
    5374           0 :         return true;
    5375             :     }
    5376           0 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
    5377           0 :              poWK->pafUnifiedSrcDensity[iSrcOffset - nSrcXSize] >=
    5378             :                  SRC_DENSITY_THRESHOLD_FLOAT)
    5379             :     {
    5380           0 :         iSrcOffset -= nSrcXSize;
    5381           0 :         return true;
    5382             :     }
    5383             : 
    5384           0 :     return false;
    5385             : }
    5386             : 
    5387             : /************************************************************************/
    5388             : /*                           GWKGeneralCase()                           */
    5389             : /*                                                                      */
    5390             : /*      This is the most general case.  It attempts to handle all       */
    5391             : /*      possible features with relatively little concern for            */
    5392             : /*      efficiency.                                                     */
    5393             : /************************************************************************/
    5394             : 
    5395         239 : static void GWKGeneralCaseThread(void *pData)
    5396             : { // Worker for one GWKJobStruct: warps destination rows [iYMin, iYMax), all bands.
    5397         239 :     GWKJobStruct *psJob = reinterpret_cast<GWKJobStruct *>(pData);
    5398         239 :     GDALWarpKernel *poWK = psJob->poWK;
    5399         239 :     const int iYMin = psJob->iYMin;
    5400         239 :     const int iYMax = psJob->iYMax;
    5401             :     const double dfMultFactorVerticalShiftPipeline =
    5402         239 :         poWK->bApplyVerticalShift
    5403         239 :             ? CPLAtof(CSLFetchNameValueDef(
    5404           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    5405             :                   "1.0"))
    5406         239 :             : 0.0; // only read below when bApplyVerticalShift is set
    5407             :     const bool bAvoidNoDataSingleBand =
    5408         239 :         poWK->nBands == 1 ||
    5409           0 :         !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
    5410         239 :                                           "UNIFIED_SRC_NODATA", "FALSE"));
    5411             : 
    5412         239 :     int nDstXSize = poWK->nDstXSize;
    5413         239 :     int nSrcXSize = poWK->nSrcXSize;
    5414         239 :     int nSrcYSize = poWK->nSrcYSize;
    5415             : 
    5416             :     /* -------------------------------------------------------------------- */
    5417             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    5418             :     /*      scanline's worth of positions.                                  */
    5419             :     /* -------------------------------------------------------------------- */
    5420             :     // padfX is allocated 2x: its upper half caches the precomputed destination X values.
    5421             :     double *padfX =
    5422         239 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    5423             :     double *padfY =
    5424         239 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5425             :     double *padfZ =
    5426         239 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5427         239 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    5428             : 
    5429         239 :     const bool bUse4SamplesFormula =
    5430         239 :         poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95; // gates the 4-sample bilinear/cubic paths below
    5431             : 
    5432         239 :     GWKResampleWrkStruct *psWrkStruct = nullptr;
    5433         239 :     if (poWK->eResample != GRA_NearestNeighbour)
    5434             :     {
    5435         220 :         psWrkStruct = GWKResampleCreateWrkStruct(poWK);
    5436             :     }
    5437         239 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    5438         239 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    5439         239 :     const double dfErrorThreshold = CPLAtof(
    5440         239 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    5441             : 
    5442             :     const bool bOneSourceCornerFailsToReproject =
    5443         239 :         GWKOneSourceCornerFailsToReproject(psJob);
    5444             : 
    5445             :     // Precompute the destination pixel-center X of every column (upper half of padfX).
    5446        6469 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5447        6230 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    5448             : 
    5449             :     /* ==================================================================== */
    5450             :     /*      Loop over output lines.                                         */
    5451             :     /* ==================================================================== */
    5452        6469 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    5453             :     {
    5454             :         /* --------------------------------------------------------------------
    5455             :          */
    5456             :         /*      Set up the points to transform to source image space. */
    5457             :         /* --------------------------------------------------------------------
    5458             :          */
    5459        6230 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize); // reload cached destination X; padfX is reused as transformer output
    5460        6230 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    5461      242390 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5462      236160 :             padfY[iDstX] = dfY;
    5463        6230 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    5464             : 
    5465             :         /* --------------------------------------------------------------------
    5466             :          */
    5467             :         /*      Transform the points from destination pixel/line coordinates */
    5468             :         /*      to source pixel/line coordinates. */
    5469             :         /* --------------------------------------------------------------------
    5470             :          */
    5471        6230 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    5472             :                              padfY, padfZ, pabSuccess);
    5473        6230 :         if (dfSrcCoordPrecision > 0.0)
    5474             :         {
    5475           0 :             GWKRoundSourceCoordinates(
    5476             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    5477             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    5478           0 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    5479             :         }
    5480             : 
    5481             :         /* ====================================================================
    5482             :          */
    5483             :         /*      Loop over pixels in output scanline. */
    5484             :         /* ====================================================================
    5485             :          */
    5486      242390 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5487             :         {
    5488      236160 :             GPtrDiff_t iSrcOffset = 0;
    5489      236160 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    5490             :                                               padfX, padfY, nSrcXSize,
    5491             :                                               nSrcYSize, iSrcOffset))
    5492           0 :                 continue; // helper returned false: skip this destination pixel
    5493             : 
    5494             :             /* --------------------------------------------------------------------
    5495             :              */
    5496             :             /*      Do not try to apply transparent/invalid source pixels to the
    5497             :              */
    5498             :             /*      destination.  This currently ignores the multi-pixel input
    5499             :              */
    5500             :             /*      of bilinear and cubic resamples. */
    5501             :             /* --------------------------------------------------------------------
    5502             :              */
    5503      236160 :             double dfDensity = 1.0;
    5504             : 
    5505      236160 :             if (poWK->pafUnifiedSrcDensity != nullptr)
    5506             :             {
    5507        1200 :                 dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    5508        1200 :                 if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
    5509             :                 {
    5510           0 :                     if (!bOneSourceCornerFailsToReproject)
    5511             :                     {
    5512           0 :                         continue;
    5513             :                     }
    5514           0 :                     else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
    5515             :                                  psJob, iSrcOffset)) // presumably retargets iSrcOffset to a neighboring pixel - confirm in helper
    5516             :                     {
    5517           0 :                         dfDensity =
    5518           0 :                             double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    5519             :                     }
    5520             :                     else
    5521             :                     {
    5522           0 :                         continue;
    5523             :                     }
    5524             :                 }
    5525             :             }
    5526             : 
    5527      236160 :             if (poWK->panUnifiedSrcValid != nullptr &&
    5528           0 :                 !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    5529             :             {
    5530           0 :                 if (!bOneSourceCornerFailsToReproject)
    5531             :                 {
    5532           0 :                     continue;
    5533             :                 }
    5534           0 :                 else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
    5535             :                 {
    5536           0 :                     continue;
    5537             :                 }
    5538             :             }
    5539             : 
    5540             :             /* ====================================================================
    5541             :              */
    5542             :             /*      Loop processing each band. */
    5543             :             /* ====================================================================
    5544             :              */
    5545      236160 :             bool bHasFoundDensity = false;
    5546             : 
    5547      236160 :             const GPtrDiff_t iDstOffset =
    5548      236160 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize; // row-major index into the destination arrays
    5549      472320 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    5550             :             {
    5551      236160 :                 double dfBandDensity = 0.0;
    5552      236160 :                 double dfValueReal = 0.0;
    5553      236160 :                 double dfValueImag = 0.0;
    5554             : 
    5555             :                 /* --------------------------------------------------------------------
    5556             :                  */
    5557             :                 /*      Collect the source value. */
    5558             :                 /* --------------------------------------------------------------------
    5559             :                  */
    5560      236160 :                 if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
    5561             :                     nSrcYSize == 1) // 1-pixel-wide/high sources bypass the resamplers
    5562             :                 {
    5563             :                     // FALSE is returned if dfBandDensity == 0, which is
    5564             :                     // checked below.
    5565         568 :                     CPL_IGNORE_RET_VAL(GWKGetPixelValue(
    5566             :                         poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal,
    5567             :                         &dfValueImag));
    5568             :                 }
    5569      235592 :                 else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
    5570             :                 {
    5571         248 :                     GWKBilinearResample4Sample(
    5572         248 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5573         248 :                         padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5574             :                         &dfValueReal, &dfValueImag);
    5575             :                 }
    5576      235344 :                 else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
    5577             :                 {
    5578         248 :                     GWKCubicResample4Sample(
    5579         248 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5580         248 :                         padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5581             :                         &dfValueReal, &dfValueImag);
    5582             :                 }
    5583             :                 else
    5584             : #ifdef DEBUG
    5585             :                     // Only useful for clang static analyzer.
    5586      235096 :                     if (psWrkStruct != nullptr)
    5587             : #endif
    5588             :                     {
    5589      235096 :                         psWrkStruct->pfnGWKResample(
    5590      235096 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5591      235096 :                             padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5592             :                             &dfValueReal, &dfValueImag, psWrkStruct);
    5593             :                     }
    5594             : 
    5595             :                 // If we didn't find any valid inputs skip to next band.
    5596      236160 :                 if (dfBandDensity < BAND_DENSITY_THRESHOLD)
    5597           0 :                     continue;
    5598             : 
    5599      236160 :                 if (poWK->bApplyVerticalShift)
    5600             :                 {
    5601           0 :                     if (!std::isfinite(padfZ[iDstX]))
    5602           0 :                         continue;
    5603             :                     // Subtract padfZ[] since the coordinate transformation is
    5604             :                     // from target to source
    5605           0 :                     dfValueReal =
    5606           0 :                         dfValueReal * poWK->dfMultFactorVerticalShift -
    5607           0 :                         padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
    5608             :                 }
    5609             : 
    5610      236160 :                 bHasFoundDensity = true;
    5611             : 
    5612             :                 /* --------------------------------------------------------------------
    5613             :                  */
    5614             :                 /*      We have a computed value from the source.  Now apply it
    5615             :                  * to      */
    5616             :                 /*      the destination pixel. */
    5617             :                 /* --------------------------------------------------------------------
    5618             :                  */
    5619      236160 :                 GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    5620             :                                  dfValueReal, dfValueImag,
    5621             :                                  bAvoidNoDataSingleBand);
    5622             :             }
    5623             : 
    5624      236160 :             if (!bHasFoundDensity)
    5625           0 :                 continue; // no band was written: leave density/validity masks alone
    5626             : 
    5627      236160 :             if (!bAvoidNoDataSingleBand)
    5628             :             {
    5629           0 :                 GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    5630             :             }
    5631             : 
    5632             :             /* --------------------------------------------------------------------
    5633             :              */
    5634             :             /*      Update destination density/validity masks. */
    5635             :             /* --------------------------------------------------------------------
    5636             :              */
    5637      236160 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    5638             : 
    5639      236160 :             if (poWK->panDstValid != nullptr)
    5640             :             {
    5641           0 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    5642             :             }
    5643             :         } /* Next iDstX */
    5644             : 
    5645             :         /* --------------------------------------------------------------------
    5646             :          */
    5647             :         /*      Report progress to the user, and optionally cancel out. */
    5648             :         /* --------------------------------------------------------------------
    5649             :          */
    5650        6230 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    5651           0 :             break; // truthy pfnProgress() result ends this job early
    5652             :     }
    5653             : 
    5654             :     /* -------------------------------------------------------------------- */
    5655             :     /*      Cleanup and return.                                             */
    5656             :     /* -------------------------------------------------------------------- */
    5657         239 :     CPLFree(padfX);
    5658         239 :     CPLFree(padfY);
    5659         239 :     CPLFree(padfZ);
    5660         239 :     CPLFree(pabSuccess);
    5661         239 :     if (psWrkStruct)
    5662         220 :         GWKResampleDeleteWrkStruct(psWrkStruct);
    5663         239 : }
    5664             : 
    5665         239 : static CPLErr GWKGeneralCase(GDALWarpKernel *poWK)
    5666             : { // Passes GWKGeneralCaseThread to GWKRun() and returns its CPLErr; the string is presumably a debug label.
    5667         239 :     return GWKRun(poWK, "GWKGeneralCase", GWKGeneralCaseThread);
    5668             : }
    5669             : 
    5670             : /************************************************************************/
    5671             : /*                            GWKRealCase()                             */
    5672             : /*                                                                      */
    5673             : /*      General case for non-complex data types.                        */
    5674             : /************************************************************************/
    5675             : 
    5676         219 : static void GWKRealCaseThread(void *pData)
    5677             : 
    5678             : {
    5679         219 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    5680         219 :     GDALWarpKernel *poWK = psJob->poWK;
    5681         219 :     const int iYMin = psJob->iYMin;
    5682         219 :     const int iYMax = psJob->iYMax;
    5683             : 
    5684         219 :     const int nDstXSize = poWK->nDstXSize;
    5685         219 :     const int nSrcXSize = poWK->nSrcXSize;
    5686         219 :     const int nSrcYSize = poWK->nSrcYSize;
    5687             :     const double dfMultFactorVerticalShiftPipeline =
    5688         219 :         poWK->bApplyVerticalShift
    5689         219 :             ? CPLAtof(CSLFetchNameValueDef(
    5690           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    5691             :                   "1.0"))
    5692         219 :             : 0.0;
    5693             :     const bool bAvoidNoDataSingleBand =
    5694         297 :         poWK->nBands == 1 ||
    5695          78 :         !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
    5696         219 :                                           "UNIFIED_SRC_NODATA", "FALSE"));
    5697             : 
    5698             :     /* -------------------------------------------------------------------- */
    5699             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    5700             :     /*      scanlines worth of positions.                                   */
    5701             :     /* -------------------------------------------------------------------- */
    5702             : 
    5703             :     // For x, 2 *, because we cache the precomputed values at the end.
    5704             :     double *padfX =
    5705         219 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    5706             :     double *padfY =
    5707         219 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5708             :     double *padfZ =
    5709         219 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5710         219 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    5711             : 
    5712         219 :     const bool bUse4SamplesFormula =
    5713         219 :         poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
    5714             : 
    5715         219 :     GWKResampleWrkStruct *psWrkStruct = nullptr;
    5716         219 :     if (poWK->eResample != GRA_NearestNeighbour)
    5717             :     {
    5718         177 :         psWrkStruct = GWKResampleCreateWrkStruct(poWK);
    5719             :     }
    5720         219 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    5721         219 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    5722         219 :     const double dfErrorThreshold = CPLAtof(
    5723         219 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    5724             : 
    5725         626 :     const bool bSrcMaskIsDensity = poWK->panUnifiedSrcValid == nullptr &&
    5726         407 :                                    poWK->papanBandSrcValid == nullptr &&
    5727         188 :                                    poWK->pafUnifiedSrcDensity != nullptr;
    5728             : 
    5729             :     const bool bOneSourceCornerFailsToReproject =
    5730         219 :         GWKOneSourceCornerFailsToReproject(psJob);
    5731             : 
    5732             :     // Precompute values.
    5733       22605 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5734       22386 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    5735             : 
    5736             :     /* ==================================================================== */
    5737             :     /*      Loop over output lines.                                         */
    5738             :     /* ==================================================================== */
    5739       25393 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    5740             :     {
    5741             :         /* --------------------------------------------------------------------
    5742             :          */
    5743             :         /*      Setup points to transform to source image space. */
    5744             :         /* --------------------------------------------------------------------
    5745             :          */
    5746       25174 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    5747       25174 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    5748    44331500 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5749    44306300 :             padfY[iDstX] = dfY;
    5750       25174 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    5751             : 
    5752             :         /* --------------------------------------------------------------------
    5753             :          */
    5754             :         /*      Transform the points from destination pixel/line coordinates */
    5755             :         /*      to source pixel/line coordinates. */
    5756             :         /* --------------------------------------------------------------------
    5757             :          */
    5758       25174 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    5759             :                              padfY, padfZ, pabSuccess);
    5760       25174 :         if (dfSrcCoordPrecision > 0.0)
    5761             :         {
    5762           0 :             GWKRoundSourceCoordinates(
    5763             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    5764             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    5765           0 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    5766             :         }
    5767             : 
    5768             :         /* ====================================================================
    5769             :          */
    5770             :         /*      Loop over pixels in output scanline. */
    5771             :         /* ====================================================================
    5772             :          */
    5773    44331500 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5774             :         {
    5775    44306300 :             GPtrDiff_t iSrcOffset = 0;
    5776    44306300 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    5777             :                                               padfX, padfY, nSrcXSize,
    5778             :                                               nSrcYSize, iSrcOffset))
    5779    43567600 :                 continue;
    5780             : 
    5781             :             /* --------------------------------------------------------------------
    5782             :              */
    5783             :             /*      Do not try to apply transparent/invalid source pixels to the
    5784             :              */
    5785             :             /*      destination.  This currently ignores the multi-pixel input
    5786             :              */
    5787             :             /*      of bilinear and cubic resamples. */
    5788             :             /* --------------------------------------------------------------------
    5789             :              */
    5790    31793100 :             double dfDensity = 1.0;
    5791             : 
    5792    31793100 :             if (poWK->pafUnifiedSrcDensity != nullptr)
    5793             :             {
    5794     1656100 :                 dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    5795     1656100 :                 if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
    5796             :                 {
    5797     1525010 :                     if (!bOneSourceCornerFailsToReproject)
    5798             :                     {
    5799     1525010 :                         continue;
    5800             :                     }
    5801           0 :                     else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
    5802             :                                  psJob, iSrcOffset))
    5803             :                     {
    5804           0 :                         dfDensity =
    5805           0 :                             double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    5806             :                     }
    5807             :                     else
    5808             :                     {
    5809           0 :                         continue;
    5810             :                     }
    5811             :                 }
    5812             :             }
    5813             : 
    5814    59897300 :             if (poWK->panUnifiedSrcValid != nullptr &&
    5815    29629200 :                 !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    5816             :             {
    5817    29531600 :                 if (!bOneSourceCornerFailsToReproject)
    5818             :                 {
    5819    29529300 :                     continue;
    5820             :                 }
    5821        2229 :                 else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
    5822             :                 {
    5823           0 :                     continue;
    5824             :                 }
    5825             :             }
    5826             : 
    5827             :             /* ====================================================================
    5828             :              */
    5829             :             /*      Loop processing each band. */
    5830             :             /* ====================================================================
    5831             :              */
    5832      738768 :             bool bHasFoundDensity = false;
    5833             : 
    5834      738768 :             const GPtrDiff_t iDstOffset =
    5835      738768 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    5836     2069310 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    5837             :             {
    5838     1330540 :                 double dfBandDensity = 0.0;
    5839     1330540 :                 double dfValueReal = 0.0;
    5840             : 
    5841             :                 /* --------------------------------------------------------------------
    5842             :                  */
    5843             :                 /*      Collect the source value. */
    5844             :                 /* --------------------------------------------------------------------
    5845             :                  */
    5846     1330540 :                 if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
    5847             :                     nSrcYSize == 1)
    5848             :                 {
    5849             :                     // FALSE is returned if dfBandDensity == 0, which is
    5850             :                     // checked below.
    5851       15516 :                     CPL_IGNORE_RET_VAL(GWKGetPixelValueReal(
    5852             :                         poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal));
    5853             :                 }
    5854     1315030 :                 else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
    5855             :                 {
    5856        2046 :                     double dfValueImagIgnored = 0.0;
    5857        2046 :                     GWKBilinearResample4Sample(
    5858        2046 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5859        2046 :                         padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5860        2046 :                         &dfValueReal, &dfValueImagIgnored);
    5861             :                 }
    5862     1312980 :                 else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
    5863             :                 {
    5864      691552 :                     if (bSrcMaskIsDensity)
    5865             :                     {
    5866      389755 :                         if (poWK->eWorkingDataType == GDT_UInt8)
    5867             :                         {
    5868      389755 :                             GWKCubicResampleSrcMaskIsDensity4SampleRealT<GByte>(
    5869      389755 :                                 poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5870      389755 :                                 padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5871             :                                 &dfValueReal);
    5872             :                         }
    5873           0 :                         else if (poWK->eWorkingDataType == GDT_UInt16)
    5874             :                         {
    5875             :                             GWKCubicResampleSrcMaskIsDensity4SampleRealT<
    5876           0 :                                 GUInt16>(poWK, iBand,
    5877           0 :                                          padfX[iDstX] - poWK->nSrcXOff,
    5878           0 :                                          padfY[iDstX] - poWK->nSrcYOff,
    5879             :                                          &dfBandDensity, &dfValueReal);
    5880             :                         }
    5881             :                         else
    5882             :                         {
    5883           0 :                             GWKCubicResampleSrcMaskIsDensity4SampleReal(
    5884           0 :                                 poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5885           0 :                                 padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5886             :                                 &dfValueReal);
    5887             :                         }
    5888             :                     }
    5889             :                     else
    5890             :                     {
    5891      301797 :                         double dfValueImagIgnored = 0.0;
    5892      301797 :                         GWKCubicResample4Sample(
    5893      301797 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5894      301797 :                             padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5895             :                             &dfValueReal, &dfValueImagIgnored);
    5896      691552 :                     }
    5897             :                 }
    5898             :                 else
    5899             : #ifdef DEBUG
    5900             :                     // Only useful for clang static analyzer.
    5901      621431 :                     if (psWrkStruct != nullptr)
    5902             : #endif
    5903             :                     {
    5904      621431 :                         double dfValueImagIgnored = 0.0;
    5905      621431 :                         psWrkStruct->pfnGWKResample(
    5906      621431 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5907      621431 :                             padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5908             :                             &dfValueReal, &dfValueImagIgnored, psWrkStruct);
    5909             :                     }
    5910             : 
    5911             :                 // If we didn't find any valid inputs skip to next band.
    5912     1330540 :                 if (dfBandDensity < BAND_DENSITY_THRESHOLD)
    5913           0 :                     continue;
    5914             : 
    5915     1330540 :                 if (poWK->bApplyVerticalShift)
    5916             :                 {
    5917           0 :                     if (!std::isfinite(padfZ[iDstX]))
    5918           0 :                         continue;
    5919             :                     // Subtract padfZ[] since the coordinate transformation is
    5920             :                     // from target to source
    5921           0 :                     dfValueReal =
    5922           0 :                         dfValueReal * poWK->dfMultFactorVerticalShift -
    5923           0 :                         padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
    5924             :                 }
    5925             : 
    5926     1330540 :                 bHasFoundDensity = true;
    5927             : 
    5928             :                 /* --------------------------------------------------------------------
    5929             :                  */
    5930             :                 /*      We have a computed value from the source.  Now apply it
    5931             :                  * to      */
    5932             :                 /*      the destination pixel. */
    5933             :                 /* --------------------------------------------------------------------
    5934             :                  */
    5935     1330540 :                 GWKSetPixelValueReal(poWK, iBand, iDstOffset, dfBandDensity,
    5936             :                                      dfValueReal, bAvoidNoDataSingleBand);
    5937             :             }
    5938             : 
    5939      738768 :             if (!bHasFoundDensity)
    5940           0 :                 continue;
    5941             : 
    5942      738768 :             if (!bAvoidNoDataSingleBand)
    5943             :             {
    5944      100295 :                 GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    5945             :             }
    5946             : 
    5947             :             /* --------------------------------------------------------------------
    5948             :              */
    5949             :             /*      Update destination density/validity masks. */
    5950             :             /* --------------------------------------------------------------------
    5951             :              */
    5952      738768 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    5953             : 
    5954      738768 :             if (poWK->panDstValid != nullptr)
    5955             :             {
    5956      104586 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    5957             :             }
    5958             :         }  // Next iDstX.
    5959             : 
    5960             :         /* --------------------------------------------------------------------
    5961             :          */
    5962             :         /*      Report progress to the user, and optionally cancel out. */
    5963             :         /* --------------------------------------------------------------------
    5964             :          */
    5965       25174 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    5966           0 :             break;
    5967             :     }
    5968             : 
    5969             :     /* -------------------------------------------------------------------- */
    5970             :     /*      Cleanup and return.                                             */
    5971             :     /* -------------------------------------------------------------------- */
    5972         219 :     CPLFree(padfX);
    5973         219 :     CPLFree(padfY);
    5974         219 :     CPLFree(padfZ);
    5975         219 :     CPLFree(pabSuccess);
    5976         219 :     if (psWrkStruct)
    5977         177 :         GWKResampleDeleteWrkStruct(psWrkStruct);
    5978         219 : }
    5979             : // Entry point for the GWKRealCaseThread worker: passes it to GWKRun together with the string "GWKRealCase" and returns GWKRun's CPLErr unchanged.
    5980         219 : static CPLErr GWKRealCase(GDALWarpKernel *poWK)
    5981             : {
    5982         219 :     return GWKRun(poWK, "GWKRealCase", GWKRealCaseThread);
    5983             : }
    5984             : 
    5985             : /************************************************************************/
    5986             : /*                 GWKCubicResampleNoMasks4MultiBandT()                 */
    5987             : /************************************************************************/
    5988             : 
    5989             : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
    5990             : /* and enough SSE registers */
    5991             : #if defined(USE_SSE2)
    5992             : // Weighted sum of a 4x4 window: multiplies each row lane-wise by its matching weightsXYn vector, adds the four products, and returns XMMHorizontalAdd() of the result (presumably the sum of the four lanes; helper is defined elsewhere).
    5993   108610000 : static inline float Convolute4x4(const __m128 row0, const __m128 row1,
    5994             :                                  const __m128 row2, const __m128 row3,
    5995             :                                  const __m128 weightsXY0,
    5996             :                                  const __m128 weightsXY1,
    5997             :                                  const __m128 weightsXY2,
    5998             :                                  const __m128 weightsXY3)
    5999             : {
    6000   760270000 :     return XMMHorizontalAdd(_mm_add_ps(
    6001             :         _mm_add_ps(_mm_mul_ps(row0, weightsXY0), _mm_mul_ps(row1, weightsXY1)),
    6002             :         _mm_add_ps(_mm_mul_ps(row2, weightsXY2),
    6003   108610000 :                    _mm_mul_ps(row3, weightsXY3))));
    6004             : }
    6005             : // Cubic resampling of one destination pixel for every band of poWK in a single call, without consulting any mask. T is the sample type of the source and destination buffers; dfSrcX/dfSrcY are the source position and iDstOffset the index written in each destination band.
    6006             : template <class T>
    6007    37174677 : static void GWKCubicResampleNoMasks4MultiBandT(const GDALWarpKernel *poWK,
    6008             :                                                double dfSrcX, double dfSrcY,
    6009             :                                                const GPtrDiff_t iDstOffset)
    6010             : {
    6011    37174677 :     const double dfSrcXShifted = dfSrcX - 0.5;
    6012    37174677 :     const int iSrcX = static_cast<int>(dfSrcXShifted);
    6013    37174677 :     const double dfSrcYShifted = dfSrcY - 0.5;
    6014    37174677 :     const int iSrcY = static_cast<int>(dfSrcYShifted);
    6015    37174677 :     const GPtrDiff_t iSrcOffset =
    6016    37174677 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    6017             :     // The cubic window read below covers columns iSrcX-1..iSrcX+2 and rows iSrcY-1..iSrcY+2.
    6018             :     // When that window does not fit inside the source image, get the bilinear interpolation instead.
    6019    37174677 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    6020    36214777 :         iSrcY + 2 >= poWK->nSrcYSize)
    6021             :     {
    6022     3885370 :         for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6023             :         {
    6024             :             T value;  // filled in by the call below
    6025     2914030 :             GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    6026             :                                                &value);
    6027     2914030 :             reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
    6028             :                 value;
    6029      971343 :         }
    6030             :     }
    6031             :     else
    6032             :     {
    6033    36203377 :         const float fDeltaX = static_cast<float>(dfSrcXShifted) - iSrcX;
    6034    36203377 :         const float fDeltaY = static_cast<float>(dfSrcYShifted) - iSrcY;
    6035             :         // fDeltaX/fDeltaY: fractional part of the shifted position, used to derive the four weights per axis.
    6036             :         float afCoeffsX[4];
    6037             :         float afCoeffsY[4];
    6038    36203377 :         GWKCubicComputeWeights(fDeltaX, afCoeffsX);
    6039    36203377 :         GWKCubicComputeWeights(fDeltaY, afCoeffsY);
    6040    36203377 :         const auto weightsX = _mm_loadu_ps(afCoeffsX);  // the four X weights, one per lane
    6041             :         const auto weightsXY0 =
    6042    72406754 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[0]), weightsX);  // Y weight of row 0 broadcast to all lanes, times the X weights
    6043             :         const auto weightsXY1 =
    6044    72406754 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[1]), weightsX);
    6045             :         const auto weightsXY2 =
    6046    72406754 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[2]), weightsX);
    6047             :         const auto weightsXY3 =
    6048    36203377 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[3]), weightsX);
    6049             :         // iOffset: index of the window's top-left sample, one row above and one column left of iSrcOffset.
    6050    36203377 :         const GPtrDiff_t iOffset = iSrcOffset - poWK->nSrcXSize - 1;
    6051             : 
    6052    36203377 :         int iBand = 0;
    6053             :         // Process 2 bands at a time
    6054    72406754 :         for (; iBand + 1 < poWK->nBands; iBand += 2)
    6055             :         {
    6056    36203377 :             const T *CPL_RESTRICT pBand0 =
    6057    36203377 :                 reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
    6058    36203377 :             const auto row0_0 = XMMLoad4Values(pBand0 + iOffset);  // presumably 4 consecutive samples of window row 0 (helper defined elsewhere)
    6059             :             const auto row1_0 =
    6060    36203377 :                 XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
    6061             :             const auto row2_0 =
    6062    36203377 :                 XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
    6063             :             const auto row3_0 =
    6064    36203377 :                 XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
    6065             : 
    6066    36203377 :             const T *CPL_RESTRICT pBand1 =
    6067    36203377 :                 reinterpret_cast<const T *>(poWK->papabySrcImage[iBand + 1]);
    6068    36203377 :             const auto row0_1 = XMMLoad4Values(pBand1 + iOffset);
    6069             :             const auto row1_1 =
    6070    36203377 :                 XMMLoad4Values(pBand1 + iOffset + poWK->nSrcXSize);
    6071             :             const auto row2_1 =
    6072    36203377 :                 XMMLoad4Values(pBand1 + iOffset + 2 * poWK->nSrcXSize);
    6073             :             const auto row3_1 =
    6074    36203377 :                 XMMLoad4Values(pBand1 + iOffset + 3 * poWK->nSrcXSize);
    6075             : 
    6076             :             const float fValue_0 =
    6077    36203377 :                 Convolute4x4(row0_0, row1_0, row2_0, row3_0, weightsXY0,
    6078             :                              weightsXY1, weightsXY2, weightsXY3);
    6079             : 
    6080             :             const float fValue_1 =
    6081    36203377 :                 Convolute4x4(row0_1, row1_1, row2_1, row3_1, weightsXY0,
    6082             :                              weightsXY1, weightsXY2, weightsXY3);
    6083             : 
    6084    36203377 :             T *CPL_RESTRICT pDstBand0 =
    6085    36203377 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
    6086    36203377 :             pDstBand0[iDstOffset] = GWKClampValueT<T>(fValue_0);
    6087             : 
    6088    36203377 :             T *CPL_RESTRICT pDstBand1 =
    6089    36203377 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand + 1]);
    6090    36203377 :             pDstBand1[iDstOffset] = GWKClampValueT<T>(fValue_1);
    6091             :         }
    6092    36203377 :         if (iBand < poWK->nBands)  // odd band count: one band is left after the pairwise loop
    6093             :         {
    6094    36203377 :             const T *CPL_RESTRICT pBand0 =
    6095    36203377 :                 reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
    6096    36203377 :             const auto row0 = XMMLoad4Values(pBand0 + iOffset);
    6097             :             const auto row1 =
    6098    36203377 :                 XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
    6099             :             const auto row2 =
    6100    36203377 :                 XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
    6101             :             const auto row3 =
    6102    36203377 :                 XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
    6103             : 
    6104             :             const float fValue =
    6105    36203377 :                 Convolute4x4(row0, row1, row2, row3, weightsXY0, weightsXY1,
    6106             :                              weightsXY2, weightsXY3);
    6107             : 
    6108    36203377 :             T *CPL_RESTRICT pDstBand =
    6109    36203377 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
    6110    36203377 :             pDstBand[iDstOffset] = GWKClampValueT<T>(fValue);
    6111             :         }
    6112             :     }
    6113             :     // Both paths: record a density of 1.0 for this destination pixel when a density buffer exists.
    6114    37174677 :     if (poWK->pafDstDensity)
    6115    37093836 :         poWK->pafDstDensity[iDstOffset] = 1.0f;
    6116    37174677 : }
    6117             : 
    6118             : #endif  // defined(USE_SSE2)
    6119             : 
    6120             : /************************************************************************/
    6121             : /*                GWKResampleNoMasksOrDstDensityOnlyThreadInternal()    */
    6122             : /************************************************************************/
    6123             : // Job worker for the mask-free warping path: for each destination row in [psJob->iYMin, psJob->iYMax) it transforms the row to source coordinates, resamples every band and stores the result. No validity or density mask is read; pafDstDensity, when present, is set to 1 for each written pixel.
    6124             : template <class T, GDALResampleAlg eResample, int bUse4SamplesFormula>
    6125        1842 : static void GWKResampleNoMasksOrDstDensityOnlyThreadInternal(void *pData)
    6126             : // T: sample type of the source/destination buffers. eResample: algorithm selected at compile time. bUse4SamplesFormula: non-zero selects the 4-sample bilinear/cubic kernels. pData: must point to a GWKJobStruct.
    6127             : {
    6128        1842 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    6129        1842 :     GDALWarpKernel *poWK = psJob->poWK;
    6130        1842 :     const int iYMin = psJob->iYMin;
    6131        1842 :     const int iYMax = psJob->iYMax;
    6132        1824 :     const double dfMultFactorVerticalShiftPipeline =
    6133        1842 :         poWK->bApplyVerticalShift
    6134          18 :             ? CPLAtof(CSLFetchNameValueDef(
    6135          18 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    6136             :                   "1.0"))
    6137             :             : 0.0;  // only read below when bApplyVerticalShift is set
    6138             : 
    6139        1842 :     const int nDstXSize = poWK->nDstXSize;
    6140        1842 :     const int nSrcXSize = poWK->nSrcXSize;
    6141        1842 :     const int nSrcYSize = poWK->nSrcYSize;
    6142             : 
    6143             :     /* -------------------------------------------------------------------- */
    6144             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    6145             :     /*      scanline's worth of positions.                                  */
    6146             :     /* -------------------------------------------------------------------- */
    6147             : 
    6148             :     // padfX is twice as long as the others: its second half caches the untransformed destination X coordinates.
    6149             :     double *padfX =
    6150        1842 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    6151             :     double *padfY =
    6152        1842 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6153             :     double *padfZ =
    6154        1842 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6155        1842 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    6156             :     // Zero-initialized weight buffers of 1 + 2 * radius doubles each; only passed to GWKResampleNoMasksT below.
    6157        1842 :     const int nXRadius = poWK->nXRadius;
    6158             :     double *padfWeightsX =
    6159        1842 :         static_cast<double *>(CPLCalloc(1 + nXRadius * 2, sizeof(double)));
    6160             :     double *padfWeightsY = static_cast<double *>(
    6161        1842 :         CPLCalloc(1 + poWK->nYRadius * 2, sizeof(double)));
    6162        1842 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    6163        1842 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    6164        1842 :     const double dfErrorThreshold = CPLAtof(
    6165        1842 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    6166             : 
    6167             :     // Precompute the destination X coordinates (pixel centers) once, into the second half of padfX.
    6168      418872 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6169      417030 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    6170             : 
    6171             :     /* ==================================================================== */
    6172             :     /*      Loop over output lines.                                         */
    6173             :     /* ==================================================================== */
    6174      293317 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    6175             :     {
    6176             :         /* --------------------------------------------------------------------
    6177             :          */
    6178             :         /*      Setup points to transform to source image space. */
    6179             :         /* --------------------------------------------------------------------
    6180             :          */
    6181      291476 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);  // reload X from the cached second half
    6182      291476 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    6183    98586759 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6184    98295220 :             padfY[iDstX] = dfY;
    6185      291476 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    6186             : 
    6187             :         /* --------------------------------------------------------------------
    6188             :          */
    6189             :         /*      Transform the points from destination pixel/line coordinates */
    6190             :         /*      to source pixel/line coordinates. */
    6191             :         /* --------------------------------------------------------------------
    6192             :          */
    6193      291476 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    6194             :                              padfY, padfZ, pabSuccess);
    6195      291476 :         if (dfSrcCoordPrecision > 0.0)  // only when the SRC_COORD_PRECISION warp option is positive
    6196             :         {
    6197        1000 :             GWKRoundSourceCoordinates(
    6198             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    6199             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    6200        1000 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    6201             :         }
    6202             : 
    6203             :         /* ====================================================================
    6204             :          */
    6205             :         /*      Loop over pixels in output scanline. */
    6206             :         /* ====================================================================
    6207             :          */
    6208    98586759 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6209             :         {
    6210    98295220 :             GPtrDiff_t iSrcOffset = 0;
    6211    98295220 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    6212             :                                               padfX, padfY, nSrcXSize,
    6213             :                                               nSrcYSize, iSrcOffset))
    6214    47394279 :                 continue;  // helper rejected this pixel (presumably failed transform or outside the source; confirm in helper)
    6215             : 
    6216             :             /* ====================================================================
    6217             :              */
    6218             :             /*      Loop processing each band. */
    6219             :             /* ====================================================================
    6220             :              */
    6221    88075783 :             const GPtrDiff_t iDstOffset =
    6222    88075783 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    6223             :             // SSE2 shortcut: 4-sample cubic on GByte/GUInt16 with more than one band and no vertical shift handles all bands in one call.
    6224             : #if defined(USE_SSE2)
    6225             :             if constexpr (bUse4SamplesFormula && eResample == GRA_Cubic &&
    6226             :                           (std::is_same<T, GByte>::value ||
    6227             :                            std::is_same<T, GUInt16>::value))
    6228             :             {
    6229    38170876 :                 if (poWK->nBands > 1 && !poWK->bApplyVerticalShift)
    6230             :                 {
    6231    37174677 :                     GWKCubicResampleNoMasks4MultiBandT<T>(
    6232    37174677 :                         poWK, padfX[iDstX] - poWK->nSrcXOff,
    6233    37174677 :                         padfY[iDstX] - poWK->nSrcYOff, iDstOffset);
    6234             : 
    6235    37174677 :                     continue;  // all bands and the density were written by the call above
    6236             :                 }
    6237             :             }
    6238             : #endif  // defined(USE_SSE2)
    6239             : 
    6240    50901040 :             [[maybe_unused]] double dfInvWeights = 0;  // only passed to GWKResampleNoMasksT
    6241   144559858 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6242             :             {
    6243    93659054 :                 T value = 0;
    6244             :                 if constexpr (eResample == GRA_NearestNeighbour)
    6245             :                 {
    6246    78474530 :                     value = reinterpret_cast<T *>(
    6247    78474530 :                         poWK->papabySrcImage[iBand])[iSrcOffset];  // plain copy of the source sample
    6248             :                 }
    6249             :                 else if constexpr (bUse4SamplesFormula)
    6250             :                 {
    6251             :                     if constexpr (eResample == GRA_Bilinear)
    6252     3363189 :                         GWKBilinearResampleNoMasks4SampleT(
    6253     3363189 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6254     3363189 :                             padfY[iDstX] - poWK->nSrcYOff, &value);
    6255             :                     else
    6256     2231485 :                         GWKCubicResampleNoMasks4SampleT(
    6257     2231485 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6258     2231485 :                             padfY[iDstX] - poWK->nSrcYOff, &value);
    6259             :                 }
    6260             :                 else
    6261             :                 {
    6262     9589850 :                     GWKResampleNoMasksT(
    6263     9589850 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6264     9589850 :                         padfY[iDstX] - poWK->nSrcYOff, &value, padfWeightsX,
    6265             :                         padfWeightsY, dfInvWeights);
    6266             :                 }
    6267             : 
    6268    93659054 :                 if (poWK->bApplyVerticalShift)
    6269             :                 {
    6270         818 :                     if (!std::isfinite(padfZ[iDstX]))
    6271           0 :                         continue;  // non-finite Z: this band's destination sample and the density are left untouched
    6272             :                     // Subtract padfZ[] since the coordinate transformation is
    6273             :                     // from target to source
    6274         818 :                     value = GWKClampValueT<T>(
    6275         818 :                         double(value) * poWK->dfMultFactorVerticalShift -
    6276         818 :                         padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
    6277             :                 }
    6278             :                 // Record a density of 1.0 for the pixel (repeated for each band that is written).
    6279    93659054 :                 if (poWK->pafDstDensity)
    6280    14049274 :                     poWK->pafDstDensity[iDstOffset] = 1.0f;
    6281             : 
    6282    93659054 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
    6283             :                     value;
    6284             :             }
    6285             :         }
    6286             : 
    6287             :         /* --------------------------------------------------------------------
    6288             :          */
    6289             :         /*      Report progress to the user, and optionally cancel out. */
    6290             :         /* --------------------------------------------------------------------
    6291             :          */
    6292      291476 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    6293           1 :             break;  // a non-zero return from the progress callback stops the row loop
    6294             :     }
    6295             : 
    6296             :     /* -------------------------------------------------------------------- */
    6297             :     /*      Cleanup and return.                                             */
    6298             :     /* -------------------------------------------------------------------- */
    6299        1842 :     CPLFree(padfX);
    6300        1842 :     CPLFree(padfY);
    6301        1842 :     CPLFree(padfZ);
    6302        1842 :     CPLFree(pabSuccess);
    6303        1842 :     CPLFree(padfWeightsX);
    6304        1842 :     CPLFree(padfWeightsY);
    6305        1842 : }
    6306             : // Job entry point that always runs the worker instantiated with bUse4SamplesFormula = FALSE; pData is forwarded unchanged.
    6307             : template <class T, GDALResampleAlg eResample>
    6308         994 : static void GWKResampleNoMasksOrDstDensityOnlyThread(void *pData)
    6309             : {
    6310         994 :     GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
    6311             :         pData);
    6312         994 : }
    6313             : // Job entry point for bilinear and cubic resampling: runs the 4-sample instantiation of the worker when both poWK->dfXScale and poWK->dfYScale are >= 0.95, and the general instantiation otherwise.
    6314             : template <class T, GDALResampleAlg eResample>
    6315         848 : static void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread(void *pData)
    6316             : // pData: must point to a GWKJobStruct; it is forwarded unchanged to the selected worker.
    6317             : {
    6318         848 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    6319         848 :     GDALWarpKernel *poWK = psJob->poWK;
    6320             :     static_assert(eResample == GRA_Bilinear || eResample == GRA_Cubic);  // the worker's 4-sample branch only handles these two
    6321         848 :     const bool bUse4SamplesFormula =
    6322         848 :         poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
    6323         848 :     if (bUse4SamplesFormula)
    6324         746 :         GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, TRUE>(
    6325             :             pData);
    6326             :     else
    6327         102 :         GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
    6328             :             pData);
    6329         848 : }
    6330             : // Nearest-neighbour, GByte samples: passes the <GByte, GRA_NearestNeighbour> worker to GWKRun and returns GWKRun's CPLErr.
    6331         943 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6332             : {
    6333         943 :     return GWKRun(
    6334             :         poWK, "GWKNearestNoMasksOrDstDensityOnlyByte",
    6335         943 :         GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_NearestNeighbour>);
    6336             : }
    6337             : // Bilinear, GByte samples: passes the <GByte, GRA_Bilinear> scale-dependent entry point to GWKRun and returns GWKRun's CPLErr.
    6338         126 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6339             : {
    6340         126 :     return GWKRun(
    6341             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyByte",
    6342             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte,
    6343         126 :                                                            GRA_Bilinear>);
    6344             : }
    6345             : // Cubic, GByte samples: passes the <GByte, GRA_Cubic> scale-dependent entry point to GWKRun and returns GWKRun's CPLErr.
    6346         676 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6347             : {
    6348         676 :     return GWKRun(
    6349             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyByte",
    6350         676 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte, GRA_Cubic>);
    6351             : }
    6352             : 
    6353           9 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
    6354             : {
                           // Forwards poWK to GWKRun() together with this function's name and the
                           // <float, GRA_Cubic> instantiation of
                           // GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread; GWKRun()'s CPLErr
                           // is returned unchanged.
    6355           9 :     return GWKRun(
    6356             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyFloat",
    6357           9 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, GRA_Cubic>);
    6358             : }
    6359             : 
    6360             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    6361             : 
                       // Only compiled when INSTANTIATE_FLOAT64_SSE2_IMPL is defined.
    6362             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
    6363             : {
                           // Forwards poWK to GWKRun() together with this function's name and the
                           // <double, GRA_Cubic> instantiation of
                           // GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread; GWKRun()'s CPLErr
                           // is returned unchanged.
    6364             :     return GWKRun(
    6365             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyDouble",
    6366             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double, GRA_Cubic>);
    6367             : }
    6368             : #endif
    6369             : 
    6370          12 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6371             : {
                           // Forwards poWK to GWKRun() together with this function's name and the
                           // <GByte, GRA_CubicSpline> instantiation of
                           // GWKResampleNoMasksOrDstDensityOnlyThread; GWKRun()'s CPLErr is
                           // returned unchanged.
    6372          12 :     return GWKRun(
    6373             :         poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyByte",
    6374          12 :         GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_CubicSpline>);
    6375             : }
    6376             : 
    6377             : /************************************************************************/
    6378             : /*                          GWKNearestThread()                          */
    6379             : /*                                                                      */
    6380             : /*      Case for nearest neighbour resampling using valid flags,        */
    6381             : /*      templated on the pixel type T. Should be as fast as possible    */
    6382             : /*      for this particular transformation type.                        */
    6383             : /************************************************************************/
    6384             : 
    6385         459 : template <class T> static void GWKNearestThread(void *pData)
    6386             : 
    6387             : {
                           // Nearest-neighbour warp worker for pixel type T.
                           //
                           // pData is cast to a GWKJobStruct. Destination rows in
                           // [psJob->iYMin, psJob->iYMax) are processed one scanline at a time:
                           // the destination pixel centres are transformed to source coordinates,
                           // a source offset is computed for each pixel, pixels rejected by the
                           // unified source validity mask or by the source density threshold are
                           // skipped, and the remaining ones are copied band by band before the
                           // destination density and validity mask are updated.
    6388         459 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    6389         459 :     GDALWarpKernel *poWK = psJob->poWK;
    6390         459 :     const int iYMin = psJob->iYMin;
    6391         459 :     const int iYMax = psJob->iYMax;
                           // Factor applied to padfZ[] in the vertical-shift branch below. The
                           // warp option is only read when bApplyVerticalShift is set; otherwise
                           // the value is 0.0 and is not used.
    6392         459 :     const double dfMultFactorVerticalShiftPipeline =
    6393         459 :         poWK->bApplyVerticalShift
    6394           0 :             ? CPLAtof(CSLFetchNameValueDef(
    6395           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    6396             :                   "1.0"))
    6397             :             : 0.0;
                           // True for single-band kernels, or when the UNIFIED_SRC_NODATA warp
                           // option (default "FALSE") does not test true. Passed to
                           // GWKSetPixelValueRealT(); when false, GWKAvoidNoDataMultiBand() is
                           // called once per pixel after the band loop instead.
    6398         459 :     const bool bAvoidNoDataSingleBand =
    6399         525 :         poWK->nBands == 1 ||
    6400          66 :         !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
    6401             :                                           "UNIFIED_SRC_NODATA", "FALSE"));
    6402             : 
    6403         459 :     const int nDstXSize = poWK->nDstXSize;
    6404         459 :     const int nSrcXSize = poWK->nSrcXSize;
    6405         459 :     const int nSrcYSize = poWK->nSrcYSize;
    6406             : 
    6407             :     /* -------------------------------------------------------------------- */
    6408             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    6409             :     /*      scanlines worth of positions.                                   */
    6410             :     /* -------------------------------------------------------------------- */
    6411             : 
    6412             :     // For x, 2 *, because we cache the precomputed values at the end.
    6413             :     double *padfX =
    6414         459 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    6415             :     double *padfY =
    6416         459 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6417             :     double *padfZ =
    6418         459 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6419         459 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    6420             : 
                           // Both options default to "0"; the rounding step in the row loop only
                           // runs when SRC_COORD_PRECISION is strictly positive.
    6421         459 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    6422         459 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    6423         459 :     const double dfErrorThreshold = CPLAtof(
    6424         459 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    6425             : 
    6426             :     const bool bOneSourceCornerFailsToReproject =
    6427         459 :         GWKOneSourceCornerFailsToReproject(psJob);
    6428             : 
    6429             :     // Precompute values.
                           // The second half of padfX holds the destination X of each pixel
                           // centre (iDstX + 0.5 + nDstXOff); it is copied into the first half at
                           // the start of every row.
    6430       62854 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6431       62395 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    6432             : 
    6433             :     /* ==================================================================== */
    6434             :     /*      Loop over output lines.                                         */
    6435             :     /* ==================================================================== */
    6436       48162 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    6437             :     {
    6438             : 
    6439             :         /* --------------------------------------------------------------------
    6440             :          */
    6441             :         /*      Setup points to transform to source image space. */
    6442             :         /* --------------------------------------------------------------------
    6443             :          */
                               // Reload the cached destination X centres: the first half of padfX
                               // is handed to the transformer below and read back afterwards as
                               // source coordinates.
    6444       47703 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    6445       47703 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    6446     9833535 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6447     9785833 :             padfY[iDstX] = dfY;
    6448       47703 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    6449             : 
    6450             :         /* --------------------------------------------------------------------
    6451             :          */
    6452             :         /*      Transform the points from destination pixel/line coordinates */
    6453             :         /*      to source pixel/line coordinates. */
    6454             :         /* --------------------------------------------------------------------
    6455             :          */
    6456       47703 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    6457             :                              padfY, padfZ, pabSuccess);
    6458       47703 :         if (dfSrcCoordPrecision > 0.0)
    6459             :         {
                                   // The last two arguments are the destination coordinates of
                                   // the first pixel centre of this row.
    6460           0 :             GWKRoundSourceCoordinates(
    6461             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    6462             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    6463           0 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    6464             :         }
    6465             :         /* ====================================================================
    6466             :          */
    6467             :         /*      Loop over pixels in output scanline. */
    6468             :         /* ====================================================================
    6469             :          */
    6470     9833535 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6471             :         {
                                   // iSrcOffset is filled in by GWKCheckAndComputeSrcOffsets(); a
                                   // false return means this destination pixel is skipped.
    6472     9785833 :             GPtrDiff_t iSrcOffset = 0;
    6473     9785833 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    6474             :                                               padfX, padfY, nSrcXSize,
    6475             :                                               nSrcYSize, iSrcOffset))
    6476     2358945 :                 continue;
    6477             : 
    6478             :             /* --------------------------------------------------------------------
    6479             :              */
    6480             :             /*      Do not try to apply invalid source pixels to the dest. */
    6481             :             /* --------------------------------------------------------------------
    6482             :              */
    6483     9606143 :             if (poWK->panUnifiedSrcValid != nullptr &&
    6484     1127399 :                 !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    6485             :             {
                                       // The source pixel is flagged invalid. It is skipped, unless
                                       // bOneSourceCornerFailsToReproject is set and
                                       // GWKAdjustSrcOffsetOnEdge() succeeds (it receives
                                       // iSrcOffset and may update it).
    6486       49043 :                 if (!bOneSourceCornerFailsToReproject)
    6487             :                 {
    6488       41558 :                     continue;
    6489             :                 }
    6490        7485 :                 else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
    6491             :                 {
    6492        5224 :                     continue;
    6493             :                 }
    6494             :             }
    6495             : 
    6496             :             /* --------------------------------------------------------------------
    6497             :              */
    6498             :             /*      Do not try to apply transparent source pixels to the
    6499             :              * destination.*/
    6500             :             /* --------------------------------------------------------------------
    6501             :              */
                                   // Density stays at 1.0 when there is no unified source density
                                   // array.
    6502     8431960 :             double dfDensity = 1.0;
    6503             : 
    6504     8431960 :             if (poWK->pafUnifiedSrcDensity != nullptr)
    6505             :             {
    6506     1557335 :                 dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    6507     1557335 :                 if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
    6508     1005075 :                     continue;
    6509             :             }
    6510             : 
    6511             :             /* ====================================================================
    6512             :              */
    6513             :             /*      Loop processing each band. */
    6514             :             /* ====================================================================
    6515             :              */
    6516             : 
                                   // Destination offset; the row term is widened to GPtrDiff_t
                                   // before the multiplication.
    6517     7426888 :             const GPtrDiff_t iDstOffset =
    6518     7426888 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    6519             : 
    6520    17415958 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6521             :             {
    6522     9989020 :                 T value = 0;
    6523     9989020 :                 double dfBandDensity = 0.0;
    6524             : 
    6525             :                 /* --------------------------------------------------------------------
    6526             :                  */
    6527             :                 /*      Collect the source value. */
    6528             :                 /* --------------------------------------------------------------------
    6529             :                  */
    6530     9989020 :                 if (GWKGetPixelT(poWK, iBand, iSrcOffset, &dfBandDensity,
    6531             :                                  &value))
    6532             :                 {
    6533             : 
    6534     9989010 :                     if (poWK->bApplyVerticalShift)
    6535             :                     {
                                               // NOTE(review): this continue only skips the current
                                               // band; the per-pixel density/validity updates after
                                               // the band loop still run. Confirm this is intended.
    6536           0 :                         if (!std::isfinite(padfZ[iDstX]))
    6537           0 :                             continue;
    6538             :                         // Subtract padfZ[] since the coordinate transformation
    6539             :                         // is from target to source
    6540           0 :                         value = GWKClampValueT<T>(
    6541           0 :                             double(value) * poWK->dfMultFactorVerticalShift -
    6542           0 :                             padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
    6543             :                     }
    6544             : 
    6545     9989010 :                     GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
    6546             :                                           dfBandDensity, value,
    6547             :                                           bAvoidNoDataSingleBand);
    6548             :                 }
    6549             :             }
    6550             : 
    6551             :             /* --------------------------------------------------------------------
    6552             :              */
    6553             :             /*      Mark this pixel valid/opaque in the output. */
    6554             :             /* --------------------------------------------------------------------
    6555             :              */
    6556             : 
    6557     7426888 :             if (!bAvoidNoDataSingleBand)
    6558             :             {
    6559      424278 :                 GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    6560             :             }
    6561             : 
    6562     7426888 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    6563             : 
    6564     7426888 :             if (poWK->panDstValid != nullptr)
    6565             :             {
    6566     6156885 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    6567             :             }
    6568             :         } /* Next iDstX */
    6569             : 
    6570             :         /* --------------------------------------------------------------------
    6571             :          */
    6572             :         /*      Report progress to the user, and optionally cancel out. */
    6573             :         /* --------------------------------------------------------------------
    6574             :          */
                               // A non-zero return from the job's progress callback stops the row
                               // loop; the buffers are still released below.
    6575       47703 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    6576           0 :             break;
    6577             :     }
    6578             : 
    6579             :     /* -------------------------------------------------------------------- */
    6580             :     /*      Cleanup and return.                                             */
    6581             :     /* -------------------------------------------------------------------- */
    6582         459 :     CPLFree(padfX);
    6583         459 :     CPLFree(padfY);
    6584         459 :     CPLFree(padfZ);
    6585         459 :     CPLFree(pabSuccess);
    6586         459 : }
    6587             : 
    6588         350 : static CPLErr GWKNearestByte(GDALWarpKernel *poWK)
    6589             : {
                           // Forwards poWK to GWKRun() with this function's name and
                           // GWKNearestThread<GByte>; GWKRun()'s CPLErr is returned unchanged.
    6590         350 :     return GWKRun(poWK, "GWKNearestByte", GWKNearestThread<GByte>);
    6591             : }
    6592             : 
    6593          14 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6594             : {
                           // Forwards poWK to GWKRun() together with this function's name and the
                           // <GInt16, GRA_NearestNeighbour> instantiation of
                           // GWKResampleNoMasksOrDstDensityOnlyThread; GWKRun()'s CPLErr is
                           // returned unchanged.
    6595          14 :     return GWKRun(
    6596             :         poWK, "GWKNearestNoMasksOrDstDensityOnlyShort",
    6597          14 :         GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_NearestNeighbour>);
    6598             : }
    6599             : 
    6600           5 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6601             : {
                           // Forwards poWK to GWKRun() together with this function's name and the
                           // <GInt16, GRA_Bilinear> instantiation of
                           // GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread; GWKRun()'s CPLErr
                           // is returned unchanged.
    6602           5 :     return GWKRun(
    6603             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyShort",
    6604             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16,
    6605           5 :                                                            GRA_Bilinear>);
    6606             : }
    6607             : 
    6608           6 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
    6609             : {
                           // Forwards poWK to GWKRun() together with this function's name and the
                           // <GUInt16, GRA_Bilinear> instantiation of
                           // GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread; GWKRun()'s CPLErr
                           // is returned unchanged.
    6610           6 :     return GWKRun(
    6611             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyUShort",
    6612             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16,
    6613           6 :                                                            GRA_Bilinear>);
    6614             : }
    6615             : 
    6616           4 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
    6617             : {
                           // Forwards poWK to GWKRun() together with this function's name and the
                           // <float, GRA_Bilinear> instantiation of
                           // GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread; GWKRun()'s CPLErr
                           // is returned unchanged.
    6618           4 :     return GWKRun(
    6619             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyFloat",
    6620             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float,
    6621           4 :                                                            GRA_Bilinear>);
    6622             : }
    6623             : 
    6624             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    6625             : 
                       // Only compiled when INSTANTIATE_FLOAT64_SSE2_IMPL is defined.
    6626             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
    6627             : {
                           // Forwards poWK to GWKRun() together with this function's name and the
                           // <double, GRA_Bilinear> instantiation of
                           // GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread; GWKRun()'s CPLErr
                           // is returned unchanged.
    6628             :     return GWKRun(
    6629             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyDouble",
    6630             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double,
    6631             :                                                            GRA_Bilinear>);
    6632             : }
    6633             : #endif
    6634             : 
    6635           5 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6636             : {
                           // Forwards poWK to GWKRun() together with this function's name and the
                           // <GInt16, GRA_Cubic> instantiation of
                           // GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread; GWKRun()'s CPLErr
                           // is returned unchanged.
    6637           5 :     return GWKRun(
    6638             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyShort",
    6639           5 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16, GRA_Cubic>);
    6640             : }
    6641             : 
    6642          14 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
    6643             : {
                           // Forwards poWK to GWKRun() together with this function's name and the
                           // <GUInt16, GRA_Cubic> instantiation of
                           // GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread; GWKRun()'s CPLErr
                           // is returned unchanged.
    6644          14 :     return GWKRun(
    6645             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyUShort",
    6646          14 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16, GRA_Cubic>);
    6647             : }
    6648             : 
    6649           6 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6650             : {
                           // Forwards poWK to GWKRun() together with this function's name and the
                           // <GInt16, GRA_CubicSpline> instantiation of
                           // GWKResampleNoMasksOrDstDensityOnlyThread; GWKRun()'s CPLErr is
                           // returned unchanged.
    6651           6 :     return GWKRun(
    6652             :         poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyShort",
    6653           6 :         GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_CubicSpline>);
    6654             : }
    6655             : 
    6656           5 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
    6657             : {
                           // Forwards poWK to GWKRun() together with this function's name and the
                           // <GUInt16, GRA_CubicSpline> instantiation of
                           // GWKResampleNoMasksOrDstDensityOnlyThread; GWKRun()'s CPLErr is
                           // returned unchanged.
    6658           5 :     return GWKRun(
    6659             :         poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyUShort",
    6660           5 :         GWKResampleNoMasksOrDstDensityOnlyThread<GUInt16, GRA_CubicSpline>);
    6661             : }
    6662             : 
    6663          45 : static CPLErr GWKNearestShort(GDALWarpKernel *poWK)
    6664             : {
                           // Forwards poWK to GWKRun() with this function's name and
                           // GWKNearestThread<GInt16>; GWKRun()'s CPLErr is returned unchanged.
    6665          45 :     return GWKRun(poWK, "GWKNearestShort", GWKNearestThread<GInt16>);
    6666             : }
    6667             : 
    6668          10 : static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK)
    6669             : {
                           // Forwards poWK to GWKRun() with this function's name and
                           // GWKNearestThread<GUInt16>; GWKRun()'s CPLErr is returned unchanged.
    6670          10 :     return GWKRun(poWK, "GWKNearestUnsignedShort", GWKNearestThread<GUInt16>);
    6671             : }
    6672             : 
    6673          11 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
    6674             : {
                           // Forwards poWK to GWKRun() together with this function's name and the
                           // <float, GRA_NearestNeighbour> instantiation of
                           // GWKResampleNoMasksOrDstDensityOnlyThread; GWKRun()'s CPLErr is
                           // returned unchanged.
    6675          11 :     return GWKRun(
    6676             :         poWK, "GWKNearestNoMasksOrDstDensityOnlyFloat",
    6677          11 :         GWKResampleNoMasksOrDstDensityOnlyThread<float, GRA_NearestNeighbour>);
    6678             : }
    6679             : 
    6680          50 : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK)
    6681             : {
                           // Forwards poWK to GWKRun() with this function's name and
                           // GWKNearestThread<float>; GWKRun()'s CPLErr is returned unchanged.
    6682          50 :     return GWKRun(poWK, "GWKNearestFloat", GWKNearestThread<float>);
    6683             : }
    6684             : 
    6685             : /************************************************************************/
    6686             : /*                           GWKAverageOrMode()                         */
    6687             : /*                                                                      */
    6688             : /************************************************************************/
    6689             : 
    6690             : #define COMPUTE_WEIGHT_Y(iSrcY)                                                \
    6691             :     ((iSrcY == iSrcYMin)                                                       \
    6692             :          ? ((iSrcYMin + 1 == iSrcYMax) ? 1.0 : 1 - (dfYMin - iSrcYMin))        \
    6693             :      : (iSrcY + 1 == iSrcYMax) ? 1 - (iSrcYMax - dfYMax)                       \
    6694             :                                : 1.0)
                       // COMPUTE_WEIGHT_Y(iSrcY): weight of source row iSrcY. The first row
                       // (iSrcY == iSrcYMin) gets 1.0 when it is the only row, otherwise
                       // 1 - (dfYMin - iSrcYMin); the last row (iSrcY + 1 == iSrcYMax) gets
                       // 1 - (iSrcYMax - dfYMax); every other row gets 1.0.
                       // The expansion reads iSrcYMin, iSrcYMax, dfYMin and dfYMax from the
                       // scope where the macro is used.
                       // NOTE(review): the argument is not parenthesized in the expansion, so
                       // pass a plain identifier.
    6695             : 
    6696             : #define COMPUTE_WEIGHT(iSrcX, dfWeightY)                                       \
    6697             :     ((iSrcX == iSrcXMin)       ? ((iSrcXMin + 1 == iSrcXMax)                   \
    6698             :                                       ? dfWeightY                              \
    6699             :                                       : dfWeightY * (1 - (dfXMin - iSrcXMin))) \
    6700             :      : (iSrcX + 1 == iSrcXMax) ? dfWeightY * (1 - (iSrcXMax - dfXMax))         \
    6701             :                                : dfWeightY)
                       // COMPUTE_WEIGHT(iSrcX, dfWeightY): dfWeightY scaled by the column factor
                       // of source column iSrcX, using the same first / last / other rule as
                       // COMPUTE_WEIGHT_Y but with iSrcXMin, iSrcXMax, dfXMin and dfXMax taken
                       // from the scope where the macro is used.
                       // NOTE(review): neither argument is parenthesized in the expansion, so
                       // pass plain identifiers.
    6702             : 
    6703             : static void GWKAverageOrModeThread(void *pData);
    6704             : 
    6705         163 : static CPLErr GWKAverageOrMode(GDALWarpKernel *poWK)
    6706             : {
                           // Forwards poWK to GWKRun() with this function's name and the
                           // GWKAverageOrModeThread worker declared just above (its definition
                           // is not shown here); GWKRun()'s CPLErr is returned unchanged.
    6707         163 :     return GWKRun(poWK, "GWKAverageOrMode", GWKAverageOrModeThread);
    6708             : }
    6709             : 
    6710             : /************************************************************************/
    6711             : /*                   GWKAverageOrModeComputeLineCoords()                */
    6712             : /************************************************************************/
    6713             : 
    6714        8183 : static void GWKAverageOrModeComputeLineCoords(
    6715             :     const GWKJobStruct *psJob, double *padfX, double *padfX2, double *padfY,
    6716             :     double *padfY2, double *padfZ, double *padfZ2, int *pabSuccess,
    6717             :     int *pabSuccess2, int iDstY, double dfSrcCoordPrecision,
    6718             :     double dfErrorThreshold)
    6719             : {
                           // Fills the coordinate arrays for destination row iDstY and passes them
                           // through the kernel's transformer.
                           //
                           // For each iDstX in [0, nDstXSize):
                           //   - padfX/padfY/padfZ describe the (iDstX, iDstY) pixel corner,
                           //   - padfX2/padfY2/padfZ2 describe the (iDstX + 1, iDstY + 1) corner,
                           // both offset by nDstXOff / nDstYOff, with Z set to 0. Every array
                           // must hold at least nDstXSize entries. pabSuccess and pabSuccess2 are
                           // handed to the transformer for the first and second set respectively.
                           // When dfSrcCoordPrecision > 0, both sets are additionally passed to
                           // GWKRoundSourceCoordinates() along with dfErrorThreshold.
    6720        8183 :     const GDALWarpKernel *poWK = psJob->poWK;
    6721        8183 :     const int nDstXSize = poWK->nDstXSize;
    6722             : 
    6723             :     // Setup points to transform to source image space.
    6724     2097530 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6725             :     {
    6726     2089340 :         padfX[iDstX] = iDstX + poWK->nDstXOff;
    6727     2089340 :         padfY[iDstX] = iDstY + poWK->nDstYOff;
    6728     2089340 :         padfZ[iDstX] = 0.0;
    6729     2089340 :         padfX2[iDstX] = iDstX + 1.0 + poWK->nDstXOff;
    6730     2089340 :         padfY2[iDstX] = iDstY + 1.0 + poWK->nDstYOff;
    6731     2089340 :         padfZ2[iDstX] = 0.0;
    6732             :     }
    6733             : 
    6734             :     /* ----------------------------------------------------------------- */
    6735             :     /*      Transform the points from destination pixel/line coordinates */
    6736             :     /*      to source pixel/line coordinates.                            */
    6737             :     /* ----------------------------------------------------------------- */
    6738        8183 :     poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX, padfY,
    6739             :                          padfZ, pabSuccess);
    6740        8183 :     poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX2,
    6741             :                          padfY2, padfZ2, pabSuccess2);
    6742             : 
    6743        8183 :     if (dfSrcCoordPrecision > 0.0)
    6744             :     {
                               // The last two arguments of each call are the destination
                               // coordinates that were assigned to index 0 of the respective set.
    6745           0 :         GWKRoundSourceCoordinates(nDstXSize, padfX, padfY, padfZ, pabSuccess,
    6746             :                                   dfSrcCoordPrecision, dfErrorThreshold,
    6747           0 :                                   poWK->pfnTransformer, psJob->pTransformerArg,
    6748           0 :                                   poWK->nDstXOff, iDstY + poWK->nDstYOff);
    6749           0 :         GWKRoundSourceCoordinates(
    6750             :             nDstXSize, padfX2, padfY2, padfZ2, pabSuccess2, dfSrcCoordPrecision,
    6751           0 :             dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    6752           0 :             1.0 + poWK->nDstXOff, iDstY + 1.0 + poWK->nDstYOff);
    6753             :     }
    6754        8183 : }
    6755             : 
    6756             : /************************************************************************/
    6757             : /*              GWKAverageOrModeComputeSourceCoords()                   */
    6758             : /************************************************************************/
    6759             : 
    6760     2089340 : static bool GWKAverageOrModeComputeSourceCoords(
    6761             :     const GWKJobStruct *psJob, double *padfX, double *padfX2, double *padfY,
    6762             :     double *padfY2, int iDstX, int iDstY, int nXMargin, int nYMargin,
    6763             :     // Output:
    6764             :     bool &bWrapOverX, double &dfXMin, double &dfYMin, double &dfXMax,
    6765             :     double &dfYMax, int &iSrcXMin, int &iSrcYMin, int &iSrcXMax, int &iSrcYMax)
    6766             : {
                           // Derives the source window for destination pixel (iDstX, iDstY) from
                           // the two source-space corners stored at index iDstX of padfX/padfY and
                           // padfX2/padfY2.
                           //
                           // Returns false when a corner lies more than nXMargin / nYMargin
                           // outside the source raster, or when the resulting X or Y interval
                           // does not intersect the raster (tolerance EPSILON). Returns true
                           // otherwise, with:
                           //   - dfXMin/dfXMax, dfYMin/dfYMax: the window in source pixel
                           //     coordinates relative to nSrcXOff / nSrcYOff,
                           //   - iSrcXMin/iSrcXMax, iSrcYMin/iSrcYMax: the integer bounds derived
                           //     from them (lower bounds floored and clamped to 0, upper bounds
                           //     ceiled),
                           //   - bWrapOverX: true when the wrap-over heuristic below fired.
                           //
                           // Side effects: entries iDstX of padfX/padfX2 and padfY/padfY2 may be
                           // swapped in place, and padfX2[iDstX] is increased by nSrcXSize when
                           // bWrapOverX is set. On a false return only some of the outputs have
                           // been assigned (none of them on the first early return).
    6767     2089340 :     const GDALWarpKernel *poWK = psJob->poWK;
    6768     2089340 :     const int nSrcXSize = poWK->nSrcXSize;
    6769     2089340 :     const int nSrcYSize = poWK->nSrcYSize;
    6770             : 
    6771             :     // Add some checks so that padfX[iDstX] - poWK->nSrcXOff is in
    6772             :     // reasonable range (https://github.com/OSGeo/gdal/issues/2365)
                           // The condition is written as !(all in range) so that it is also
                           // true when a comparison involves NaN.
    6773     2089340 :     if (!(padfX[iDstX] - poWK->nSrcXOff >= -nXMargin &&
    6774     1991690 :           padfX2[iDstX] - poWK->nSrcXOff >= -nXMargin &&
    6775     1991690 :           padfY[iDstX] - poWK->nSrcYOff >= -nYMargin &&
    6776     1965300 :           padfY2[iDstX] - poWK->nSrcYOff >= -nYMargin &&
    6777     1965300 :           padfX[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
    6778     1911930 :           padfX2[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
    6779     1911430 :           padfY[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin &&
    6780     1910040 :           padfY2[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin))
    6781             :     {
    6782      179362 :         return false;
    6783             :     }
    6784             : 
    6785             :     // Compute corners in source crs.
    6786             : 
    6787             :     // The transformation might not have preserved ordering of
    6788             :     // coordinates so do the necessary swapping (#5433).
    6789             :     // NOTE: this is really an approximate fix. To do something
    6790             :     // more precise we would for example need to compute the
    6791             :     // transformation of coordinates in the
    6792             :     // [iDstX,iDstY]x[iDstX+1,iDstY+1] square back to source
    6793             :     // coordinates, and take the bounding box of the resulting source
    6794             :     // coordinates.
    6795             : 
    6796     1909980 :     if (padfX[iDstX] > padfX2[iDstX])
    6797      268744 :         std::swap(padfX[iDstX], padfX2[iDstX]);
    6798             : 
    6799             :     // Detect situations where the target pixel is close to the
    6800             :     // antimeridian and when padfX[iDstX] and padfX2[iDstX] are very
    6801             :     // close to the left-most and right-most columns of the source
    6802             :     // raster. The 2 value below was experimentally determined to
    6803             :     // avoid false-positives and false-negatives.
    6804             :     // Addresses https://github.com/OSGeo/gdal/issues/6478
    6805     1909980 :     bWrapOverX = false;
                           // The threshold is lowered below 2 for rasters narrower than 20
                           // columns (integer division by 10).
    6806     1909980 :     const int nThresholdWrapOverX = std::min(2, nSrcXSize / 10);
    6807     1909980 :     if (poWK->nSrcXOff == 0 &&
    6808     1909980 :         padfX[iDstX] * poWK->dfXScale < nThresholdWrapOverX &&
    6809       16499 :         (nSrcXSize - padfX2[iDstX]) * poWK->dfXScale < nThresholdWrapOverX)
    6810             :     {
    6811             :         // Check there is a discontinuity by checking at mid-pixel.
    6812             :         // NOTE: all this remains fragile. To confidently
    6813             :         // detect antimeridian warping we should probably try to access
    6814             :         // georeferenced coordinates, and not rely only on tests on
    6815             :         // image space coordinates. But accessing georeferenced
    6816             :         // coordinates from here is not trivial, and we would for example
    6817             :         // have to handle both geographic, Mercator, etc.
    6818             :         // Let's hope this heuristic is good enough for now.
    6819        1041 :         double x = iDstX + 0.5 + poWK->nDstXOff;
    6820        1041 :         double y = iDstY + poWK->nDstYOff;
    6821        1041 :         double z = 0;
    6822        1041 :         int bSuccess = FALSE;
    6823        1041 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1, &x, &y, &z,
    6824             :                              &bSuccess);
                               // A mid-pixel X that maps before the smaller corner is taken as
                               // evidence of wrap-over: the two corners are exchanged and the new
                               // upper bound is shifted by one raster width.
    6825        1041 :         if (bSuccess && x < padfX[iDstX])
    6826             :         {
    6827        1008 :             bWrapOverX = true;
    6828        1008 :             std::swap(padfX[iDstX], padfX2[iDstX]);
    6829        1008 :             padfX2[iDstX] += nSrcXSize;
    6830             :         }
    6831             :     }
    6832             : 
    6833     1909980 :     dfXMin = padfX[iDstX] - poWK->nSrcXOff;
    6834     1909980 :     dfXMax = padfX2[iDstX] - poWK->nSrcXOff;
    6835     1909980 :     constexpr double EPSILON = 1e-10;
    6836             :     // Check that [dfXMin, dfXMax] intersect with [0,nSrcXSize] with a tolerance
    6837     1909980 :     if (!(dfXMax > -EPSILON && dfXMin < nSrcXSize + EPSILON))
    6838         156 :         return false;
    6839     1909830 :     iSrcXMin = static_cast<int>(std::max(floor(dfXMin + EPSILON), 0.0));
                           // The upper bound is clamped to INT_MAX before the cast; it is only
                           // clamped to nSrcXSize when no wrap-over was detected.
    6840     1909830 :     iSrcXMax = static_cast<int>(
    6841     1909830 :         std::min(ceil(dfXMax - EPSILON), static_cast<double>(INT_MAX)));
    6842     1909830 :     if (!bWrapOverX)
    6843     1908820 :         iSrcXMax = std::min(iSrcXMax, nSrcXSize);
                           // Widen an empty window to one column when that stays inside the
                           // raster.
    6844     1909830 :     if (iSrcXMin == iSrcXMax && iSrcXMax < nSrcXSize)
    6845         472 :         iSrcXMax++;
    6846             : 
    6847     1909830 :     if (padfY[iDstX] > padfY2[iDstX])
    6848      270117 :         std::swap(padfY[iDstX], padfY2[iDstX]);
    6849     1909830 :     dfYMin = padfY[iDstX] - poWK->nSrcYOff;
    6850     1909830 :     dfYMax = padfY2[iDstX] - poWK->nSrcYOff;
    6851             :     // Check that [dfYMin, dfYMax] intersect with [0,nSrcYSize] with a tolerance
    6852     1909830 :     if (!(dfYMax > -EPSILON && dfYMin < nSrcYSize + EPSILON))
    6853          78 :         return false;
    6854     1909750 :     iSrcYMin = static_cast<int>(std::max(floor(dfYMin + EPSILON), 0.0));
                           // NOTE(review): unlike the X case, the value is cast to int before
                           // clamping; this presumably relies on the nYMargin check at the top of
                           // the function to keep dfYMax within int range - confirm.
    6855     1909750 :     iSrcYMax = std::min(static_cast<int>(ceil(dfYMax - EPSILON)), nSrcYSize);
                           // Widen an empty window to one row when that stays inside the raster.
    6856     1909750 :     if (iSrcYMin == iSrcYMax && iSrcYMax < nSrcYSize)
    6857           0 :         iSrcYMax++;
    6858             : 
    6859     1909750 :     return true;
    6860             : }
    6861             : 
    6862             : /************************************************************************/
    6863             : /*                         GWKModeRealType()                            */
    6864             : /************************************************************************/
    6865             : 
                       // Generic equality test used by the mode kernels to decide whether a
                       // source value belongs to an already existing bin: plain operator==.
                       // The floating-point specializations below additionally treat two NaNs
                       // as the same value.
    6866       17780 : template <class T> static inline bool IsSame(T a, T b)
    6867             : {
    6868       17780 :     return a == b;
    6869             : }
    6870             : 
                       // GFloat16 specialization: values are the same when they compare equal
                       // or when both are NaN (NaN never compares equal to itself, so without
                       // the second test every NaN would be counted as a distinct value).
                       // NaN detection goes through CPLIsNan() here, whereas the float and
                       // double specializations call std::isnan().
    6871           0 : template <> bool IsSame<GFloat16>(GFloat16 a, GFloat16 b)
    6872             : {
    6873           0 :     return a == b || (CPLIsNan(a) && CPLIsNan(b));
    6874             : }
    6875             : 
                       // float specialization: values are the same when they compare equal or
                       // when both are NaN, so that all NaN inputs are treated as one value.
    6876          18 : template <> bool IsSame<float>(float a, float b)
    6877             : {
    6878          18 :     return a == b || (std::isnan(a) && std::isnan(b));
    6879             : }
    6880             : 
                       // double specialization: values are the same when they compare equal or
                       // when both are NaN, so that all NaN inputs are treated as one value.
    6881          56 : template <> bool IsSame<double>(double a, double b)
    6882             : {
    6883          56 :     return a == b || (std::isnan(a) && std::isnan(b));
    6884             : }
    6885             : 
                       // Worker implementing "mode" resampling for real (non-complex) pixel
                       // values of type T.
                       //
                       // For every destination pixel on lines [psJob->iYMin, psJob->iYMax) and
                       // for every band, the source pixels of the window returned by
                       // GWKAverageOrModeComputeSourceCoords() are binned by value (IsSame()),
                       // each bin accumulating the per-pixel weight, and the value of the
                       // heaviest bin is passed to GWKSetPixelValueRealT().
                       //
                       // Bins of exactly equal weight are settled by poWK->eTieStrategy:
                       // GWKTS_First keeps the current mode, GWKTS_Min / GWKTS_Max switch to the
                       // bin just updated when its value is smaller / larger than the current
                       // mode value.
                       //
                       // psJob supplies the warp kernel (poWK), the line range and an optional
                       // progress callback. Nothing is returned; if the bin arrays cannot be
                       // allocated the function returns without processing any line.
    6886          19 : template <class T> static void GWKModeRealType(GWKJobStruct *psJob)
    6887             : {
    6888          19 :     const GDALWarpKernel *poWK = psJob->poWK;
    6889          19 :     const int iYMin = psJob->iYMin;
    6890          19 :     const int iYMax = psJob->iYMax;
    6891          19 :     const int nDstXSize = poWK->nDstXSize;
    6892          19 :     const int nSrcXSize = poWK->nSrcXSize;
    6893          19 :     const int nSrcYSize = poWK->nSrcYSize;
    6894          19 :     const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
    6895             : 
                           // Bin storage: pVals[i] is the value of bin i and pafCounts[i] its
                           // accumulated weight. Both arrays are reused for every destination
                           // pixel and band.
    6896          19 :     T *pVals = nullptr;
    6897          19 :     float *pafCounts = nullptr;
    6898             : 
                           // One slot per source pixel (nSrcXSize * nSrcYSize).
                           // NOTE(review): assumes a single window never visits more source
                           // pixels than that, including the bWrapOverX case - TODO confirm.
    6899          19 :     if (nSrcXSize > 0 && nSrcYSize > 0)
    6900             :     {
    6901             :         pVals = static_cast<T *>(
    6902          19 :             VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(T)));
    6903             :         pafCounts = static_cast<float *>(
    6904          19 :             VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
    6905          19 :         if (pVals == nullptr || pafCounts == nullptr)
    6906             :         {
                                   // Only the bin arrays exist at this point, so they are all
                                   // that has to be released before bailing out.
    6907           0 :             VSIFree(pVals);
    6908           0 :             VSIFree(pafCounts);
    6909           0 :             return;
    6910             :         }
    6911             :     }
    6912             : 
    6913             :     /* -------------------------------------------------------------------- */
    6914             :     /*      Allocate x,y,z coordinate arrays for transformation ... two     */
    6915             :     /*      scanlines worth of positions.                                   */
    6916             :     /* -------------------------------------------------------------------- */
    6917             : 
    6918             :     double *padfX =
    6919          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6920             :     double *padfY =
    6921          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6922             :     double *padfZ =
    6923          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6924             :     double *padfX2 =
    6925          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6926             :     double *padfY2 =
    6927          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6928             :     double *padfZ2 =
    6929          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6930          19 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    6931          19 :     int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    6932             : 
                           // Warp options forwarded to GWKAverageOrModeComputeLineCoords();
                           // both default to 0 when absent.
    6933          19 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    6934          19 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    6935          19 :     const double dfErrorThreshold = CPLAtof(
    6936          19 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
                           // True for single-band kernels, or when UNIFIED_SRC_NODATA is not
                           // enabled (it defaults to FALSE here). Forwarded to
                           // GWKSetPixelValueRealT(); when false, GWKAvoidNoDataMultiBand() is
                           // called once per destination pixel instead.
    6937          19 :     const bool bAvoidNoDataSingleBand =
    6938          19 :         poWK->nBands == 1 ||
    6939           0 :         !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
    6940             :                                           "UNIFIED_SRC_NODATA", "FALSE"));
    6941             : 
                           // Margins handed to GWKAverageOrModeComputeSourceCoords(): twice the
                           // ceiling of the inverse scale, and never less than 2.
    6942          19 :     const int nXMargin =
    6943          19 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
    6944          19 :     const int nYMargin =
    6945          19 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
    6946             : 
    6947             :     /* ==================================================================== */
    6948             :     /*      Loop over output lines.                                         */
    6949             :     /* ==================================================================== */
    6950         116 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    6951             :     {
    6952          97 :         GWKAverageOrModeComputeLineCoords(
    6953             :             psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
    6954             :             pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
    6955             : 
    6956             :         // Loop over pixels in output scanline.
    6957        3514 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6958             :         {
                                   // dfDensity is never modified in this function: the density
                                   // overlaid on the destination is always 1.0.
    6959        3417 :             GPtrDiff_t iSrcOffset = 0;
    6960        3417 :             double dfDensity = 1.0;
    6961        3417 :             bool bHasFoundDensity = false;
    6962             : 
                                   // Outputs of GWKAverageOrModeComputeSourceCoords(): the
                                   // floating-point window and its integer pixel bounds (the
                                   // loops below treat the max bounds as exclusive).
    6963        3417 :             bool bWrapOverX = false;
    6964        3417 :             double dfXMin = 0;
    6965        3417 :             double dfYMin = 0;
    6966        3417 :             double dfXMax = 0;
    6967        3417 :             double dfYMax = 0;
    6968        3417 :             int iSrcXMin = 0;
    6969        3417 :             int iSrcYMin = 0;
    6970        3417 :             int iSrcXMax = 0;
    6971        3417 :             int iSrcYMax = 0;
    6972        3417 :             if (!GWKAverageOrModeComputeSourceCoords(
    6973             :                     psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
    6974             :                     nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
    6975             :                     iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
    6976             :             {
                                       // No usable source window: leave the destination pixel
                                       // untouched.
    6977           0 :                 continue;
    6978             :             }
    6979             : 
    6980        3417 :             const GPtrDiff_t iDstOffset =
    6981        3417 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    6982             : 
                                   // NOTE(review): bHasFoundDensity is declared per destination
                                   // pixel and is not reset per band, so once one band has found
                                   // a mode, a later band with no contributing source pixel still
                                   // reaches GWKSetPixelValueRealT() with whatever dfBandDensity
                                   // and nVal hold at that point - confirm this is intended.
    6983             :             // Loop processing each band.
    6984        6834 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6985             :             {
    6986        3417 :                 double dfBandDensity = 0.0;
    6987             : 
                                       // Start each band with an empty set of bins;
                                       // iModeIndex == -1 means no source pixel has
                                       // contributed yet.
    6988        3417 :                 int nBins = 0;
    6989        3417 :                 int iModeIndex = -1;
    6990        3417 :                 T nVal{};
    6991             : 
    6992       10248 :                 for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    6993             :                 {
                                           // COMPUTE_WEIGHT_Y / COMPUTE_WEIGHT are macros defined
                                           // elsewhere in the file; presumably they yield the
                                           // fraction of the source pixel covered by the window
                                           // - TODO confirm.
    6994        6831 :                     const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    6995        6831 :                     iSrcOffset =
    6996        6831 :                         iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    6997       20530 :                     for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    6998             :                          iSrcX++, iSrcOffset++)
    6999             :                     {
                                               // In wrap-over mode the column index is folded
                                               // back into the row with a modulo instead of
                                               // relying on the incremented offset.
    7000       13699 :                         if (bWrapOverX)
    7001           0 :                             iSrcOffset =
    7002           0 :                                 (iSrcX % nSrcXSize) +
    7003           0 :                                 static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7004             : 
                                               // Skip source pixels flagged invalid in the
                                               // unified source validity mask, when there is one.
    7005       13699 :                         if (poWK->panUnifiedSrcValid != nullptr &&
    7006           0 :                             !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    7007           0 :                             continue;
    7008             : 
                                               // Only pixels that can be read and whose band
                                               // density exceeds BAND_DENSITY_THRESHOLD
                                               // contribute to the bins.
    7009       13699 :                         if (GWKGetPixelT(poWK, iBand, iSrcOffset,
    7010       27398 :                                          &dfBandDensity, &nVal) &&
    7011       13699 :                             dfBandDensity > BAND_DENSITY_THRESHOLD)
    7012             :                         {
    7013       13699 :                             const double dfWeight =
    7014       13699 :                                 COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7015             : 
    7016             :                             // Check array for existing entry.
    7017       13699 :                             int i = 0;
    7018       29194 :                             for (i = 0; i < nBins; ++i)
    7019             :                             {
    7020       17807 :                                 if (IsSame(pVals[i], nVal))
    7021             :                                 {
    7022             : 
                                                           // Existing bin: add the weight, then
                                                           // check whether it overtakes the
                                                           // current mode. iModeIndex is >= 0
                                                           // here because creating the first bin
                                                           // sets it.
    7023        2312 :                                     pafCounts[i] +=
    7024        2312 :                                         static_cast<float>(dfWeight);
    7025        2312 :                                     bool bValIsMaxCount =
    7026        2312 :                                         (pafCounts[i] > pafCounts[iModeIndex]);
    7027             : 
                                                           // Exact tie with the current mode:
                                                           // apply the configured tie strategy.
    7028        2312 :                                     if (!bValIsMaxCount &&
    7029        1498 :                                         pafCounts[i] == pafCounts[iModeIndex])
    7030             :                                     {
    7031        1490 :                                         switch (eTieStrategy)
    7032             :                                         {
    7033        1477 :                                             case GWKTS_First:
    7034        1477 :                                                 break;
    7035           6 :                                             case GWKTS_Min:
    7036           6 :                                                 bValIsMaxCount =
    7037           6 :                                                     nVal < pVals[iModeIndex];
    7038           6 :                                                 break;
    7039           7 :                                             case GWKTS_Max:
    7040           7 :                                                 bValIsMaxCount =
    7041           7 :                                                     nVal > pVals[iModeIndex];
    7042           7 :                                                 break;
    7043             :                                         }
    7044             :                                     }
    7045             : 
    7046        2312 :                                     if (bValIsMaxCount)
    7047             :                                     {
    7048         817 :                                         iModeIndex = i;
    7049             :                                     }
    7050             : 
    7051        2312 :                                     break;
    7052             :                                 }
    7053             :                             }
    7054             : 
    7055             :                             // Add to arr if entry not already there.
    7056       13699 :                             if (i == nBins)
    7057             :                             {
    7058       11387 :                                 pVals[i] = nVal;
    7059       11387 :                                 pafCounts[i] = static_cast<float>(dfWeight);
    7060             : 
                                                       // The first bin created becomes the
                                                       // initial mode.
    7061       11387 :                                 if (iModeIndex < 0)
    7062        3417 :                                     iModeIndex = i;
    7063             : 
    7064       11387 :                                 ++nBins;
    7065             :                             }
    7066             :                         }
    7067             :                     }
    7068             :                 }
    7069             : 
                                       // A mode exists as soon as one source pixel contributed:
                                       // output its value with a band density of 1.
    7070        3417 :                 if (iModeIndex != -1)
    7071             :                 {
    7072        3417 :                     nVal = pVals[iModeIndex];
    7073        3417 :                     dfBandDensity = 1;
    7074        3417 :                     bHasFoundDensity = true;
    7075             :                 }
    7076             : 
    7077             :                 // We have a computed value from the source.  Now apply it
    7078             :                 // to the destination pixel
    7079        3417 :                 if (bHasFoundDensity)
    7080             :                 {
    7081        3417 :                     GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
    7082             :                                           dfBandDensity, nVal,
    7083             :                                           bAvoidNoDataSingleBand);
    7084             :                 }
    7085             :             }
    7086             : 
                                   // No band produced a value: skip the density/validity update.
    7087        3417 :             if (!bHasFoundDensity)
    7088           0 :                 continue;
    7089             : 
    7090        3417 :             if (!bAvoidNoDataSingleBand)
    7091             :             {
    7092           0 :                 GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    7093             :             }
    7094             : 
    7095             :             /* --------------------------------------------------------------------
    7096             :              */
    7097             :             /*      Update destination density/validity masks. */
    7098             :             /* --------------------------------------------------------------------
    7099             :              */
    7100        3417 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    7101             : 
    7102        3417 :             if (poWK->panDstValid != nullptr)
    7103             :             {
    7104           0 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    7105             :             }
    7106             :         } /* Next iDstX */
    7107             : 
    7108             :         /* --------------------------------------------------------------------
    7109             :          */
    7110             :         /*      Report progress to the user, and optionally cancel out. */
    7111             :         /* --------------------------------------------------------------------
    7112             :          */
                               // A non-zero return from the progress callback stops the line
                               // loop; the cleanup below still runs.
    7113          97 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    7114           0 :             break;
    7115             :     }
    7116             : 
    7117             :     /* -------------------------------------------------------------------- */
    7118             :     /*      Cleanup and return.                                             */
    7119             :     /* -------------------------------------------------------------------- */
    7120          19 :     CPLFree(padfX);
    7121          19 :     CPLFree(padfY);
    7122          19 :     CPLFree(padfZ);
    7123          19 :     CPLFree(padfX2);
    7124          19 :     CPLFree(padfY2);
    7125          19 :     CPLFree(padfZ2);
    7126          19 :     CPLFree(pabSuccess);
    7127          19 :     CPLFree(pabSuccess2);
    7128          19 :     VSIFree(pVals);
    7129          19 :     VSIFree(pafCounts);
    7130             : }
    7131             : 
    7132             : /************************************************************************/
    7133             : /*                        GWKModeComplexType()                          */
    7134             : /************************************************************************/
    7135             : 
    7136           8 : static void GWKModeComplexType(GWKJobStruct *psJob)
    7137             : {
    7138           8 :     const GDALWarpKernel *poWK = psJob->poWK;
    7139           8 :     const int iYMin = psJob->iYMin;
    7140           8 :     const int iYMax = psJob->iYMax;
    7141           8 :     const int nDstXSize = poWK->nDstXSize;
    7142           8 :     const int nSrcXSize = poWK->nSrcXSize;
    7143           8 :     const int nSrcYSize = poWK->nSrcYSize;
    7144           8 :     const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
    7145             :     const double dfMultFactorVerticalShiftPipeline =
    7146           8 :         poWK->bApplyVerticalShift
    7147           8 :             ? CPLAtof(CSLFetchNameValueDef(
    7148           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    7149             :                   "1.0"))
    7150           8 :             : 0.0;
    7151             :     const bool bAvoidNoDataSingleBand =
    7152           8 :         poWK->nBands == 1 ||
    7153           0 :         !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
    7154           8 :                                           "UNIFIED_SRC_NODATA", "FALSE"));
    7155             : 
    7156           8 :     double *padfRealVals = nullptr;
    7157           8 :     double *padfImagVals = nullptr;
    7158           8 :     float *pafCounts = nullptr;
    7159             : 
    7160           8 :     if (nSrcXSize > 0 && nSrcYSize > 0)
    7161             :     {
    7162             :         padfRealVals = static_cast<double *>(
    7163           8 :             VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(double)));
    7164             :         padfImagVals = static_cast<double *>(
    7165           8 :             VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(double)));
    7166             :         pafCounts = static_cast<float *>(
    7167           8 :             VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
    7168           8 :         if (padfRealVals == nullptr || padfImagVals == nullptr ||
    7169             :             pafCounts == nullptr)
    7170             :         {
    7171           0 :             VSIFree(padfRealVals);
    7172           0 :             VSIFree(padfImagVals);
    7173           0 :             VSIFree(pafCounts);
    7174           0 :             return;
    7175             :         }
    7176             :     }
    7177             : 
    7178             :     /* -------------------------------------------------------------------- */
    7179             :     /*      Allocate x,y,z coordinate arrays for transformation ... two     */
    7180             :     /*      scanlines worth of positions.                                   */
    7181             :     /* -------------------------------------------------------------------- */
    7182             : 
    7183             :     double *padfX =
    7184           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7185             :     double *padfY =
    7186           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7187             :     double *padfZ =
    7188           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7189             :     double *padfX2 =
    7190           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7191             :     double *padfY2 =
    7192           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7193             :     double *padfZ2 =
    7194           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7195           8 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    7196           8 :     int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    7197             : 
    7198           8 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    7199           8 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    7200           8 :     const double dfErrorThreshold = CPLAtof(
    7201           8 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    7202             : 
    7203             :     const int nXMargin =
    7204           8 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
    7205             :     const int nYMargin =
    7206           8 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
    7207             : 
    7208             :     /* ==================================================================== */
    7209             :     /*      Loop over output lines.                                         */
    7210             :     /* ==================================================================== */
    7211          16 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    7212             :     {
    7213           8 :         GWKAverageOrModeComputeLineCoords(
    7214             :             psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
    7215             :             pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
    7216             : 
    7217             :         // Loop over pixels in output scanline.
    7218          16 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    7219             :         {
    7220           8 :             GPtrDiff_t iSrcOffset = 0;
    7221           8 :             double dfDensity = 1.0;
    7222           8 :             bool bHasFoundDensity = false;
    7223             : 
    7224           8 :             bool bWrapOverX = false;
    7225           8 :             double dfXMin = 0;
    7226           8 :             double dfYMin = 0;
    7227           8 :             double dfXMax = 0;
    7228           8 :             double dfYMax = 0;
    7229           8 :             int iSrcXMin = 0;
    7230           8 :             int iSrcYMin = 0;
    7231           8 :             int iSrcXMax = 0;
    7232           8 :             int iSrcYMax = 0;
    7233           8 :             if (!GWKAverageOrModeComputeSourceCoords(
    7234             :                     psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
    7235             :                     nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
    7236             :                     iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
    7237             :             {
    7238           0 :                 continue;
    7239             :             }
    7240             : 
    7241           8 :             const GPtrDiff_t iDstOffset =
    7242           8 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    7243             : 
    7244             :             // Loop processing each band.
    7245          16 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7246             :             {
    7247           8 :                 double dfBandDensity = 0.0;
    7248             : 
    7249           8 :                 int nBins = 0;
    7250           8 :                 int iModeIndex = -1;
    7251           8 :                 double dfValueReal = 0;
    7252           8 :                 double dfValueImag = 0;
    7253             : 
    7254          16 :                 for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7255             :                 {
    7256           8 :                     const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7257           8 :                     iSrcOffset =
    7258           8 :                         iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7259          38 :                     for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7260             :                          iSrcX++, iSrcOffset++)
    7261             :                     {
    7262          30 :                         if (bWrapOverX)
    7263           0 :                             iSrcOffset =
    7264           0 :                                 (iSrcX % nSrcXSize) +
    7265           0 :                                 static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7266             : 
    7267          30 :                         if (poWK->panUnifiedSrcValid != nullptr &&
    7268           0 :                             !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    7269           0 :                             continue;
    7270             : 
    7271          30 :                         if (GWKGetPixelValue(poWK, iBand, iSrcOffset,
    7272             :                                              &dfBandDensity, &dfValueReal,
    7273          60 :                                              &dfValueImag) &&
    7274          30 :                             dfBandDensity > BAND_DENSITY_THRESHOLD)
    7275             :                         {
    7276          30 :                             const double dfWeight =
    7277          30 :                                 COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7278             : 
    7279             :                             // Check array for existing entry.
    7280          30 :                             int i = 0;
    7281          49 :                             for (i = 0; i < nBins; ++i)
    7282             :                             {
    7283          47 :                                 if (IsSame(padfRealVals[i], dfValueReal) &&
    7284          14 :                                     IsSame(padfImagVals[i], dfValueImag))
    7285             :                                 {
    7286             : 
    7287          14 :                                     pafCounts[i] +=
    7288          14 :                                         static_cast<float>(dfWeight);
    7289          14 :                                     bool bValIsMaxCount =
    7290          14 :                                         (pafCounts[i] > pafCounts[iModeIndex]);
    7291             : 
    7292          14 :                                     if (!bValIsMaxCount &&
    7293           6 :                                         pafCounts[i] == pafCounts[iModeIndex])
    7294             :                                     {
    7295           3 :                                         switch (eTieStrategy)
    7296             :                                         {
    7297           3 :                                             case GWKTS_First:
    7298           3 :                                                 break;
    7299           0 :                                             case GWKTS_Min:
    7300           0 :                                                 bValIsMaxCount =
    7301           0 :                                                     dfValueReal <
    7302           0 :                                                     padfRealVals[iModeIndex];
    7303           0 :                                                 break;
    7304           0 :                                             case GWKTS_Max:
    7305           0 :                                                 bValIsMaxCount =
    7306           0 :                                                     dfValueReal >
    7307           0 :                                                     padfRealVals[iModeIndex];
    7308           0 :                                                 break;
    7309             :                                         }
    7310             :                                     }
    7311             : 
    7312          14 :                                     if (bValIsMaxCount)
    7313             :                                     {
    7314           8 :                                         iModeIndex = i;
    7315             :                                     }
    7316             : 
    7317          14 :                                     break;
    7318             :                                 }
    7319             :                             }
    7320             : 
    7321             :                             // Add to arr if entry not already there.
    7322          30 :                             if (i == nBins)
    7323             :                             {
    7324          16 :                                 padfRealVals[i] = dfValueReal;
    7325          16 :                                 padfImagVals[i] = dfValueImag;
    7326          16 :                                 pafCounts[i] = static_cast<float>(dfWeight);
    7327             : 
    7328          16 :                                 if (iModeIndex < 0)
    7329           8 :                                     iModeIndex = i;
    7330             : 
    7331          16 :                                 ++nBins;
    7332             :                             }
    7333             :                         }
    7334             :                     }
    7335             :                 }
    7336             : 
    7337           8 :                 if (iModeIndex != -1)
    7338             :                 {
    7339           8 :                     dfValueReal = padfRealVals[iModeIndex];
    7340           8 :                     dfValueImag = padfImagVals[iModeIndex];
    7341           8 :                     dfBandDensity = 1;
    7342             : 
    7343           8 :                     if (poWK->bApplyVerticalShift)
    7344             :                     {
    7345           0 :                         if (!std::isfinite(padfZ[iDstX]))
    7346           0 :                             continue;
    7347             :                         // Subtract padfZ[] since the coordinate
    7348             :                         // transformation is from target to source
    7349           0 :                         dfValueReal =
    7350           0 :                             dfValueReal * poWK->dfMultFactorVerticalShift -
    7351           0 :                             padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
    7352             :                     }
    7353             : 
    7354           8 :                     bHasFoundDensity = true;
    7355             :                 }
    7356             : 
    7357             :                 // We have a computed value from the source.  Now apply it
    7358             :                 // to the destination pixel
    7359           8 :                 if (bHasFoundDensity)
    7360             :                 {
    7361           8 :                     GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    7362             :                                      dfValueReal, dfValueImag,
    7363             :                                      bAvoidNoDataSingleBand);
    7364             :                 }
    7365             :             }
    7366             : 
    7367           8 :             if (!bHasFoundDensity)
    7368           0 :                 continue;
    7369             : 
    7370           8 :             if (!bAvoidNoDataSingleBand)
    7371             :             {
    7372           0 :                 GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    7373             :             }
    7374             : 
    7375             :             /* --------------------------------------------------------------------
    7376             :              */
    7377             :             /*      Update destination density/validity masks. */
    7378             :             /* --------------------------------------------------------------------
    7379             :              */
    7380           8 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    7381             : 
    7382           8 :             if (poWK->panDstValid != nullptr)
    7383             :             {
    7384           0 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    7385             :             }
    7386             :         } /* Next iDstX */
    7387             : 
    7388             :         /* --------------------------------------------------------------------
    7389             :          */
    7390             :         /*      Report progress to the user, and optionally cancel out. */
    7391             :         /* --------------------------------------------------------------------
    7392             :          */
    7393           8 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    7394           0 :             break;
    7395             :     }
    7396             : 
    7397             :     /* -------------------------------------------------------------------- */
    7398             :     /*      Cleanup and return.                                             */
    7399             :     /* -------------------------------------------------------------------- */
    7400           8 :     CPLFree(padfX);
    7401           8 :     CPLFree(padfY);
    7402           8 :     CPLFree(padfZ);
    7403           8 :     CPLFree(padfX2);
    7404           8 :     CPLFree(padfY2);
    7405           8 :     CPLFree(padfZ2);
    7406           8 :     CPLFree(pabSuccess);
    7407           8 :     CPLFree(pabSuccess2);
    7408           8 :     VSIFree(padfRealVals);
    7409           8 :     VSIFree(padfImagVals);
    7410           8 :     VSIFree(pafCounts);
    7411             : }
    7412             : 
    7413             : /************************************************************************/
    7414             : /*                       GWKAverageOrModeThread()                       */
    7415             : /************************************************************************/
    7416             : 
    7417             : // Overall logic based on GWKGeneralCaseThread().
    7418         163 : static void GWKAverageOrModeThread(void *pData)
    7419             : {
    7420         163 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    7421         163 :     const GDALWarpKernel *poWK = psJob->poWK;
    7422         163 :     const int iYMin = psJob->iYMin;
    7423         163 :     const int iYMax = psJob->iYMax;
    7424             :     const double dfMultFactorVerticalShiftPipeline =
    7425         163 :         poWK->bApplyVerticalShift
    7426         163 :             ? CPLAtof(CSLFetchNameValueDef(
    7427           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    7428             :                   "1.0"))
    7429         163 :             : 0.0;
    7430             :     const bool bAvoidNoDataSingleBand =
    7431         194 :         poWK->nBands == 1 ||
    7432          31 :         !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
    7433         163 :                                           "UNIFIED_SRC_NODATA", "FALSE"));
    7434             : 
    7435         163 :     const int nDstXSize = poWK->nDstXSize;
    7436         163 :     const int nSrcXSize = poWK->nSrcXSize;
    7437             : 
    7438             :     /* -------------------------------------------------------------------- */
    7439             :     /*      Find out which algorithm to use (small optim.)                  */
    7440             :     /* -------------------------------------------------------------------- */
    7441             : 
    7442             :     // Only used for GRA_Mode
    7443         163 :     float *pafCounts = nullptr;
    7444         163 :     int nBins = 0;
    7445         163 :     int nBinsOffset = 0;
    7446         163 :     const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
    7447             : 
    7448             :     // Only used with Q1, Med and Q3
    7449         163 :     float quant = 0.0f;
    7450             : 
    7451             :     // To control array allocation only when data type is complex
    7452         163 :     const bool bIsComplex = GDALDataTypeIsComplex(poWK->eWorkingDataType) != 0;
    7453             : 
    7454         163 :     if (poWK->eResample == GRA_Mode)
    7455             :     {
    7456          45 :         if (poWK->bApplyVerticalShift)
    7457             :         {
    7458           0 :             return GWKModeComplexType(psJob);
    7459             :         }
    7460             : 
    7461          45 :         switch (poWK->eWorkingDataType)
    7462             :         {
    7463           7 :             case GDT_UInt8:
    7464           7 :                 nBins = 256;
    7465           7 :                 break;
    7466             : 
    7467           0 :             case GDT_Int8:
    7468           0 :                 nBins = 256;
    7469           0 :                 nBinsOffset = nBins / 2;
    7470           0 :                 break;
    7471             : 
    7472           1 :             case GDT_UInt16:
    7473           1 :                 nBins = 65536;
    7474           1 :                 break;
    7475             : 
    7476          10 :             case GDT_Int16:
    7477          10 :                 nBins = 65536;
    7478          10 :                 nBinsOffset = nBins / 2;
    7479          10 :                 break;
    7480             : 
    7481          10 :             case GDT_Int32:
    7482          10 :                 return GWKModeRealType<int32_t>(psJob);
    7483             : 
    7484           1 :             case GDT_UInt32:
    7485           1 :                 return GWKModeRealType<uint32_t>(psJob);
    7486             : 
    7487           1 :             case GDT_Int64:
    7488           1 :                 return GWKModeRealType<int64_t>(psJob);
    7489             : 
    7490           1 :             case GDT_UInt64:
    7491           1 :                 return GWKModeRealType<uint64_t>(psJob);
    7492             : 
    7493           0 :             case GDT_Float16:
    7494           0 :                 return GWKModeRealType<GFloat16>(psJob);
    7495             : 
    7496           4 :             case GDT_Float32:
    7497           4 :                 return GWKModeRealType<float>(psJob);
    7498             : 
    7499           2 :             case GDT_Float64:
    7500           2 :                 return GWKModeRealType<double>(psJob);
    7501             : 
    7502           8 :             case GDT_CInt16:
    7503             :             case GDT_CInt32:
    7504             :             case GDT_CFloat16:
    7505             :             case GDT_CFloat32:
    7506             :             case GDT_CFloat64:
    7507           8 :                 return GWKModeComplexType(psJob);
    7508             : 
    7509           0 :             case GDT_Unknown:
    7510             :             case GDT_TypeCount:
    7511           0 :                 CPLAssert(false);
    7512             :                 return;
    7513             :         }
    7514             : 
    7515          18 :         if (nBins)
    7516             :         {
    7517             :             pafCounts =
    7518          18 :                 static_cast<float *>(VSI_MALLOC_VERBOSE(nBins * sizeof(float)));
    7519          18 :             if (pafCounts == nullptr)
    7520           0 :                 return;
    7521             :         }
    7522             :     }
    7523         118 :     else if (poWK->eResample == GRA_Med)
    7524             :     {
    7525           6 :         quant = 0.5f;
    7526             :     }
    7527         112 :     else if (poWK->eResample == GRA_Q1)
    7528             :     {
    7529          10 :         quant = 0.25f;
    7530             :     }
    7531         102 :     else if (poWK->eResample == GRA_Q3)
    7532             :     {
    7533           5 :         quant = 0.75f;
    7534             :     }
    7535          97 :     else if (poWK->eResample != GRA_Average && poWK->eResample != GRA_RMS &&
    7536          11 :              poWK->eResample != GRA_Min && poWK->eResample != GRA_Max)
    7537             :     {
    7538             :         // Other resample algorithms not permitted here.
    7539           0 :         CPLError(CE_Fatal, CPLE_AppDefined,
    7540             :                  "GDALWarpKernel():GWKAverageOrModeThread() ERROR, "
    7541             :                  "illegal resample");
    7542             :     }
    7543             : 
    7544         136 :     CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread()");
    7545             : 
    7546             :     /* -------------------------------------------------------------------- */
    7547             :     /*      Allocate x,y,z coordinate arrays for transformation ... two     */
    7548             :     /*      scanlines worth of positions.                                   */
    7549             :     /* -------------------------------------------------------------------- */
    7550             : 
    7551             :     double *padfX =
    7552         136 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7553             :     double *padfY =
    7554         136 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7555             :     double *padfZ =
    7556         136 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7557             :     double *padfX2 =
    7558         136 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7559             :     double *padfY2 =
    7560         136 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7561             :     double *padfZ2 =
    7562         136 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7563         136 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    7564         136 :     int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    7565             : 
    7566         136 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    7567         136 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    7568         136 :     const double dfErrorThreshold = CPLAtof(
    7569         136 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    7570             : 
    7571             :     const double dfExcludedValuesThreshold =
    7572         136 :         CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
    7573             :                                      "EXCLUDED_VALUES_PCT_THRESHOLD", "50")) /
    7574         136 :         100.0;
    7575             :     const double dfNodataValuesThreshold =
    7576         136 :         CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
    7577             :                                      "NODATA_VALUES_PCT_THRESHOLD", "100")) /
    7578         136 :         100.0;
    7579             : 
    7580             :     const int nXMargin =
    7581         136 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
    7582             :     const int nYMargin =
    7583         136 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
    7584             : 
    7585             :     /* ==================================================================== */
    7586             :     /*      Loop over output lines.                                         */
    7587             :     /* ==================================================================== */
    7588        8214 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    7589             :     {
    7590        8078 :         GWKAverageOrModeComputeLineCoords(
    7591             :             psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
    7592             :             pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
    7593             : 
    7594             :         /* ====================================================================
    7595             :          */
    7596             :         /*      Loop over pixels in output scanline. */
    7597             :         /* ====================================================================
    7598             :          */
    7599     2094000 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    7600             :         {
    7601     2085920 :             GPtrDiff_t iSrcOffset = 0;
    7602     2085920 :             double dfDensity = 1.0;
    7603     2085920 :             bool bHasFoundDensity = false;
    7604             : 
    7605     2085920 :             bool bWrapOverX = false;
    7606     2085920 :             double dfXMin = 0;
    7607     2085920 :             double dfYMin = 0;
    7608     2085920 :             double dfXMax = 0;
    7609     2085920 :             double dfYMax = 0;
    7610     2085920 :             int iSrcXMin = 0;
    7611     2085920 :             int iSrcYMin = 0;
    7612     2085920 :             int iSrcXMax = 0;
    7613     2085920 :             int iSrcYMax = 0;
    7614     2085920 :             if (!GWKAverageOrModeComputeSourceCoords(
    7615             :                     psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
    7616             :                     nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
    7617             :                     iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
    7618             :             {
    7619      687183 :                 continue;
    7620             :             }
    7621             : 
    7622     1906320 :             const GPtrDiff_t iDstOffset =
    7623     1906320 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    7624             : 
    7625     1906320 :             bool bDone = false;
    7626             : 
    7627             :             // Special Average mode where we process all bands together,
    7628             :             // to avoid averaging tuples that match an entry of m_aadfExcludedValues
    7629     1906320 :             constexpr double EPSILON = 1e-10;
    7630     4613330 :             if (poWK->eResample == GRA_Average &&
    7631      800681 :                 (!poWK->m_aadfExcludedValues.empty() ||
    7632      589832 :                  dfNodataValuesThreshold < 1 - EPSILON) &&
    7633     2707000 :                 !poWK->bApplyVerticalShift && !bIsComplex)
    7634             :             {
    7635      589832 :                 double dfTotalWeightInvalid = 0.0;
    7636      589832 :                 double dfTotalWeightExcluded = 0.0;
    7637      589832 :                 double dfTotalWeightRegular = 0.0;
    7638     1179660 :                 std::vector<double> adfValueReal(poWK->nBands, 0);
    7639     1179660 :                 std::vector<double> adfValueAveraged(poWK->nBands, 0);
    7640             :                 std::vector<int> anCountExcludedValues(
    7641      589832 :                     poWK->m_aadfExcludedValues.size(), 0);
    7642             : 
    7643     2162710 :                 for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7644             :                 {
    7645     1572880 :                     const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7646     1572880 :                     iSrcOffset =
    7647     1572880 :                         iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7648     6291500 :                     for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7649             :                          iSrcX++, iSrcOffset++)
    7650             :                     {
    7651     4718620 :                         if (bWrapOverX)
    7652           0 :                             iSrcOffset =
    7653           0 :                                 (iSrcX % nSrcXSize) +
    7654           0 :                                 static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7655             : 
    7656     4718620 :                         const double dfWeight =
    7657     4718620 :                             COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7658     4718620 :                         if (dfWeight <= 0)
    7659           0 :                             continue;
    7660             : 
    7661     4718640 :                         if (poWK->panUnifiedSrcValid != nullptr &&
    7662          12 :                             !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    7663             :                         {
    7664           3 :                             dfTotalWeightInvalid += dfWeight;
    7665           3 :                             continue;
    7666             :                         }
    7667             : 
    7668     4718620 :                         bool bAllValid = true;
    7669     8651150 :                         for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7670             :                         {
    7671     7340300 :                             double dfBandDensity = 0;
    7672     7340300 :                             double dfValueImagTmp = 0;
    7673    11272800 :                             if (!(GWKGetPixelValue(
    7674             :                                       poWK, iBand, iSrcOffset, &dfBandDensity,
    7675     7340300 :                                       &adfValueReal[iBand], &dfValueImagTmp) &&
    7676     3932530 :                                   dfBandDensity > BAND_DENSITY_THRESHOLD))
    7677             :                             {
    7678     3407770 :                                 bAllValid = false;
    7679     3407770 :                                 break;
    7680             :                             }
    7681             :                         }
    7682             : 
    7683     4718620 :                         if (!bAllValid)
    7684             :                         {
    7685     3407770 :                             dfTotalWeightInvalid += dfWeight;
    7686     3407770 :                             continue;
    7687             :                         }
    7688             : 
    7689     1310850 :                         bool bExcludedValueFound = false;
    7690     2490500 :                         for (size_t i = 0;
    7691     2490500 :                              i < poWK->m_aadfExcludedValues.size(); ++i)
    7692             :                         {
    7693     1179670 :                             if (poWK->m_aadfExcludedValues[i] == adfValueReal)
    7694             :                             {
    7695          22 :                                 bExcludedValueFound = true;
    7696          22 :                                 ++anCountExcludedValues[i];
    7697          22 :                                 dfTotalWeightExcluded += dfWeight;
    7698          22 :                                 break;
    7699             :                             }
    7700             :                         }
    7701     1310850 :                         if (!bExcludedValueFound)
    7702             :                         {
    7703             :                             // Weighted incremental algorithm mean
    7704             :                             // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
    7705     1310830 :                             dfTotalWeightRegular += dfWeight;
    7706     5243290 :                             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7707             :                             {
    7708     3932460 :                                 adfValueAveraged[iBand] +=
    7709     7864930 :                                     (dfWeight / dfTotalWeightRegular) *
    7710     7864930 :                                     (adfValueReal[iBand] -
    7711     3932460 :                                      adfValueAveraged[iBand]);
    7712             :                             }
    7713             :                         }
    7714             :                     }
    7715             :                 }
    7716             : 
    7717      589832 :                 const double dfTotalWeight = dfTotalWeightInvalid +
    7718             :                                              dfTotalWeightExcluded +
    7719             :                                              dfTotalWeightRegular;
    7720      589832 :                 if (dfTotalWeightInvalid > 0 &&
    7721             :                     dfTotalWeightInvalid >=
    7722      458751 :                         dfNodataValuesThreshold * dfTotalWeight)
    7723             :                 {
    7724             :                     // Do nothing. Leave bHasFoundDensity set to false.
    7725             :                 }
    7726      131085 :                 else if (dfTotalWeightExcluded > 0 &&
    7727             :                          dfTotalWeightExcluded >=
    7728           7 :                              dfExcludedValuesThreshold * dfTotalWeight)
    7729             :                 {
    7730             :                     // Find the most represented excluded value tuple
    7731           3 :                     size_t iExcludedValue = 0;
    7732           3 :                     int nExcludedValueCount = 0;
    7733           6 :                     for (size_t i = 0; i < poWK->m_aadfExcludedValues.size();
    7734             :                          ++i)
    7735             :                     {
    7736           3 :                         if (anCountExcludedValues[i] > nExcludedValueCount)
    7737             :                         {
    7738           3 :                             iExcludedValue = i;
    7739           3 :                             nExcludedValueCount = anCountExcludedValues[i];
    7740             :                         }
    7741             :                     }
    7742             : 
    7743           3 :                     bHasFoundDensity = true;
    7744             : 
    7745          12 :                     for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7746             :                     {
    7747           9 :                         GWKSetPixelValue(
    7748             :                             poWK, iBand, iDstOffset, /* dfBandDensity = */ 1.0,
    7749           9 :                             poWK->m_aadfExcludedValues[iExcludedValue][iBand],
    7750             :                             0, bAvoidNoDataSingleBand);
    7751             :                     }
    7752             : 
    7753           3 :                     if (!bAvoidNoDataSingleBand)
    7754             :                     {
    7755           0 :                         GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    7756           3 :                     }
    7757             :                 }
    7758      131082 :                 else if (dfTotalWeightRegular > 0)
    7759             :                 {
    7760      131082 :                     bHasFoundDensity = true;
    7761             : 
    7762      524324 :                     for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7763             :                     {
    7764      393242 :                         GWKSetPixelValue(poWK, iBand, iDstOffset,
    7765             :                                          /* dfBandDensity = */ 1.0,
    7766      393242 :                                          adfValueAveraged[iBand], 0,
    7767             :                                          bAvoidNoDataSingleBand);
    7768             :                     }
    7769             : 
    7770      131082 :                     if (!bAvoidNoDataSingleBand)
    7771             :                     {
    7772           0 :                         GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    7773             :                     }
    7774             :                 }
    7775             : 
    7776             :                 // Skip below loop on bands
    7777      589832 :                 bDone = true;
    7778             :             }
    7779             : 
    7780             :             /* ====================================================================
    7781             :              */
    7782             :             /*      Loop processing each band. */
    7783             :             /* ====================================================================
    7784             :              */
    7785             : 
    7786     4729250 :             for (int iBand = 0; !bDone && iBand < poWK->nBands; iBand++)
    7787             :             {
    7788     2822920 :                 double dfBandDensity = 0.0;
    7789     2822920 :                 double dfValueReal = 0.0;
    7790     2822920 :                 double dfValueImag = 0.0;
    7791     2822920 :                 double dfValueRealTmp = 0.0;
    7792     2822920 :                 double dfValueImagTmp = 0.0;
    7793             : 
    7794             :                 /* --------------------------------------------------------------------
    7795             :                  */
    7796             :                 /*      Collect the source value. */
    7797             :                 /* --------------------------------------------------------------------
    7798             :                  */
    7799             : 
    7800             :                 // Loop over source lines and pixels - 3 possible algorithms.
    7801             : 
    7802     2822920 :                 if (poWK->eResample == GRA_Average)
    7803             :                 {
    7804      300849 :                     double dfTotalWeight = 0.0;
    7805             : 
    7806             :                     // This code adapted from GDALDownsampleChunk32R_AverageT()
    7807             :                     // in gcore/overview.cpp.
    7808      631308 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7809             :                     {
    7810      330459 :                         const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7811      330459 :                         iSrcOffset = iSrcXMin +
    7812      330459 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7813      803200 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7814             :                              iSrcX++, iSrcOffset++)
    7815             :                         {
    7816      472741 :                             if (bWrapOverX)
    7817         630 :                                 iSrcOffset =
    7818         630 :                                     (iSrcX % nSrcXSize) +
    7819         630 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7820             : 
    7821      472745 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7822           4 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7823             :                                             iSrcOffset))
    7824             :                             {
    7825           1 :                                 continue;
    7826             :                             }
    7827             : 
    7828      472740 :                             if (GWKGetPixelValue(
    7829             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7830      945480 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7831      472740 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7832             :                             {
    7833      472740 :                                 const double dfWeight =
    7834      472740 :                                     COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7835      472740 :                                 if (dfWeight > 0)
    7836             :                                 {
    7837             :                                     // Weighted incremental algorithm mean
    7838             :                                     // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
    7839      472740 :                                     dfTotalWeight += dfWeight;
    7840      472740 :                                     dfValueReal +=
    7841      472740 :                                         (dfWeight / dfTotalWeight) *
    7842      472740 :                                         (dfValueRealTmp - dfValueReal);
    7843      472740 :                                     if (bIsComplex)
    7844             :                                     {
    7845         252 :                                         dfValueImag +=
    7846         252 :                                             (dfWeight / dfTotalWeight) *
    7847         252 :                                             (dfValueImagTmp - dfValueImag);
    7848             :                                     }
    7849             :                                 }
    7850             :                             }
    7851             :                         }
    7852             :                     }
    7853             : 
    7854      300849 :                     if (dfTotalWeight > 0)
    7855             :                     {
    7856      300849 :                         if (poWK->bApplyVerticalShift)
    7857             :                         {
    7858           0 :                             if (!std::isfinite(padfZ[iDstX]))
    7859           0 :                                 continue;
    7860             :                             // Subtract padfZ[] since the coordinate
    7861             :                             // transformation is from target to source
    7862           0 :                             dfValueReal =
    7863           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7864           0 :                                 padfZ[iDstX] *
    7865             :                                     dfMultFactorVerticalShiftPipeline;
    7866             :                         }
    7867             : 
    7868      300849 :                         dfBandDensity = 1;
    7869      300849 :                         bHasFoundDensity = true;
    7870             :                     }
    7871             :                 }  // GRA_Average.
    7872             : 
    7873     2522070 :                 else if (poWK->eResample == GRA_RMS)
    7874             :                 {
    7875      300416 :                     double dfTotalReal = 0.0;
    7876      300416 :                     double dfTotalImag = 0.0;
    7877      300416 :                     double dfTotalWeight = 0.0;
    7878             :                     // This code adapted from GDALDownsampleChunk32R_AverageT()
    7879             :                     // in gcore/overview.cpp.
    7880      630578 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7881             :                     {
    7882      330162 :                         const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7883      330162 :                         iSrcOffset = iSrcXMin +
    7884      330162 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7885      802723 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7886             :                              iSrcX++, iSrcOffset++)
    7887             :                         {
    7888      472561 :                             if (bWrapOverX)
    7889         630 :                                 iSrcOffset =
    7890         630 :                                     (iSrcX % nSrcXSize) +
    7891         630 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7892             : 
    7893      472561 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7894           0 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7895             :                                             iSrcOffset))
    7896             :                             {
    7897           0 :                                 continue;
    7898             :                             }
    7899             : 
    7900      472561 :                             if (GWKGetPixelValue(
    7901             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7902      945122 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7903      472561 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7904             :                             {
    7905      472561 :                                 const double dfWeight =
    7906      472561 :                                     COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7907      472561 :                                 dfTotalWeight += dfWeight;
    7908      472561 :                                 dfTotalReal +=
    7909      472561 :                                     dfValueRealTmp * dfValueRealTmp * dfWeight;
    7910      472561 :                                 if (bIsComplex)
    7911          48 :                                     dfTotalImag += dfValueImagTmp *
    7912          48 :                                                    dfValueImagTmp * dfWeight;
    7913             :                             }
    7914             :                         }
    7915             :                     }
    7916             : 
    7917      300416 :                     if (dfTotalWeight > 0)
    7918             :                     {
    7919      300416 :                         dfValueReal = sqrt(dfTotalReal / dfTotalWeight);
    7920             : 
    7921      300416 :                         if (poWK->bApplyVerticalShift)
    7922             :                         {
    7923           0 :                             if (!std::isfinite(padfZ[iDstX]))
    7924           0 :                                 continue;
    7925             :                             // Subtract padfZ[] since the coordinate
    7926             :                             // transformation is from target to source
    7927           0 :                             dfValueReal =
    7928           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7929           0 :                                 padfZ[iDstX] *
    7930             :                                     dfMultFactorVerticalShiftPipeline;
    7931             :                         }
    7932             : 
    7933      300416 :                         if (bIsComplex)
    7934          12 :                             dfValueImag = sqrt(dfTotalImag / dfTotalWeight);
    7935             : 
    7936      300416 :                         dfBandDensity = 1;
    7937      300416 :                         bHasFoundDensity = true;
    7938             :                     }
    7939             :                 }  // GRA_RMS.
    7940             : 
    7941     2221660 :                 else if (poWK->eResample == GRA_Mode)
    7942             :                 {
    7943      496623 :                     float fMaxCount = 0.0f;
    7944      496623 :                     int nMode = -1;
    7945      496623 :                     bool bHasSourceValues = false;
    7946             : 
    7947      496623 :                     memset(pafCounts, 0, nBins * sizeof(float));
    7948             : 
    7949     1612560 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7950             :                     {
    7951     1115940 :                         const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7952     1115940 :                         iSrcOffset = iSrcXMin +
    7953     1115940 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7954     4733160 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7955             :                              iSrcX++, iSrcOffset++)
    7956             :                         {
    7957     3617230 :                             if (bWrapOverX)
    7958         630 :                                 iSrcOffset =
    7959         630 :                                     (iSrcX % nSrcXSize) +
    7960         630 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7961             : 
    7962     3617230 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7963           0 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7964             :                                             iSrcOffset))
    7965           0 :                                 continue;
    7966             : 
    7967     3617230 :                             if (GWKGetPixelValue(
    7968             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7969     7234450 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7970     3617230 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7971             :                             {
    7972     3617230 :                                 bHasSourceValues = true;
    7973     3617230 :                                 const int nVal =
    7974     3617230 :                                     static_cast<int>(dfValueRealTmp);
    7975     3617230 :                                 const int iBin = nVal + nBinsOffset;
    7976     3617230 :                                 const double dfWeight =
    7977     3617230 :                                     COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7978             : 
    7979             :                                 // Sum the density.
    7980     3617230 :                                 pafCounts[iBin] += static_cast<float>(dfWeight);
    7981             :                                 // Is it the most common value so far?
    7982     3617230 :                                 bool bUpdateMode = pafCounts[iBin] > fMaxCount;
    7983     3617230 :                                 if (!bUpdateMode &&
    7984      778316 :                                     pafCounts[iBin] == fMaxCount)
    7985             :                                 {
    7986      218628 :                                     switch (eTieStrategy)
    7987             :                                     {
    7988      218620 :                                         case GWKTS_First:
    7989      218620 :                                             break;
    7990           4 :                                         case GWKTS_Min:
    7991           4 :                                             bUpdateMode = nVal < nMode;
    7992           4 :                                             break;
    7993           4 :                                         case GWKTS_Max:
    7994           4 :                                             bUpdateMode = nVal > nMode;
    7995           4 :                                             break;
    7996             :                                     }
    7997             :                                 }
    7998     3617230 :                                 if (bUpdateMode)
    7999             :                                 {
    8000     2838920 :                                     nMode = nVal;
    8001     2838920 :                                     fMaxCount = pafCounts[iBin];
    8002             :                                 }
    8003             :                             }
    8004             :                         }
    8005             :                     }
    8006             : 
    8007      496623 :                     if (bHasSourceValues)
    8008             :                     {
    8009      496623 :                         dfValueReal = nMode;
    8010      496623 :                         dfBandDensity = 1;
    8011      496623 :                         bHasFoundDensity = true;
    8012             :                     }
    8013             :                 }  // GRA_Mode.
    8014             : 
    8015     1725040 :                 else if (poWK->eResample == GRA_Max)
    8016             :                 {
    8017      335037 :                     bool bFoundValid = false;
    8018      335037 :                     double dfTotalReal = cpl::NumericLimits<double>::lowest();
    8019             :                     // This code adapted from nAlgo 1 method, GRA_Average.
    8020     1288010 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    8021             :                     {
    8022      952975 :                         iSrcOffset = iSrcXMin +
    8023      952975 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8024     4406540 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    8025             :                              iSrcX++, iSrcOffset++)
    8026             :                         {
    8027     3453560 :                             if (bWrapOverX)
    8028         630 :                                 iSrcOffset =
    8029         630 :                                     (iSrcX % nSrcXSize) +
    8030         630 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8031             : 
    8032     3456370 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    8033        2809 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    8034             :                                             iSrcOffset))
    8035             :                             {
    8036        2446 :                                 continue;
    8037             :                             }
    8038             : 
    8039             :                             // Returns pixel value if it is not no data.
    8040     3451120 :                             if (GWKGetPixelValue(
    8041             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    8042     6902230 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    8043     3451120 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    8044             :                             {
    8045     3451120 :                                 bFoundValid = true;
    8046     3451120 :                                 if (dfTotalReal < dfValueRealTmp)
    8047             :                                 {
    8048      442642 :                                     dfTotalReal = dfValueRealTmp;
    8049             :                                 }
    8050             :                             }
    8051             :                         }
    8052             :                     }
    8053             : 
    8054      335037 :                     if (bFoundValid)
    8055             :                     {
    8056      335037 :                         dfValueReal = dfTotalReal;
    8057             : 
    8058      335037 :                         if (poWK->bApplyVerticalShift)
    8059             :                         {
    8060           0 :                             if (!std::isfinite(padfZ[iDstX]))
    8061           0 :                                 continue;
    8062             :                             // Subtract padfZ[] since the coordinate
    8063             :                             // transformation is from target to source
    8064           0 :                             dfValueReal =
    8065           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    8066           0 :                                 padfZ[iDstX] *
    8067             :                                     dfMultFactorVerticalShiftPipeline;
    8068             :                         }
    8069             : 
    8070      335037 :                         dfBandDensity = 1;
    8071      335037 :                         bHasFoundDensity = true;
    8072             :                     }
    8073             :                 }
    8074             : 
    8075     1390000 :                 else if (poWK->eResample == GRA_Min)
    8076             :                 {
    8077      335012 :                     bool bFoundValid = false;
    8078      335012 :                     double dfTotalReal = cpl::NumericLimits<double>::max();
    8079             :                     // This code adapted from nAlgo 1 method, GRA_Average.
    8080     1287720 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    8081             :                     {
    8082      952710 :                         iSrcOffset = iSrcXMin +
    8083      952710 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8084     4403460 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    8085             :                              iSrcX++, iSrcOffset++)
    8086             :                         {
    8087     3450750 :                             if (bWrapOverX)
    8088         630 :                                 iSrcOffset =
    8089         630 :                                     (iSrcX % nSrcXSize) +
    8090         630 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8091             : 
    8092     3450750 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    8093           0 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    8094             :                                             iSrcOffset))
    8095             :                             {
    8096           0 :                                 continue;
    8097             :                             }
    8098             : 
    8099             :                             // Returns pixel value if it is not no data.
    8100     3450750 :                             if (GWKGetPixelValue(
    8101             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    8102     6901500 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    8103     3450750 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    8104             :                             {
    8105     3450750 :                                 bFoundValid = true;
    8106     3450750 :                                 if (dfTotalReal > dfValueRealTmp)
    8107             :                                 {
    8108      443069 :                                     dfTotalReal = dfValueRealTmp;
    8109             :                                 }
    8110             :                             }
    8111             :                         }
    8112             :                     }
    8113             : 
    8114      335012 :                     if (bFoundValid)
    8115             :                     {
    8116      335012 :                         dfValueReal = dfTotalReal;
    8117             : 
    8118      335012 :                         if (poWK->bApplyVerticalShift)
    8119             :                         {
    8120           0 :                             if (!std::isfinite(padfZ[iDstX]))
    8121           0 :                                 continue;
    8122             :                             // Subtract padfZ[] since the coordinate
    8123             :                             // transformation is from target to source
    8124           0 :                             dfValueReal =
    8125           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    8126           0 :                                 padfZ[iDstX] *
    8127             :                                     dfMultFactorVerticalShiftPipeline;
    8128             :                         }
    8129             : 
    8130      335012 :                         dfBandDensity = 1;
    8131      335012 :                         bHasFoundDensity = true;
    8132             :                     }
    8133             :                 }  // GRA_Min.
    8134             : 
    8135             :                 else
    8136             :                 // poWK->eResample == GRA_Med | GRA_Q1 | GRA_Q3.
    8137             :                 {
    8138     1054990 :                     CPLAssert(quant > 0.0f);
    8139             : 
    8140     1054990 :                     bool bFoundValid = false;
    8141     1054990 :                     std::vector<double> dfRealValuesTmp;
    8142             : 
    8143             :                     // This code adapted from nAlgo 1 method, GRA_Average.
    8144     4012980 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    8145             :                     {
    8146     2957990 :                         iSrcOffset = iSrcXMin +
    8147     2957990 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8148    13509900 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    8149             :                              iSrcX++, iSrcOffset++)
    8150             :                         {
    8151    10551900 :                             if (bWrapOverX)
    8152        1890 :                                 iSrcOffset =
    8153        1890 :                                     (iSrcX % nSrcXSize) +
    8154        1890 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8155             : 
    8156    10748500 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    8157      196608 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    8158             :                                             iSrcOffset))
    8159             :                             {
    8160      195449 :                                 continue;
    8161             :                             }
    8162             : 
    8163             :                             // Returns pixel value if it is not no data.
    8164    10356400 :                             if (GWKGetPixelValue(
    8165             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    8166    20712900 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    8167    10356400 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    8168             :                             {
    8169    10356400 :                                 bFoundValid = true;
    8170    10356400 :                                 dfRealValuesTmp.push_back(dfValueRealTmp);
    8171             :                             }
    8172             :                         }
    8173             :                     }
    8174             : 
    8175     1054990 :                     if (bFoundValid)
    8176             :                     {
    8177     1006150 :                         std::sort(dfRealValuesTmp.begin(),
    8178             :                                   dfRealValuesTmp.end());
    8179             :                         int quantIdx = static_cast<int>(
    8180     1006150 :                             std::ceil(quant * dfRealValuesTmp.size() - 1));
    8181     1006150 :                         dfValueReal = dfRealValuesTmp[quantIdx];
    8182             : 
    8183     1006150 :                         if (poWK->bApplyVerticalShift)
    8184             :                         {
    8185           0 :                             if (!std::isfinite(padfZ[iDstX]))
    8186           0 :                                 continue;
    8187             :                             // Subtract padfZ[] since the coordinate
    8188             :                             // transformation is from target to source
    8189           0 :                             dfValueReal =
    8190           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    8191           0 :                                 padfZ[iDstX] *
    8192             :                                     dfMultFactorVerticalShiftPipeline;
    8193             :                         }
    8194             : 
    8195     1006150 :                         dfBandDensity = 1;
    8196     1006150 :                         bHasFoundDensity = true;
    8197     1006150 :                         dfRealValuesTmp.clear();
    8198             :                     }
    8199             :                 }  // Quantile.
    8200             : 
    8201             :                 /* --------------------------------------------------------------------
    8202             :                  */
    8203             :                 /*      We have a computed value from the source.  Now apply it
    8204             :                  * to      */
    8205             :                 /*      the destination pixel. */
    8206             :                 /* --------------------------------------------------------------------
    8207             :                  */
    8208     2822920 :                 if (bHasFoundDensity)
    8209             :                 {
    8210             :                     // TODO: Should we compute dfBandDensity in fct of
    8211             :                     // nCount/nCount2, or use as a threshold to set the dest
    8212             :                     // value?
    8213             :                     // dfBandDensity = (float) nCount / nCount2;
    8214             :                     // if( (float) nCount / nCount2 > 0.1 )
    8215             :                     // or fix gdalwarp crop_to_cutline to crop partially
    8216             :                     // overlapping pixels.
    8217     2774080 :                     GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    8218             :                                      dfValueReal, dfValueImag,
    8219             :                                      bAvoidNoDataSingleBand);
    8220             :                 }
    8221             :             }
    8222             : 
    8223     1906320 :             if (!bHasFoundDensity)
    8224      507587 :                 continue;
    8225             : 
    8226     1398740 :             if (!bAvoidNoDataSingleBand)
    8227             :             {
    8228           0 :                 GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    8229             :             }
    8230             : 
    8231             :             /* --------------------------------------------------------------------
    8232             :              */
    8233             :             /*      Update destination density/validity masks. */
    8234             :             /* --------------------------------------------------------------------
    8235             :              */
    8236     1398740 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    8237             : 
    8238     1398740 :             if (poWK->panDstValid != nullptr)
    8239             :             {
    8240        1184 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    8241             :             }
    8242             :         } /* Next iDstX */
    8243             : 
    8244             :         /* --------------------------------------------------------------------
    8245             :          */
    8246             :         /*      Report progress to the user, and optionally cancel out. */
    8247             :         /* --------------------------------------------------------------------
    8248             :          */
    8249        8078 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    8250           0 :             break;
    8251             :     }
    8252             : 
    8253             :     /* -------------------------------------------------------------------- */
    8254             :     /*      Cleanup and return.                                             */
    8255             :     /* -------------------------------------------------------------------- */
    8256         136 :     CPLFree(padfX);
    8257         136 :     CPLFree(padfY);
    8258         136 :     CPLFree(padfZ);
    8259         136 :     CPLFree(padfX2);
    8260         136 :     CPLFree(padfY2);
    8261         136 :     CPLFree(padfZ2);
    8262         136 :     CPLFree(pabSuccess);
    8263         136 :     CPLFree(pabSuccess2);
    8264         136 :     VSIFree(pafCounts);
    8265             : }
    8266             : 
    8267             : /************************************************************************/
    8268             : /*                         getOrientation()                             */
    8269             : /************************************************************************/
    8270             : 
    8271             : typedef std::pair<double, double> XYPair;
    8272             : 
    8273             : // Returns 1 if the turn p1 -> p2 -> p3 is clockwise oriented (seen with the y axis pointing up),
    8274             : // -1 if it is counter-clockwise oriented,
    8275             : // or 0 if the points are colinear, i.e. |val| is below the 1e-20 absolute tolerance.
    8276     2355910 : static int getOrientation(const XYPair &p1, const XYPair &p2, const XYPair &p3)
    8277             : {
    8278     2355910 :     const double p1x = p1.first;
    8279     2355910 :     const double p1y = p1.second;
    8280     2355910 :     const double p2x = p2.first;
    8281     2355910 :     const double p2y = p2.second;
    8282     2355910 :     const double p3x = p3.first;
    8283     2355910 :     const double p3y = p3.second;
    8284     2355910 :     const double val = (p2y - p1y) * (p3x - p2x) - (p2x - p1x) * (p3y - p2y);  // == -((p2 - p1) x (p3 - p2))
    8285     2355910 :     if (std::abs(val) < 1e-20)  // absolute (not scale-relative) threshold
    8286        2690 :         return 0;
    8287     2353220 :     else if (val > 0)
    8288           0 :         return 1;
    8289             :     else
    8290     2353220 :         return -1;
    8291             : }
    8292             : 
    8293             : /************************************************************************/
    8294             : /*                          isConvex()                                  */
    8295             : /************************************************************************/
    8296             : 
    8297             : typedef std::vector<XYPair> XYPoly;
    8298             : 
    8299             : // Returns true if all non-colinear consecutive vertex triples of poly turn the same way. poly must be closed (presumably last point == first point; confirm with callers).
    8300      785302 : static bool isConvex(const XYPoly &poly)
    8301             : {
    8302      785302 :     const size_t n = poly.size();
    8303      785302 :     size_t i = 0;
    8304      785302 :     int last_orientation = getOrientation(poly[i], poly[i + 1], poly[i + 2]);  // reads poly[0..2] unconditionally: needs n >= 3
    8305      785302 :     ++i;
    8306     2355910 :     for (; i < n - 2; ++i)  // NOTE(review): n - 2 is unsigned; last triple is poly[n-3..n-1], so the turn at the closing vertex is not examined
    8307             :     {
    8308             :         const int orientation =
    8309     1570600 :             getOrientation(poly[i], poly[i + 1], poly[i + 2]);
    8310     1570600 :         if (orientation != 0)  // colinear triples are ignored
    8311             :         {
    8312     1567910 :             if (last_orientation == 0)
    8313           0 :                 last_orientation = orientation;  // first non-colinear turn becomes the reference
    8314     1567910 :             else if (orientation != last_orientation)
    8315           0 :                 return false;  // turn direction changed: not convex
    8316             :         }
    8317             :     }
    8318      785302 :     return true;
    8319             : }
    8320             : 
    8321             : /************************************************************************/
    8322             : /*                     pointIntersectsConvexPoly()                      */
    8323             : /************************************************************************/
    8324             : 
    8325             : // Returns whether xy intersects poly, which must be closed and convex. Points lying on an edge (zero cross product) are accepted.
    8326     6049100 : static bool pointIntersectsConvexPoly(const XYPair &xy, const XYPoly &poly)
    8327             : {
    8328     6049100 :     const size_t n = poly.size();
    8329     6049100 :     double dx1 = xy.first - poly[0].first;
    8330     6049100 :     double dy1 = xy.second - poly[0].second;
    8331     6049100 :     double dx2 = poly[1].first - poly[0].first;
    8332     6049100 :     double dy2 = poly[1].second - poly[0].second;
    8333     6049100 :     double prevCrossProduct = dx1 * dy2 - dx2 * dy1;  // side of xy relative to edge poly[0] -> poly[1]
    8334             : 
    8335             :     // Check that the point remains on the same side (left/right) of all edges poly[i - 1] -> poly[i]
    8336    14556400 :     for (size_t i = 2; i < n; i++)
    8337             :     {
    8338    12793100 :         dx1 = xy.first - poly[i - 1].first;
    8339    12793100 :         dy1 = xy.second - poly[i - 1].second;
    8340             : 
    8341    12793100 :         dx2 = poly[i].first - poly[i - 1].first;
    8342    12793100 :         dy2 = poly[i].second - poly[i - 1].second;
    8343             : 
    8344    12793100 :         double crossProduct = dx1 * dy2 - dx2 * dy1;
    8345    12793100 :         if (std::abs(prevCrossProduct) < 1e-20)  // no usable reference sign yet
    8346      725558 :             prevCrossProduct = crossProduct;  // adopt this edge's value as the reference
    8347    12067500 :         else if (prevCrossProduct * crossProduct < 0)  // strictly opposite signs: xy is outside
    8348     4285760 :             return false;
    8349             :     }
    8350             : 
    8351     1763340 :     return true;
    8352             : }
    8353             : 
    8354             : /************************************************************************/
    8355             : /*                     getIntersection()                                */
    8356             : /************************************************************************/
    8357             : 
    8358             : /* Returns intersection of [p1,p2] with [p3,p4], if
    8359             :  * it is a single point, and the 2 segments are not colinear.
                     :  *
                     :  * On success, returns true and stores the point in xy.
                     :  * Returns false, without writing to xy, when denom is 0 (the two
                     :  * segments have parallel directions) or when the parameter of the
                     :  * crossing along either segment falls outside [0,1].
                     :  * Segment end points are accepted (parameter equal to 0 or 1).
    8360             :  */
    8361    11811000 : static bool getIntersection(const XYPair &p1, const XYPair &p2,
    8362             :                             const XYPair &p3, const XYPair &p4, XYPair &xy)
    8363             : {
    8364    11811000 :     const double x1 = p1.first;
    8365    11811000 :     const double y1 = p1.second;
    8366    11811000 :     const double x2 = p2.first;
    8367    11811000 :     const double y2 = p2.second;
    8368    11811000 :     const double x3 = p3.first;
    8369    11811000 :     const double y3 = p3.second;
    8370    11811000 :     const double x4 = p4.first;
    8371    11811000 :     const double y4 = p4.second;
    8372    11811000 :     const double t_num = (x1 - x3) * (y3 - y4) - (y1 - y3) * (x3 - x4);  // numerator of t, the position of the crossing along [p1,p2]
    8373    11811000 :     const double denom = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4);  // denominator shared by t and u
    8374    11811000 :     if (t_num * denom < 0 || std::abs(t_num) > std::abs(denom) || denom == 0)  // t < 0, t > 1, or no usable denominator
    8375     9260780 :         return false;
    8376             : 
    8377     2550260 :     const double u_num = (x1 - x3) * (y1 - y2) - (y1 - y3) * (x1 - x2);  // numerator of u, the position of the crossing along [p3,p4]
    8378     2550260 :     if (u_num * denom < 0 || std::abs(u_num) > std::abs(denom))  // u < 0 or u > 1
    8379      973924 :         return false;
    8380             : 
    8381     1576340 :     const double t = t_num / denom;  // only divide once both range tests have passed
    8382     1576340 :     xy.first = x1 + t * (x2 - x1);
    8383     1576340 :     xy.second = y1 + t * (y2 - y1);
    8384     1576340 :     return true;
    8385             : }
    8386             : 
    8387             : /************************************************************************/
    8388             : /*                     getConvexPolyIntersection()                      */
    8389             : /************************************************************************/
    8390             : 
    8391             : /* poly1 and poly2 must be closed and convex.
                     :  * Collects into intersection (whose previous content is discarded) the
                     :  * vertices of poly1 inside poly2, the vertices of poly2 inside poly1 and
                     :  * the crossing points of their edges.
                     :  * If every vertex of poly1 is inside poly2, those vertices are returned
                     :  * as is, in poly1 order. Otherwise the collected points are sorted by
                     :  * angle around their lowest-left point and any point equal to its
                     :  * predecessor is dropped. intersection is left empty if nothing is found.
                     :  */
    8392             : // The returned intersection will not necessarily be closed.
    8393      785302 : static void getConvexPolyIntersection(const XYPoly &poly1, const XYPoly &poly2,
    8394             :                                       XYPoly &intersection)
    8395             : {
    8396      785302 :     intersection.clear();
    8397             : 
    8398             :     // Add all points of poly1 inside poly2 (the last point is not visited)
    8399     3926510 :     for (size_t i = 0; i < poly1.size() - 1; ++i)
    8400             :     {
    8401     3141210 :         if (pointIntersectsConvexPoly(poly1[i], poly2))
    8402     1187430 :             intersection.push_back(poly1[i]);
    8403             :     }
    8404      785302 :     if (intersection.size() == poly1.size() - 1)
    8405             :     {
    8406             :         // poly1 is inside poly2: return its points unsorted
    8407      119100 :         return;
    8408             :     }
    8409             : 
    8410             :     // Add all points of poly2 inside poly1 (the last point is not visited)
    8411     3634860 :     for (size_t i = 0; i < poly2.size() - 1; ++i)
    8412             :     {
    8413     2907890 :         if (pointIntersectsConvexPoly(poly2[i], poly1))
    8414      575904 :             intersection.push_back(poly2[i]);
    8415             :     }
    8416             : 
    8417             :     // Compute the intersection of all edges of both polygons
    8418      726972 :     XYPair xy;
    8419     3634860 :     for (size_t i1 = 0; i1 < poly1.size() - 1; ++i1)
    8420             :     {
    8421    14539400 :         for (size_t i2 = 0; i2 < poly2.size() - 1; ++i2)
    8422             :         {
    8423    11631600 :             if (getIntersection(poly1[i1], poly1[i1 + 1], poly2[i2],
    8424    11631600 :                                 poly2[i2 + 1], xy))
    8425             :             {
    8426     1576230 :                 intersection.push_back(xy);
    8427             :             }
    8428             :         }
    8429             :     }
    8430             : 
    8431      726972 :     if (intersection.empty())
    8432       60770 :         return;
    8433             : 
    8434             :     // Find lowest-left point in intersection set (smallest y, then smallest x)
    8435      666202 :     double lowest_x = cpl::NumericLimits<double>::max();
    8436      666202 :     double lowest_y = cpl::NumericLimits<double>::max();
    8437     3772450 :     for (const auto &pair : intersection)
    8438             :     {
    8439     3106240 :         const double x = pair.first;
    8440     3106240 :         const double y = pair.second;
    8441     3106240 :         if (y < lowest_y || (y == lowest_y && x < lowest_x))
    8442             :         {
    8443     1096040 :             lowest_x = x;
    8444     1096040 :             lowest_y = y;
    8445             :         }
    8446             :     }
    8447             : 
    8448     5737980 :     const auto sortFunc = [&](const XYPair &p1, const XYPair &p2)  // "p1 before p2" by direction from (lowest_x, lowest_y)
    8449             :     {
    8450     5737980 :         const double p1x_diff = p1.first - lowest_x;
    8451     5737980 :         const double p1y_diff = p1.second - lowest_y;
    8452     5737980 :         const double p2x_diff = p2.first - lowest_x;
    8453     5737980 :         const double p2y_diff = p2.second - lowest_y;
    8454     5737980 :         if (p2y_diff == 0.0 && p1y_diff == 0.0)  // both at the height of the reference point
    8455             :         {
    8456     2655420 :             if (p1x_diff >= 0)
    8457             :             {
    8458     2655420 :                 if (p2x_diff >= 0)
    8459     2655420 :                     return p1.first < p2.first;
    8460           0 :                 return true;
    8461             :             }
    8462             :             else
    8463             :             {
    8464           0 :                 if (p2x_diff >= 0)
    8465           0 :                     return false;
    8466           0 :                 return p1.first < p2.first;
    8467             :             }
    8468             :         }
    8469             : 
    8470     3082560 :         if (p2x_diff == 0.0 && p1x_diff == 0.0)  // both straight above the reference point: order by y
    8471     1046960 :             return p1.second < p2.second;
    8472             : 
    8473             :         double tan_p1;  // slope of the direction reference -> p1
    8474     2035600 :         if (p1x_diff == 0.0)
    8475      464622 :             tan_p1 = p1y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();  // 0 for the reference point itself, max double stands for a vertical direction
    8476             :         else
    8477     1570980 :             tan_p1 = p1y_diff / p1x_diff;
    8478             : 
    8479             :         double tan_p2;  // slope of the direction reference -> p2, same conventions
    8480     2035600 :         if (p2x_diff == 0.0)
    8481      839515 :             tan_p2 = p2y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
    8482             :         else
    8483     1196080 :             tan_p2 = p2y_diff / p2x_diff;
    8484             : 
    8485     2035600 :         if (tan_p1 >= 0)  // non-negative slopes sort before negative ones; within a group, by increasing slope
    8486             :         {
    8487     1904790 :             if (tan_p2 >= 0)
    8488     1881590 :                 return tan_p1 < tan_p2;
    8489             :             else
    8490       23199 :                 return true;
    8491             :         }
    8492             :         else
    8493             :         {
    8494      130806 :             if (tan_p2 >= 0)
    8495      103900 :                 return false;
    8496             :             else
    8497       26906 :                 return tan_p1 < tan_p2;
    8498             :         }
    8499      666202 :     };
    8500             : 
    8501             :     // Sort points by increasing atan2(y-lowest_y, x-lowest_x) to form a convex
    8502             :     // hull. NOTE(review): distinct points with the same slope compare as equivalent, so std::sort leaves their relative order unspecified - confirm this is acceptable.
    8503      666202 :     std::sort(intersection.begin(), intersection.end(), sortFunc);
    8504             : 
    8505             :     // Remove duplicated points: compact in place, dropping any point equal to its predecessor
    8506      666202 :     size_t j = 1;  // number of points kept so far; also the next write position
    8507     3106240 :     for (size_t i = 1; i < intersection.size(); ++i)
    8508             :     {
    8509     2440040 :         if (intersection[i] != intersection[i - 1])
    8510             :         {
    8511     1452560 :             if (j < i)
    8512      545275 :                 intersection[j] = intersection[i];
    8513     1452560 :             ++j;
    8514             :         }
    8515             :     }
    8516      666202 :     intersection.resize(j);
    8517             : }
    8518             : 
    8519             : /************************************************************************/
    8520             : /*                            getArea()                                 */
    8521             : /************************************************************************/
    8522             : 
    8523             : /* Returns the area of poly as half the absolute value of the sum of
                     :  * x[i] * (y[i+1] - y[i-1]) over all points, indices wrapping around
                     :  * (shoelace formula). The result is therefore never negative.
                     :  * poly[1] and poly[nPointCount - 2] are read, so poly must hold at
                     :  * least 2 points.
                     :  * poly may or may not be closed.
                     :  */
    8524      558521 : static double getArea(const XYPoly &poly)
    8525             : {
    8526             :     // CPLAssert(poly.size() >= 2);
    8527      558521 :     const size_t nPointCount = poly.size();
    8528             :     double dfAreaSum =
    8529      558521 :         poly[0].first * (poly[1].second - poly[nPointCount - 1].second);  // term for point 0: its predecessor is the last point
    8530             : 
    8531     1765140 :     for (size_t i = 1; i < nPointCount - 1; i++)
    8532             :     {
    8533     1206610 :         dfAreaSum += poly[i].first * (poly[i + 1].second - poly[i - 1].second);
    8534             :     }
    8535             : 
    8536      558521 :     dfAreaSum += poly[nPointCount - 1].first *
    8537      558521 :                  (poly[0].second - poly[nPointCount - 2].second);  // term for the last point: its successor is point 0
    8538             : 
    8539      558521 :     return 0.5 * std::fabs(dfAreaSum);
    8540             : }
    8541             : 
    8542             : /************************************************************************/
    8543             : /*                           GWKSumPreserving()                         */
    8544             : /************************************************************************/
    8545             : 
    8546             : static void GWKSumPreservingThread(void *pData);  // forward declaration: the definition follows GWKSumPreserving()
    8547             : /* Passes GWKSumPreservingThread, under the name "GWKSumPreserving", to
                     :  * GWKRun() for poWK and returns the CPLErr that GWKRun() reports.
                     :  */
    8548          19 : static CPLErr GWKSumPreserving(GDALWarpKernel *poWK)
    8549             : {
    8550          19 :     return GWKRun(poWK, "GWKSumPreserving", GWKSumPreservingThread);
    8551             : }
    8552             : 
    8553          19 : static void GWKSumPreservingThread(void *pData)
    8554             : {
    8555          19 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    8556          19 :     GDALWarpKernel *poWK = psJob->poWK;
    8557          19 :     const int iYMin = psJob->iYMin;
    8558          19 :     const int iYMax = psJob->iYMax;
    8559             :     const bool bIsAffineNoRotation =
    8560          19 :         GDALTransformIsAffineNoRotation(poWK->pfnTransformer,
    8561          28 :                                         poWK->pTransformerArg) &&
    8562             :         // for debug/testing purposes
    8563           9 :         CPLTestBool(
    8564          19 :             CPLGetConfigOption("GDAL_WARP_USE_AFFINE_OPTIMIZATION", "YES"));
    8565             :     const bool bAvoidNoDataSingleBand =
    8566          21 :         poWK->nBands == 1 ||
    8567           2 :         !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
    8568          19 :                                           "UNIFIED_SRC_NODATA", "FALSE"));
    8569             : 
    8570          19 :     const int nDstXSize = poWK->nDstXSize;
    8571          19 :     const int nSrcXSize = poWK->nSrcXSize;
    8572          19 :     const int nSrcYSize = poWK->nSrcYSize;
    8573             : 
    8574          38 :     std::vector<double> adfX0(nSrcXSize + 1);
    8575          38 :     std::vector<double> adfY0(nSrcXSize + 1);
    8576          38 :     std::vector<double> adfZ0(nSrcXSize + 1);
    8577          38 :     std::vector<double> adfX1(nSrcXSize + 1);
    8578          38 :     std::vector<double> adfY1(nSrcXSize + 1);
    8579          38 :     std::vector<double> adfZ1(nSrcXSize + 1);
    8580          38 :     std::vector<int> abSuccess0(nSrcXSize + 1);
    8581          38 :     std::vector<int> abSuccess1(nSrcXSize + 1);
    8582             : 
    8583             :     CPLRectObj sGlobalBounds;
    8584          19 :     sGlobalBounds.minx = -2 * poWK->dfXScale;
    8585          19 :     sGlobalBounds.miny = iYMin - 2 * poWK->dfYScale;
    8586          19 :     sGlobalBounds.maxx = nDstXSize + 2 * poWK->dfXScale;
    8587          19 :     sGlobalBounds.maxy = iYMax + 2 * poWK->dfYScale;
    8588          19 :     CPLQuadTree *hQuadTree = CPLQuadTreeCreate(&sGlobalBounds, nullptr);
    8589             : 
    8590             :     struct SourcePixel
    8591             :     {
    8592             :         int iSrcX;
    8593             :         int iSrcY;
    8594             : 
    8595             :         // Coordinates of source pixel in target pixel coordinates
    8596             :         double dfDstX0;
    8597             :         double dfDstY0;
    8598             :         double dfDstX1;
    8599             :         double dfDstY1;
    8600             :         double dfDstX2;
    8601             :         double dfDstY2;
    8602             :         double dfDstX3;
    8603             :         double dfDstY3;
    8604             : 
    8605             :         // Source pixel total area (might be larger than the one described
    8606             :         // by above coordinates, if the pixel was crossing the antimeridian
    8607             :         // and split)
    8608             :         double dfArea;
    8609             :     };
    8610             : 
    8611          38 :     std::vector<SourcePixel> sourcePixels;
    8612             : 
    8613          38 :     XYPoly discontinuityLeft(5);
    8614          38 :     XYPoly discontinuityRight(5);
    8615             : 
    8616             :     /* ==================================================================== */
    8617             :     /*      First pass: transform the 4 corners of each potential           */
    8618             :     /*      contributing source pixel to target pixel coordinates.          */
    8619             :     /* ==================================================================== */
    8620             : 
    8621             :     // Special case for top line
    8622             :     {
    8623          19 :         int iY = 0;
    8624        3364 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8625             :         {
    8626        3345 :             adfX1[iX] = iX + poWK->nSrcXOff;
    8627        3345 :             adfY1[iX] = iY + poWK->nSrcYOff;
    8628        3345 :             adfZ1[iX] = 0;
    8629             :         }
    8630             : 
    8631          19 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
    8632             :                              adfX1.data(), adfY1.data(), adfZ1.data(),
    8633             :                              abSuccess1.data());
    8634             : 
    8635        3364 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8636             :         {
    8637        3345 :             if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
    8638           0 :                 abSuccess1[iX] = FALSE;
    8639             :             else
    8640             :             {
    8641        3345 :                 adfX1[iX] -= poWK->nDstXOff;
    8642        3345 :                 adfY1[iX] -= poWK->nDstYOff;
    8643             :             }
    8644             :         }
    8645             :     }
    8646             : 
    8647       22624 :     const auto getInsideXSign = [poWK, nDstXSize](double dfX)
    8648             :     {
    8649       22624 :         return dfX - poWK->nDstXOff >= -2 * poWK->dfXScale &&
    8650       10966 :                        dfX - poWK->nDstXOff <= nDstXSize + 2 * poWK->dfXScale
    8651       22624 :                    ? 1
    8652       11658 :                    : -1;
    8653          19 :     };
    8654             : 
    8655             :     const auto FindDiscontinuity =
    8656          80 :         [poWK, psJob, getInsideXSign](
    8657             :             double dfXLeft, double dfXRight, double dfY,
    8658             :             int XLeftReprojectedInsideSign, double &dfXMidReprojectedLeft,
    8659         800 :             double &dfXMidReprojectedRight, double &dfYMidReprojected)
    8660             :     {
    8661         880 :         for (int i = 0; i < 10 && dfXRight - dfXLeft > 1e-8; ++i)
    8662             :         {
    8663         800 :             double dfXMid = (dfXLeft + dfXRight) / 2;
    8664         800 :             double dfXMidReprojected = dfXMid;
    8665         800 :             dfYMidReprojected = dfY;
    8666         800 :             double dfZ = 0;
    8667         800 :             int nSuccess = 0;
    8668         800 :             poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1,
    8669             :                                  &dfXMidReprojected, &dfYMidReprojected, &dfZ,
    8670             :                                  &nSuccess);
    8671         800 :             if (XLeftReprojectedInsideSign != getInsideXSign(dfXMidReprojected))
    8672             :             {
    8673         456 :                 dfXRight = dfXMid;
    8674         456 :                 dfXMidReprojectedRight = dfXMidReprojected;
    8675             :             }
    8676             :             else
    8677             :             {
    8678         344 :                 dfXLeft = dfXMid;
    8679         344 :                 dfXMidReprojectedLeft = dfXMidReprojected;
    8680             :             }
    8681             :         }
    8682          80 :     };
    8683             : 
    8684        2685 :     for (int iY = 0; iY < nSrcYSize; ++iY)
    8685             :     {
    8686        2666 :         std::swap(adfX0, adfX1);
    8687        2666 :         std::swap(adfY0, adfY1);
    8688        2666 :         std::swap(adfZ0, adfZ1);
    8689        2666 :         std::swap(abSuccess0, abSuccess1);
    8690             : 
    8691     4836120 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8692             :         {
    8693     4833460 :             adfX1[iX] = iX + poWK->nSrcXOff;
    8694     4833460 :             adfY1[iX] = iY + 1 + poWK->nSrcYOff;
    8695     4833460 :             adfZ1[iX] = 0;
    8696             :         }
    8697             : 
    8698        2666 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
    8699             :                              adfX1.data(), adfY1.data(), adfZ1.data(),
    8700             :                              abSuccess1.data());
    8701             : 
    8702     4836120 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8703             :         {
    8704     4833460 :             if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
    8705           0 :                 abSuccess1[iX] = FALSE;
    8706             :             else
    8707             :             {
    8708     4833460 :                 adfX1[iX] -= poWK->nDstXOff;
    8709     4833460 :                 adfY1[iX] -= poWK->nDstYOff;
    8710             :             }
    8711             :         }
    8712             : 
    8713     4833460 :         for (int iX = 0; iX < nSrcXSize; ++iX)
    8714             :         {
    8715     9661580 :             if (abSuccess0[iX] && abSuccess0[iX + 1] && abSuccess1[iX] &&
    8716     4830790 :                 abSuccess1[iX + 1])
    8717             :             {
    8718             :                 /* --------------------------------------------------------------------
    8719             :                  */
    8720             :                 /*      Do not try to apply transparent source pixels to the
    8721             :                  * destination.*/
    8722             :                 /* --------------------------------------------------------------------
    8723             :                  */
    8724     4830790 :                 const auto iSrcOffset =
    8725     4830790 :                     iX + static_cast<GPtrDiff_t>(iY) * nSrcXSize;
    8726     9560570 :                 if (poWK->panUnifiedSrcValid != nullptr &&
    8727     4729780 :                     !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    8728             :                 {
    8729     4738340 :                     continue;
    8730             :                 }
    8731             : 
    8732      103415 :                 if (poWK->pafUnifiedSrcDensity != nullptr)
    8733             :                 {
    8734           0 :                     if (poWK->pafUnifiedSrcDensity[iSrcOffset] <
    8735             :                         SRC_DENSITY_THRESHOLD_FLOAT)
    8736           0 :                         continue;
    8737             :                 }
    8738             : 
    8739             :                 SourcePixel sp;
    8740      103415 :                 sp.dfArea = 0;
    8741      103415 :                 sp.dfDstX0 = adfX0[iX];
    8742      103415 :                 sp.dfDstY0 = adfY0[iX];
    8743      103415 :                 sp.dfDstX1 = adfX0[iX + 1];
    8744      103415 :                 sp.dfDstY1 = adfY0[iX + 1];
    8745      103415 :                 sp.dfDstX2 = adfX1[iX + 1];
    8746      103415 :                 sp.dfDstY2 = adfY1[iX + 1];
    8747      103415 :                 sp.dfDstX3 = adfX1[iX];
    8748      103415 :                 sp.dfDstY3 = adfY1[iX];
    8749             : 
    8750             :                 // Detect pixel that likely cross the anti-meridian and
    8751             :                 // introduce a discontinuity when reprojected.
    8752             : 
    8753      103415 :                 if (std::fabs(adfX0[iX] - adfX0[iX + 1]) > 2 * poWK->dfXScale &&
    8754       10766 :                     std::fabs(adfX1[iX] - adfX1[iX + 1]) > 2 * poWK->dfXScale &&
    8755        5241 :                     getInsideXSign(adfX0[iX]) !=
    8756        5313 :                         getInsideXSign(adfX0[iX + 1]) &&
    8757         128 :                     getInsideXSign(adfX0[iX]) == getInsideXSign(adfX1[iX]) &&
    8758          56 :                     getInsideXSign(adfX0[iX + 1]) ==
    8759      108996 :                         getInsideXSign(adfX1[iX + 1]) &&
    8760          40 :                     (adfY1[iX] - adfY0[iX]) * (adfY1[iX + 1] - adfY0[iX + 1]) >
    8761             :                         0)
    8762             :                 {
    8763             : #ifdef DEBUG_VERBOSE
    8764             :                     CPLDebug(
    8765             :                         "WARP",
    8766             :                         "Discontinuity for iSrcX=%d, iSrcY=%d, dest corners:"
    8767             :                         "X0[iX]=%f X0[iX+1]=%f X1[iX]=%f X1[iX+1]=%f,"
    8768             :                         "Y0[iX]=%f Y0[iX+1]=%f Y1[iX]=%f Y1[iX+1]=%f",
    8769             :                         iX + poWK->nSrcXOff, iY + poWK->nSrcYOff, adfX0[iX],
    8770             :                         adfX0[iX + 1], adfX1[iX], adfX1[iX + 1], adfY0[iX],
    8771             :                         adfY0[iX + 1], adfY1[iX], adfY1[iX + 1]);
    8772             : #endif
    8773          40 :                     double dfXMidReprojectedLeftTop = 0;
    8774          40 :                     double dfXMidReprojectedRightTop = 0;
    8775          40 :                     double dfYMidReprojectedTop = 0;
    8776          40 :                     FindDiscontinuity(
    8777          40 :                         iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
    8778          80 :                         iY + poWK->nSrcYOff, getInsideXSign(adfX0[iX]),
    8779             :                         dfXMidReprojectedLeftTop, dfXMidReprojectedRightTop,
    8780             :                         dfYMidReprojectedTop);
    8781          40 :                     double dfXMidReprojectedLeftBottom = 0;
    8782          40 :                     double dfXMidReprojectedRightBottom = 0;
    8783          40 :                     double dfYMidReprojectedBottom = 0;
    8784          40 :                     FindDiscontinuity(
    8785          40 :                         iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
    8786          80 :                         iY + poWK->nSrcYOff + 1, getInsideXSign(adfX1[iX]),
    8787             :                         dfXMidReprojectedLeftBottom,
    8788             :                         dfXMidReprojectedRightBottom, dfYMidReprojectedBottom);
    8789             : 
    8790          40 :                     discontinuityLeft[0] = XYPair(adfX0[iX], adfY0[iX]);
    8791          40 :                     discontinuityLeft[1] =
    8792          80 :                         XYPair(dfXMidReprojectedLeftTop, dfYMidReprojectedTop);
    8793          40 :                     discontinuityLeft[2] = XYPair(dfXMidReprojectedLeftBottom,
    8794          40 :                                                   dfYMidReprojectedBottom);
    8795          40 :                     discontinuityLeft[3] = XYPair(adfX1[iX], adfY1[iX]);
    8796          40 :                     discontinuityLeft[4] = XYPair(adfX0[iX], adfY0[iX]);
    8797             : 
    8798          40 :                     discontinuityRight[0] =
    8799          80 :                         XYPair(adfX0[iX + 1], adfY0[iX + 1]);
    8800          40 :                     discontinuityRight[1] =
    8801          80 :                         XYPair(dfXMidReprojectedRightTop, dfYMidReprojectedTop);
    8802          40 :                     discontinuityRight[2] = XYPair(dfXMidReprojectedRightBottom,
    8803          40 :                                                    dfYMidReprojectedBottom);
    8804          40 :                     discontinuityRight[3] =
    8805          80 :                         XYPair(adfX1[iX + 1], adfY1[iX + 1]);
    8806          40 :                     discontinuityRight[4] =
    8807          80 :                         XYPair(adfX0[iX + 1], adfY0[iX + 1]);
    8808             : 
    8809          40 :                     sp.dfArea = getArea(discontinuityLeft) +
    8810          40 :                                 getArea(discontinuityRight);
    8811          40 :                     if (getInsideXSign(adfX0[iX]) >= 1)
    8812             :                     {
    8813          20 :                         sp.dfDstX1 = dfXMidReprojectedLeftTop;
    8814          20 :                         sp.dfDstY1 = dfYMidReprojectedTop;
    8815          20 :                         sp.dfDstX2 = dfXMidReprojectedLeftBottom;
    8816          20 :                         sp.dfDstY2 = dfYMidReprojectedBottom;
    8817             :                     }
    8818             :                     else
    8819             :                     {
    8820          20 :                         sp.dfDstX0 = dfXMidReprojectedRightTop;
    8821          20 :                         sp.dfDstY0 = dfYMidReprojectedTop;
    8822          20 :                         sp.dfDstX3 = dfXMidReprojectedRightBottom;
    8823          20 :                         sp.dfDstY3 = dfYMidReprojectedBottom;
    8824             :                     }
    8825             :                 }
    8826             : 
    8827             :                 // Bounding box of source pixel (expressed in target pixel
    8828             :                 // coordinates)
    8829             :                 CPLRectObj sRect;
    8830      103415 :                 sRect.minx = std::min(std::min(sp.dfDstX0, sp.dfDstX1),
    8831      103415 :                                       std::min(sp.dfDstX2, sp.dfDstX3));
    8832      103415 :                 sRect.miny = std::min(std::min(sp.dfDstY0, sp.dfDstY1),
    8833      103415 :                                       std::min(sp.dfDstY2, sp.dfDstY3));
    8834      103415 :                 sRect.maxx = std::max(std::max(sp.dfDstX0, sp.dfDstX1),
    8835      103415 :                                       std::max(sp.dfDstX2, sp.dfDstX3));
    8836      103415 :                 sRect.maxy = std::max(std::max(sp.dfDstY0, sp.dfDstY1),
    8837      103415 :                                       std::max(sp.dfDstY2, sp.dfDstY3));
    8838      103415 :                 if (!(sRect.minx < nDstXSize && sRect.maxx > 0 &&
    8839      101355 :                       sRect.miny < iYMax && sRect.maxy > iYMin))
    8840             :                 {
    8841       10852 :                     continue;
    8842             :                 }
    8843             : 
    8844       92563 :                 sp.iSrcX = iX;
    8845       92563 :                 sp.iSrcY = iY;
    8846             : 
    8847       92563 :                 if (!bIsAffineNoRotation)
    8848             :                 {
    8849             :                     // Check polygon validity (no self-crossing)
    8850       89745 :                     XYPair xy;
    8851       89745 :                     if (getIntersection(XYPair(sp.dfDstX0, sp.dfDstY0),
    8852       89745 :                                         XYPair(sp.dfDstX1, sp.dfDstY1),
    8853       89745 :                                         XYPair(sp.dfDstX2, sp.dfDstY2),
    8854      269235 :                                         XYPair(sp.dfDstX3, sp.dfDstY3), xy) ||
    8855       89745 :                         getIntersection(XYPair(sp.dfDstX1, sp.dfDstY1),
    8856       89745 :                                         XYPair(sp.dfDstX2, sp.dfDstY2),
    8857       89745 :                                         XYPair(sp.dfDstX0, sp.dfDstY0),
    8858      179490 :                                         XYPair(sp.dfDstX3, sp.dfDstY3), xy))
    8859             :                     {
    8860         113 :                         continue;
    8861             :                     }
    8862             :                 }
    8863             : 
    8864       92450 :                 CPLQuadTreeInsertWithBounds(
    8865             :                     hQuadTree,
    8866             :                     reinterpret_cast<void *>(
    8867       92450 :                         static_cast<uintptr_t>(sourcePixels.size())),
    8868             :                     &sRect);
    8869             : 
    8870       92450 :                 sourcePixels.push_back(sp);
    8871             :             }
    8872             :         }
    8873             :     }
    8874             : 
    8875          38 :     std::vector<double> adfRealValue(poWK->nBands);
    8876          38 :     std::vector<double> adfImagValue(poWK->nBands);
    8877          38 :     std::vector<double> adfBandDensity(poWK->nBands);
    8878          38 :     std::vector<double> adfWeight(poWK->nBands);
    8879             : 
    8880             : #ifdef CHECK_SUM_WITH_GEOS
    8881             :     auto hGEOSContext = OGRGeometry::createGEOSContext();
    8882             :     auto seq1 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
    8883             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 0, 0.0, 0.0);
    8884             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 1, 1.0, 0.0);
    8885             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 2, 1.0, 1.0);
    8886             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 3, 0.0, 1.0);
    8887             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 4, 0.0, 0.0);
    8888             :     auto hLR1 = GEOSGeom_createLinearRing_r(hGEOSContext, seq1);
    8889             :     auto hP1 = GEOSGeom_createPolygon_r(hGEOSContext, hLR1, nullptr, 0);
    8890             : 
    8891             :     auto seq2 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
    8892             :     auto hLR2 = GEOSGeom_createLinearRing_r(hGEOSContext, seq2);
    8893             :     auto hP2 = GEOSGeom_createPolygon_r(hGEOSContext, hLR2, nullptr, 0);
    8894             : #endif
    8895             : 
    8896             :     const XYPoly xy1{
    8897          38 :         {0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}};
    8898          38 :     XYPoly xy2(5);
    8899          38 :     XYPoly xy2_triangle(4);
    8900          38 :     XYPoly intersection;
    8901             : 
    8902             :     /* ==================================================================== */
    8903             :     /*      Loop over output lines.                                         */
    8904             :     /* ==================================================================== */
    8905        1951 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    8906             :     {
    8907             :         CPLRectObj sRect;
    8908        1932 :         sRect.miny = iDstY;
    8909        1932 :         sRect.maxy = iDstY + 1;
    8910             : 
    8911             :         /* ====================================================================
    8912             :          */
    8913             :         /*      Loop over pixels in output scanline. */
    8914             :         /* ====================================================================
    8915             :          */
    8916     1403940 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    8917             :         {
    8918     1402010 :             sRect.minx = iDstX;
    8919     1402010 :             sRect.maxx = iDstX + 1;
    8920     1402010 :             int nSourcePixels = 0;
    8921             :             void **pahSourcePixel =
    8922     1402010 :                 CPLQuadTreeSearch(hQuadTree, &sRect, &nSourcePixels);
    8923     1402010 :             if (nSourcePixels == 0)
    8924             :             {
    8925     1183090 :                 CPLFree(pahSourcePixel);
    8926     1183100 :                 continue;
    8927             :             }
    8928             : 
    8929      218919 :             std::fill(adfRealValue.begin(), adfRealValue.end(), 0);
    8930      218919 :             std::fill(adfImagValue.begin(), adfImagValue.end(), 0);
    8931      218919 :             std::fill(adfBandDensity.begin(), adfBandDensity.end(), 0);
    8932      218919 :             std::fill(adfWeight.begin(), adfWeight.end(), 0);
    8933      218919 :             double dfDensity = 0;
    8934             :             // Just above zero to please Coverity Scan
    8935      218919 :             double dfTotalWeight = std::numeric_limits<double>::min();
    8936             : 
    8937             :             /* ====================================================================
    8938             :              */
    8939             :             /*          Iterate over each contributing source pixel to add its
    8940             :              */
    8941             :             /*          value weighted by the ratio of the area of its
    8942             :              * intersection  */
    8943             :             /*          with the target pixel divided by the area of the source
    8944             :              */
    8945             :             /*          pixel. */
    8946             :             /* ====================================================================
    8947             :              */
    8948     1020550 :             for (int i = 0; i < nSourcePixels; ++i)
    8949             :             {
    8950      801628 :                 const int iSourcePixel = static_cast<int>(
    8951      801628 :                     reinterpret_cast<uintptr_t>(pahSourcePixel[i]));
    8952      801628 :                 auto &sp = sourcePixels[iSourcePixel];
    8953             : 
    8954      801628 :                 double dfWeight = 0.0;
    8955      801628 :                 if (bIsAffineNoRotation)
    8956             :                 {
    8957             :                     // Optimization since the source pixel is a rectangle in
    8958             :                     // target pixel coordinates
    8959       16326 :                     double dfSrcMinX = std::min(sp.dfDstX0, sp.dfDstX2);
    8960       16326 :                     double dfSrcMaxX = std::max(sp.dfDstX0, sp.dfDstX2);
    8961       16326 :                     double dfSrcMinY = std::min(sp.dfDstY0, sp.dfDstY2);
    8962       16326 :                     double dfSrcMaxY = std::max(sp.dfDstY0, sp.dfDstY2);
    8963       16326 :                     double dfIntersMinX = std::max<double>(dfSrcMinX, iDstX);
    8964       16326 :                     double dfIntersMaxX = std::min(dfSrcMaxX, iDstX + 1.0);
    8965       16326 :                     double dfIntersMinY = std::max<double>(dfSrcMinY, iDstY);
    8966       16326 :                     double dfIntersMaxY = std::min(dfSrcMaxY, iDstY + 1.0);
    8967       16326 :                     dfWeight =
    8968       16326 :                         ((dfIntersMaxX - dfIntersMinX) *
    8969       16326 :                          (dfIntersMaxY - dfIntersMinY)) /
    8970       16326 :                         ((dfSrcMaxX - dfSrcMinX) * (dfSrcMaxY - dfSrcMinY));
    8971             :                 }
    8972             :                 else
    8973             :                 {
    8974             :                     // Compute the polygon of the source pixel in target pixel
    8975             :                     // coordinates, and shifted to the target pixel (unit square
    8976             :                     // coordinates)
    8977             : 
    8978      785302 :                     xy2[0] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
    8979      785302 :                     xy2[1] = {sp.dfDstX1 - iDstX, sp.dfDstY1 - iDstY};
    8980      785302 :                     xy2[2] = {sp.dfDstX2 - iDstX, sp.dfDstY2 - iDstY};
    8981      785302 :                     xy2[3] = {sp.dfDstX3 - iDstX, sp.dfDstY3 - iDstY};
    8982      785302 :                     xy2[4] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
    8983             : 
    8984      785302 :                     if (isConvex(xy2))
    8985             :                     {
    8986      785302 :                         getConvexPolyIntersection(xy1, xy2, intersection);
    8987      785302 :                         if (intersection.size() >= 3)
    8988             :                         {
    8989      468849 :                             dfWeight = getArea(intersection);
    8990             :                         }
    8991             :                     }
    8992             :                     else
    8993             :                     {
    8994             :                         // Split xy2 into 2 triangles.
    8995           0 :                         xy2_triangle[0] = xy2[0];
    8996           0 :                         xy2_triangle[1] = xy2[1];
    8997           0 :                         xy2_triangle[2] = xy2[2];
    8998           0 :                         xy2_triangle[3] = xy2[0];
    8999           0 :                         getConvexPolyIntersection(xy1, xy2_triangle,
    9000             :                                                   intersection);
    9001           0 :                         if (intersection.size() >= 3)
    9002             :                         {
    9003           0 :                             dfWeight = getArea(intersection);
    9004             :                         }
    9005             : 
    9006           0 :                         xy2_triangle[1] = xy2[2];
    9007           0 :                         xy2_triangle[2] = xy2[3];
    9008           0 :                         getConvexPolyIntersection(xy1, xy2_triangle,
    9009             :                                                   intersection);
    9010           0 :                         if (intersection.size() >= 3)
    9011             :                         {
    9012           0 :                             dfWeight += getArea(intersection);
    9013             :                         }
    9014             :                     }
    9015      785302 :                     if (dfWeight > 0.0)
    9016             :                     {
    9017      468828 :                         if (sp.dfArea == 0)
    9018       89592 :                             sp.dfArea = getArea(xy2);
    9019      468828 :                         dfWeight /= sp.dfArea;
    9020             :                     }
    9021             : 
    9022             : #ifdef CHECK_SUM_WITH_GEOS
    9023             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 0,
    9024             :                                          sp.dfDstX0 - iDstX,
    9025             :                                          sp.dfDstY0 - iDstY);
    9026             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 1,
    9027             :                                          sp.dfDstX1 - iDstX,
    9028             :                                          sp.dfDstY1 - iDstY);
    9029             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 2,
    9030             :                                          sp.dfDstX2 - iDstX,
    9031             :                                          sp.dfDstY2 - iDstY);
    9032             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 3,
    9033             :                                          sp.dfDstX3 - iDstX,
    9034             :                                          sp.dfDstY3 - iDstY);
    9035             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 4,
    9036             :                                          sp.dfDstX0 - iDstX,
    9037             :                                          sp.dfDstY0 - iDstY);
    9038             : 
    9039             :                     double dfWeightGEOS = 0.0;
    9040             :                     auto hIntersection =
    9041             :                         GEOSIntersection_r(hGEOSContext, hP1, hP2);
    9042             :                     if (hIntersection)
    9043             :                     {
    9044             :                         double dfIntersArea = 0.0;
    9045             :                         if (GEOSArea_r(hGEOSContext, hIntersection,
    9046             :                                        &dfIntersArea) &&
    9047             :                             dfIntersArea > 0)
    9048             :                         {
    9049             :                             double dfSourceArea = 0.0;
    9050             :                             if (GEOSArea_r(hGEOSContext, hP2, &dfSourceArea))
    9051             :                             {
    9052             :                                 dfWeightGEOS = dfIntersArea / dfSourceArea;
    9053             :                             }
    9054             :                         }
    9055             :                         GEOSGeom_destroy_r(hGEOSContext, hIntersection);
    9056             :                     }
    9057             :                     if (fabs(dfWeight - dfWeightGEOS) > 1e-5 * dfWeightGEOS)
    9058             :                     {
    9059             :                         /* ok */ printf("dfWeight=%f dfWeightGEOS=%f\n",
    9060             :                                         dfWeight, dfWeightGEOS);
    9061             :                         printf("xy2: ");  // ok
    9062             :                         for (const auto &xy : xy2)
    9063             :                             printf("[%f, %f], ", xy.first, xy.second);  // ok
    9064             :                         printf("\n");                                   // ok
    9065             :                         printf("intersection: ");                       // ok
    9066             :                         for (const auto &xy : intersection)
    9067             :                             printf("[%f, %f], ", xy.first, xy.second);  // ok
    9068             :                         printf("\n");                                   // ok
    9069             :                     }
    9070             : #endif
    9071             :                 }
    9072      801628 :                 if (dfWeight > 0.0)
    9073             :                 {
    9074             : #ifdef DEBUG_VERBOSE
    9075             : #if defined(DST_X) && defined(DST_Y)
    9076             :                     if (iDstX + poWK->nDstXOff == DST_X &&
    9077             :                         iDstY + poWK->nDstYOff == DST_Y)
    9078             :                     {
    9079             :                         CPLDebug("WARP",
    9080             :                                  "iSrcX = %d, iSrcY = %d, weight =%.17g",
    9081             :                                  sp.iSrcX + poWK->nSrcXOff,
    9082             :                                  sp.iSrcY + poWK->nSrcYOff, dfWeight);
    9083             :                     }
    9084             : #endif
    9085             : #endif
    9086             : 
    9087      474104 :                     const GPtrDiff_t iSrcOffset =
    9088      474104 :                         sp.iSrcX +
    9089      474104 :                         static_cast<GPtrDiff_t>(sp.iSrcY) * nSrcXSize;
    9090      474104 :                     dfTotalWeight += dfWeight;
    9091             : 
    9092      474104 :                     if (poWK->pafUnifiedSrcDensity != nullptr)
    9093             :                     {
    9094           0 :                         dfDensity +=
    9095           0 :                             dfWeight *
    9096           0 :                             double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    9097             :                     }
    9098             :                     else
    9099             :                     {
    9100      474104 :                         dfDensity += dfWeight;
    9101             :                     }
    9102             : 
    9103     1818730 :                     for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    9104             :                     {
    9105             :                         // Returns pixel value if it is not no data.
    9106             :                         double dfBandDensity;
    9107             :                         double dfRealValue;
    9108             :                         double dfImagValue;
    9109     2689250 :                         if (!(GWKGetPixelValue(poWK, iBand, iSrcOffset,
    9110             :                                                &dfBandDensity, &dfRealValue,
    9111             :                                                &dfImagValue) &&
    9112     1344620 :                               dfBandDensity > BAND_DENSITY_THRESHOLD))
    9113             :                         {
    9114           0 :                             continue;
    9115             :                         }
    9116             : #ifdef DEBUG_VERBOSE
    9117             : #if defined(DST_X) && defined(DST_Y)
    9118             :                         if (iDstX + poWK->nDstXOff == DST_X &&
    9119             :                             iDstY + poWK->nDstYOff == DST_Y)
    9120             :                         {
    9121             :                             CPLDebug("WARP", "value * weight = %.17g",
    9122             :                                      dfRealValue * dfWeight);
    9123             :                         }
    9124             : #endif
    9125             : #endif
    9126             : 
    9127     1344620 :                         adfRealValue[iBand] += dfRealValue * dfWeight;
    9128     1344620 :                         adfImagValue[iBand] += dfImagValue * dfWeight;
    9129     1344620 :                         adfBandDensity[iBand] += dfBandDensity * dfWeight;
    9130     1344620 :                         adfWeight[iBand] += dfWeight;
    9131             :                     }
    9132             :                 }
    9133             :             }
    9134             : 
    9135      218919 :             CPLFree(pahSourcePixel);
    9136             : 
    9137             :             /* --------------------------------------------------------------------
    9138             :              */
    9139             :             /*          Update destination pixel value. */
    9140             :             /* --------------------------------------------------------------------
    9141             :              */
    9142      218919 :             bool bHasFoundDensity = false;
    9143      218919 :             const GPtrDiff_t iDstOffset =
    9144      218919 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    9145      827838 :             for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    9146             :             {
    9147      608919 :                 if (adfWeight[iBand] > 0)
    9148             :                 {
    9149             :                     const double dfBandDensity =
    9150      608909 :                         adfBandDensity[iBand] / adfWeight[iBand];
    9151      608909 :                     if (dfBandDensity > BAND_DENSITY_THRESHOLD)
    9152             :                     {
    9153      608909 :                         bHasFoundDensity = true;
    9154      608909 :                         GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    9155      608909 :                                          adfRealValue[iBand],
    9156      608909 :                                          adfImagValue[iBand],
    9157             :                                          bAvoidNoDataSingleBand);
    9158             :                     }
    9159             :                 }
    9160             :             }
    9161             : 
    9162      218919 :             if (!bHasFoundDensity)
    9163          10 :                 continue;
    9164             : 
    9165      218909 :             if (!bAvoidNoDataSingleBand)
    9166             :             {
    9167           0 :                 GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    9168             :             }
    9169             : 
    9170             :             /* --------------------------------------------------------------------
    9171             :              */
    9172             :             /*          Update destination density/validity masks. */
    9173             :             /* --------------------------------------------------------------------
    9174             :              */
    9175      218909 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity / dfTotalWeight);
    9176             : 
    9177      218909 :             if (poWK->panDstValid != nullptr)
    9178             :             {
    9179       11752 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    9180             :             }
    9181             :         }
    9182             : 
    9183             :         /* --------------------------------------------------------------------
    9184             :          */
    9185             :         /*      Report progress to the user, and optionally cancel out. */
    9186             :         /* --------------------------------------------------------------------
    9187             :          */
    9188        1932 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    9189           0 :             break;
    9190             :     }
    9191             : 
    9192             : #ifdef CHECK_SUM_WITH_GEOS
    9193             :     GEOSGeom_destroy_r(hGEOSContext, hP1);
    9194             :     GEOSGeom_destroy_r(hGEOSContext, hP2);
    9195             :     OGRGeometry::freeGEOSContext(hGEOSContext);
    9196             : #endif
    9197          19 :     CPLQuadTreeDestroy(hQuadTree);
    9198          19 : }

Generated by: LCOV version 1.14