LCOV - code coverage report
Current view: top level - alg - gdalwarpkernel.cpp (source / functions) Hit Total Coverage
Test: gdal_filtered.info Lines: 3044 3758 81.0 %
Date: 2024-11-21 22:18:42 Functions: 153 161 95.0 %

          Line data    Source code
       1             : /******************************************************************************
       2             :  *
       3             :  * Project:  High Performance Image Reprojector
       4             :  * Purpose:  Implementation of the GDALWarpKernel class.  Implements the actual
       5             :  *           image warping for a "chunk" of input and output imagery already
       6             :  *           loaded into memory.
       7             :  * Author:   Frank Warmerdam, warmerdam@pobox.com
       8             :  *
       9             :  ******************************************************************************
      10             :  * Copyright (c) 2003, Frank Warmerdam <warmerdam@pobox.com>
      11             :  * Copyright (c) 2008-2013, Even Rouault <even dot rouault at spatialys.com>
      12             :  *
      13             :  * SPDX-License-Identifier: MIT
      14             :  ****************************************************************************/
      15             : 
      16             : #include "cpl_port.h"
      17             : #include "gdalwarper.h"
      18             : 
      19             : #include <cfloat>
      20             : #include <cmath>
      21             : #include <cstddef>
      22             : #include <cstdlib>
      23             : #include <cstring>
      24             : 
      25             : #include <algorithm>
      26             : #include <limits>
      27             : #include <mutex>
      28             : #include <new>
      29             : #include <utility>
      30             : #include <vector>
      31             : 
      32             : #include "cpl_atomic_ops.h"
      33             : #include "cpl_conv.h"
      34             : #include "cpl_error.h"
      35             : #include "cpl_mask.h"
      36             : #include "cpl_multiproc.h"
      37             : #include "cpl_progress.h"
      38             : #include "cpl_string.h"
      39             : #include "cpl_vsi.h"
      40             : #include "cpl_worker_thread_pool.h"
      41             : #include "cpl_quad_tree.h"
      42             : #include "gdal.h"
      43             : #include "gdal_alg.h"
      44             : #include "gdal_alg_priv.h"
      45             : #include "gdal_thread_pool.h"
      46             : #include "gdalresamplingkernels.h"
      47             : #include "gdalwarpkernel_opencl.h"
      48             : 
      49             : // #define CHECK_SUM_WITH_GEOS
      50             : #ifdef CHECK_SUM_WITH_GEOS
      51             : #include "ogr_geometry.h"
      52             : #include "ogr_geos.h"
      53             : #endif
      54             : 
      55             : // We restrict to 64bit processors because they are guaranteed to have SSE2.
      56             : // Could possibly be used too on 32bit, but we would need to check at runtime.
      57             : #if defined(__x86_64) || defined(_M_X64)
      58             : #include "gdalsse_priv.h"
      59             : 
      60             : #if __SSE4_1__
      61             : #include <smmintrin.h>
      62             : #endif
      63             : 
      64             : #if __SSE3__
      65             : #include <pmmintrin.h>
      66             : #endif
      67             : 
      68             : #endif
      69             : 
      70             : constexpr double BAND_DENSITY_THRESHOLD = 0.0000000001;
      71             : constexpr float SRC_DENSITY_THRESHOLD = 0.000000001f;
      72             : 
      73             : // #define INSTANTIATE_FLOAT64_SSE2_IMPL
      74             : 
      75             : static const int anGWKFilterRadius[] = {
      76             :     0,  // Nearest neighbour
      77             :     1,  // Bilinear
      78             :     2,  // Cubic Convolution (Catmull-Rom)
      79             :     2,  // Cubic B-Spline
      80             :     3,  // Lanczos windowed sinc
      81             :     0,  // Average
      82             :     0,  // Mode
      83             :     0,  // Reserved GRA_Gauss=7
      84             :     0,  // Max
      85             :     0,  // Min
      86             :     0,  // Med
      87             :     0,  // Q1
      88             :     0,  // Q3
      89             :     0,  // Sum
      90             :     0,  // RMS
      91             : };
      92             : 
      93             : static double GWKBilinear(double dfX);
      94             : static double GWKCubic(double dfX);
      95             : static double GWKBSpline(double dfX);
      96             : static double GWKLanczosSinc(double dfX);
      97             : 
      98             : static const FilterFuncType apfGWKFilter[] = {
      99             :     nullptr,         // Nearest neighbour
     100             :     GWKBilinear,     // Bilinear
     101             :     GWKCubic,        // Cubic Convolution (Catmull-Rom)
     102             :     GWKBSpline,      // Cubic B-Spline
     103             :     GWKLanczosSinc,  // Lanczos windowed sinc
     104             :     nullptr,         // Average
     105             :     nullptr,         // Mode
     106             :     nullptr,         // Reserved GRA_Gauss=7
     107             :     nullptr,         // Max
     108             :     nullptr,         // Min
     109             :     nullptr,         // Med
     110             :     nullptr,         // Q1
     111             :     nullptr,         // Q3
     112             :     nullptr,         // Sum
     113             :     nullptr,         // RMS
     114             : };
     115             : 
     116             : // TODO(schwehr): Can we make these functions have a const * const arg?
     117             : static double GWKBilinear4Values(double *padfVals);
     118             : static double GWKCubic4Values(double *padfVals);
     119             : static double GWKBSpline4Values(double *padfVals);
     120             : static double GWKLanczosSinc4Values(double *padfVals);
     121             : 
     122             : static const FilterFunc4ValuesType apfGWKFilter4Values[] = {
     123             :     nullptr,                // Nearest neighbour
     124             :     GWKBilinear4Values,     // Bilinear
     125             :     GWKCubic4Values,        // Cubic Convolution (Catmull-Rom)
     126             :     GWKBSpline4Values,      // Cubic B-Spline
     127             :     GWKLanczosSinc4Values,  // Lanczos windowed sinc
     128             :     nullptr,                // Average
     129             :     nullptr,                // Mode
     130             :     nullptr,                // Reserved GRA_Gauss=7
     131             :     nullptr,                // Max
     132             :     nullptr,                // Min
     133             :     nullptr,                // Med
     134             :     nullptr,                // Q1
     135             :     nullptr,                // Q3
     136             :     nullptr,                // Sum
     137             :     nullptr,                // RMS
     138             : };
     139             : 
     140        9525 : int GWKGetFilterRadius(GDALResampleAlg eResampleAlg)
     141             : {
     142             :     static_assert(CPL_ARRAYSIZE(anGWKFilterRadius) == GRA_LAST_VALUE + 1,
     143             :                   "Bad size of anGWKFilterRadius");
     144        9525 :     return anGWKFilterRadius[eResampleAlg];
     145             : }
     146             : 
     147        3644 : FilterFuncType GWKGetFilterFunc(GDALResampleAlg eResampleAlg)
     148             : {
     149             :     static_assert(CPL_ARRAYSIZE(apfGWKFilter) == GRA_LAST_VALUE + 1,
     150             :                   "Bad size of apfGWKFilter");
     151        3644 :     return apfGWKFilter[eResampleAlg];
     152             : }
     153             : 
     154        3646 : FilterFunc4ValuesType GWKGetFilterFunc4Values(GDALResampleAlg eResampleAlg)
     155             : {
     156             :     static_assert(CPL_ARRAYSIZE(apfGWKFilter4Values) == GRA_LAST_VALUE + 1,
     157             :                   "Bad size of apfGWKFilter4Values");
     158        3646 :     return apfGWKFilter4Values[eResampleAlg];
     159             : }
     160             : 
     161             : #ifdef HAVE_OPENCL
     162             : static CPLErr GWKOpenCLCase(GDALWarpKernel *);
     163             : #endif
     164             : 
     165             : static CPLErr GWKGeneralCase(GDALWarpKernel *);
     166             : static CPLErr GWKRealCase(GDALWarpKernel *poWK);
     167             : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     168             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     169             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     170             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
     171             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
     172             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
     173             : #endif
     174             : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     175             : static CPLErr GWKNearestByte(GDALWarpKernel *poWK);
     176             : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     177             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     178             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
     179             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
     180             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
     181             : #endif
     182             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     183             : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     184             : static CPLErr GWKNearestShort(GDALWarpKernel *poWK);
     185             : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
     186             : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK);
     187             : static CPLErr GWKAverageOrMode(GDALWarpKernel *);
     188             : static CPLErr GWKSumPreserving(GDALWarpKernel *);
     189             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
     190             : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
     191             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
     192             : 
     193             : /************************************************************************/
     194             : /*                           GWKJobStruct                               */
     195             : /************************************************************************/
     196             : 
     197             : struct GWKJobStruct
     198             : {
     199             :     std::mutex &mutex;
     200             :     std::condition_variable &cv;
     201             :     int &counter;
     202             :     bool &stopFlag;
     203             :     GDALWarpKernel *poWK;
     204             :     int iYMin;
     205             :     int iYMax;
     206             :     int (*pfnProgress)(GWKJobStruct *psJob);
     207             :     void *pTransformerArg;
     208             :     void (*pfnFunc)(
     209             :         void *);  // used by GWKRun() to assign the proper pTransformerArg
     210             : 
     211        2015 :     GWKJobStruct(std::mutex &mutex_, std::condition_variable &cv_,
     212             :                  int &counter_, bool &stopFlag_)
     213        2015 :         : mutex(mutex_), cv(cv_), counter(counter_), stopFlag(stopFlag_),
     214             :           poWK(nullptr), iYMin(0), iYMax(0), pfnProgress(nullptr),
     215        2015 :           pTransformerArg(nullptr), pfnFunc(nullptr)
     216             :     {
     217        2015 :     }
     218             : };
     219             : 
     220             : struct GWKThreadData
     221             : {
     222             :     std::unique_ptr<CPLJobQueue> poJobQueue{};
     223             :     std::unique_ptr<std::vector<GWKJobStruct>> threadJobs{};
     224             :     int nMaxThreads{0};
     225             :     int counter{0};
     226             :     bool stopFlag{false};
     227             :     std::mutex mutex{};
     228             :     std::condition_variable cv{};
     229             :     bool bTransformerArgInputAssignedToThread{false};
     230             :     void *pTransformerArgInput{
     231             :         nullptr};  // owned by calling layer. Not to be destroyed
     232             :     std::map<GIntBig, void *> mapThreadToTransformerArg{};
     233             :     int nTotalThreadCountForThisRun = 0;
     234             :     int nCurThreadCountForThisRun = 0;
     235             : };
     236             : 
     237             : /************************************************************************/
     238             : /*                        GWKProgressThread()                           */
     239             : /************************************************************************/
     240             : 
     241             : // Return TRUE if the computation must be interrupted.
     242          18 : static int GWKProgressThread(GWKJobStruct *psJob)
     243             : {
     244          18 :     bool stop = false;
     245             :     {
     246          18 :         std::lock_guard<std::mutex> lock(psJob->mutex);
     247          18 :         psJob->counter++;
     248          18 :         stop = psJob->stopFlag;
     249             :     }
     250          18 :     psJob->cv.notify_one();
     251             : 
     252          18 :     return stop;
     253             : }
     254             : 
     255             : /************************************************************************/
     256             : /*                      GWKProgressMonoThread()                         */
     257             : /************************************************************************/
     258             : 
     259             : // Return TRUE if the computation must be interrupted.
     260      198631 : static int GWKProgressMonoThread(GWKJobStruct *psJob)
     261             : {
     262      198631 :     GDALWarpKernel *poWK = psJob->poWK;
     263             :     // coverity[missing_lock]
     264      198631 :     if (!poWK->pfnProgress(
     265      198631 :             poWK->dfProgressBase +
     266      198631 :                 poWK->dfProgressScale *
     267      198631 :                     (++psJob->counter / static_cast<double>(psJob->iYMax)),
     268             :             "", poWK->pProgress))
     269             :     {
     270           1 :         CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
     271           1 :         psJob->stopFlag = true;
     272           1 :         return TRUE;
     273             :     }
     274      198630 :     return FALSE;
     275             : }
     276             : 
     277             : /************************************************************************/
     278             : /*                       GWKGenericMonoThread()                         */
     279             : /************************************************************************/
     280             : 
     281        2010 : static CPLErr GWKGenericMonoThread(GDALWarpKernel *poWK,
     282             :                                    void (*pfnFunc)(void *pUserData))
     283             : {
     284        2010 :     GWKThreadData td;
     285             : 
     286             :     // NOTE: the mutex is not used.
     287        2010 :     GWKJobStruct job(td.mutex, td.cv, td.counter, td.stopFlag);
     288        2010 :     job.poWK = poWK;
     289        2010 :     job.iYMin = 0;
     290        2010 :     job.iYMax = poWK->nDstYSize;
     291        2010 :     job.pfnProgress = GWKProgressMonoThread;
     292        2010 :     job.pTransformerArg = poWK->pTransformerArg;
     293        2010 :     pfnFunc(&job);
     294             : 
     295        4020 :     return td.stopFlag ? CE_Failure : CE_None;
     296             : }
     297             : 
     298             : /************************************************************************/
     299             : /*                          GWKThreadsCreate()                          */
     300             : /************************************************************************/
     301             : 
     302        1382 : void *GWKThreadsCreate(char **papszWarpOptions,
     303             :                        GDALTransformerFunc /* pfnTransformer */,
     304             :                        void *pTransformerArg)
     305             : {
     306             :     const char *pszWarpThreads =
     307        1382 :         CSLFetchNameValue(papszWarpOptions, "NUM_THREADS");
     308        1382 :     if (pszWarpThreads == nullptr)
     309        1382 :         pszWarpThreads = CPLGetConfigOption("GDAL_NUM_THREADS", "1");
     310             : 
     311        1382 :     int nThreads = 0;
     312        1382 :     if (EQUAL(pszWarpThreads, "ALL_CPUS"))
     313           3 :         nThreads = CPLGetNumCPUs();
     314             :     else
     315        1379 :         nThreads = atoi(pszWarpThreads);
     316        1382 :     if (nThreads <= 1)
     317        1377 :         nThreads = 0;
     318        1382 :     if (nThreads > 128)
     319           0 :         nThreads = 128;
     320             : 
     321        1382 :     GWKThreadData *psThreadData = new GWKThreadData();
     322             :     auto poThreadPool =
     323        1382 :         nThreads > 0 ? GDALGetGlobalThreadPool(nThreads) : nullptr;
     324        1382 :     if (nThreads && poThreadPool)
     325             :     {
     326           5 :         psThreadData->nMaxThreads = nThreads;
     327           5 :         psThreadData->threadJobs.reset(new std::vector<GWKJobStruct>(
     328             :             nThreads,
     329           5 :             GWKJobStruct(psThreadData->mutex, psThreadData->cv,
     330          10 :                          psThreadData->counter, psThreadData->stopFlag)));
     331             : 
     332           5 :         psThreadData->poJobQueue = poThreadPool->CreateJobQueue();
     333           5 :         psThreadData->pTransformerArgInput = pTransformerArg;
     334             :     }
     335             : 
     336        1382 :     return psThreadData;
     337             : }
     338             : 
     339             : /************************************************************************/
     340             : /*                             GWKThreadsEnd()                          */
     341             : /************************************************************************/
     342             : 
     343        1382 : void GWKThreadsEnd(void *psThreadDataIn)
     344             : {
     345        1382 :     if (psThreadDataIn == nullptr)
     346           0 :         return;
     347             : 
     348        1382 :     GWKThreadData *psThreadData = static_cast<GWKThreadData *>(psThreadDataIn);
     349        1382 :     if (psThreadData->poJobQueue)
     350             :     {
     351             :         // cppcheck-suppress constVariableReference
     352          15 :         for (auto &pair : psThreadData->mapThreadToTransformerArg)
     353             :         {
     354          10 :             CPLAssert(pair.second != psThreadData->pTransformerArgInput);
     355          10 :             GDALDestroyTransformer(pair.second);
     356             :         }
     357           5 :         psThreadData->poJobQueue.reset();
     358             :     }
     359        1382 :     delete psThreadData;
     360             : }
     361             : 
     362             : /************************************************************************/
     363             : /*                         ThreadFuncAdapter()                          */
     364             : /************************************************************************/
     365             : 
     366          15 : static void ThreadFuncAdapter(void *pData)
     367             : {
     368          15 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
     369          15 :     GWKThreadData *psThreadData =
     370          15 :         static_cast<GWKThreadData *>(psJob->poWK->psThreadData);
     371             : 
     372             :     // Look if we have already a per-thread transformer
     373          15 :     void *pTransformerArg = nullptr;
     374          15 :     const GIntBig nThreadId = CPLGetPID();
     375             : 
     376             :     {
     377          30 :         std::lock_guard<std::mutex> lock(psThreadData->mutex);
     378          15 :         ++psThreadData->nCurThreadCountForThisRun;
     379             : 
     380          15 :         auto oIter = psThreadData->mapThreadToTransformerArg.find(nThreadId);
     381          15 :         if (oIter != psThreadData->mapThreadToTransformerArg.end())
     382             :         {
     383           1 :             pTransformerArg = oIter->second;
     384             :         }
     385          14 :         else if (!psThreadData->bTransformerArgInputAssignedToThread &&
     386          14 :                  psThreadData->nCurThreadCountForThisRun ==
     387          14 :                      psThreadData->nTotalThreadCountForThisRun)
     388             :         {
     389             :             // If we are the last thread to be started, temporarily borrow the
     390             :             // original transformer
     391           4 :             psThreadData->bTransformerArgInputAssignedToThread = true;
     392           4 :             pTransformerArg = psThreadData->pTransformerArgInput;
     393           4 :             psThreadData->mapThreadToTransformerArg[nThreadId] =
     394             :                 pTransformerArg;
     395             :         }
     396             : 
     397          15 :         if (pTransformerArg == nullptr)
     398             :         {
     399          10 :             CPLAssert(psThreadData->pTransformerArgInput != nullptr);
     400          10 :             CPLAssert(!psThreadData->bTransformerArgInputAssignedToThread);
     401             :         }
     402             :     }
     403             : 
     404             :     // If no transformer assigned to current thread, instantiate one
     405          15 :     if (pTransformerArg == nullptr)
     406             :     {
     407             :         // This somehow assumes that GDALCloneTransformer() is thread-safe
     408             :         // which should normally be the case.
     409             :         pTransformerArg =
     410          10 :             GDALCloneTransformer(psThreadData->pTransformerArgInput);
     411             : 
     412             :         // Lock for the stop flag and the transformer map.
     413          10 :         std::lock_guard<std::mutex> lock(psThreadData->mutex);
     414          10 :         if (!pTransformerArg)
     415             :         {
     416           0 :             psJob->stopFlag = true;
     417           0 :             return;
     418             :         }
     419          10 :         psThreadData->mapThreadToTransformerArg[nThreadId] = pTransformerArg;
     420             :     }
     421             : 
     422          15 :     psJob->pTransformerArg = pTransformerArg;
     423          15 :     psJob->pfnFunc(pData);
     424             : 
     425             :     // Give back original transformer, if borrowed.
     426             :     {
     427          30 :         std::lock_guard<std::mutex> lock(psThreadData->mutex);
     428          15 :         if (psThreadData->bTransformerArgInputAssignedToThread &&
     429           5 :             pTransformerArg == psThreadData->pTransformerArgInput)
     430             :         {
     431             :             psThreadData->mapThreadToTransformerArg.erase(
     432           4 :                 psThreadData->mapThreadToTransformerArg.find(nThreadId));
     433           4 :             psThreadData->bTransformerArgInputAssignedToThread = false;
     434             :         }
     435             :     }
     436             : }
     437             : 
     438             : /************************************************************************/
     439             : /*                                GWKRun()                              */
     440             : /************************************************************************/
     441             : 
     442        2015 : static CPLErr GWKRun(GDALWarpKernel *poWK, const char *pszFuncName,
     443             :                      void (*pfnFunc)(void *pUserData))
     444             : 
     445             : {
     446        2015 :     const int nDstYSize = poWK->nDstYSize;
     447             : 
     448        2015 :     CPLDebug("GDAL",
     449             :              "GDALWarpKernel()::%s() "
     450             :              "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d",
     451             :              pszFuncName, poWK->nSrcXOff, poWK->nSrcYOff, poWK->nSrcXSize,
     452             :              poWK->nSrcYSize, poWK->nDstXOff, poWK->nDstYOff, poWK->nDstXSize,
     453             :              poWK->nDstYSize);
     454             : 
     455        2015 :     if (!poWK->pfnProgress(poWK->dfProgressBase, "", poWK->pProgress))
     456             :     {
     457           0 :         CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
     458           0 :         return CE_Failure;
     459             :     }
     460             : 
     461        2015 :     GWKThreadData *psThreadData =
     462             :         static_cast<GWKThreadData *>(poWK->psThreadData);
     463        2015 :     if (psThreadData == nullptr || psThreadData->poJobQueue == nullptr)
     464             :     {
     465        2010 :         return GWKGenericMonoThread(poWK, pfnFunc);
     466             :     }
     467             : 
     468           5 :     int nThreads = std::min(psThreadData->nMaxThreads, nDstYSize / 2);
     469             :     // Config option mostly useful for tests to be able to test multithreading
     470             :     // with small rasters
     471             :     const int nWarpChunkSize =
     472           5 :         atoi(CPLGetConfigOption("WARP_THREAD_CHUNK_SIZE", "65536"));
     473           5 :     if (nWarpChunkSize > 0)
     474             :     {
     475           3 :         GIntBig nChunks =
     476           3 :             static_cast<GIntBig>(nDstYSize) * poWK->nDstXSize / nWarpChunkSize;
     477           3 :         if (nThreads > nChunks)
     478           1 :             nThreads = static_cast<int>(nChunks);
     479             :     }
     480           5 :     if (nThreads <= 0)
     481           1 :         nThreads = 1;
     482             : 
     483           5 :     CPLDebug("WARP", "Using %d threads", nThreads);
     484             : 
     485           5 :     auto &jobs = *psThreadData->threadJobs;
     486           5 :     CPLAssert(static_cast<int>(jobs.size()) >= nThreads);
     487             :     // Fill-in job structures.
     488          20 :     for (int i = 0; i < nThreads; ++i)
     489             :     {
     490          15 :         auto &job = jobs[i];
     491          15 :         job.poWK = poWK;
     492          15 :         job.iYMin =
     493          15 :             static_cast<int>(static_cast<int64_t>(i) * nDstYSize / nThreads);
     494          15 :         job.iYMax = static_cast<int>(static_cast<int64_t>(i + 1) * nDstYSize /
     495          15 :                                      nThreads);
     496          15 :         if (poWK->pfnProgress != GDALDummyProgress)
     497           1 :             job.pfnProgress = GWKProgressThread;
     498          15 :         job.pfnFunc = pfnFunc;
     499             :     }
     500             : 
     501             :     bool bStopFlag;
     502             :     {
     503           5 :         std::unique_lock<std::mutex> lock(psThreadData->mutex);
     504             : 
     505           5 :         psThreadData->nTotalThreadCountForThisRun = nThreads;
     506             :         // coverity[missing_lock]
     507           5 :         psThreadData->nCurThreadCountForThisRun = 0;
     508             : 
     509             :         // Start jobs.
     510          20 :         for (int i = 0; i < nThreads; ++i)
     511             :         {
     512          15 :             auto &job = jobs[i];
     513          15 :             psThreadData->poJobQueue->SubmitJob(ThreadFuncAdapter,
     514             :                                                 static_cast<void *>(&job));
     515             :         }
     516             : 
     517             :         /* --------------------------------------------------------------------
     518             :          */
     519             :         /*      Report progress. */
     520             :         /* --------------------------------------------------------------------
     521             :          */
     522           5 :         if (poWK->pfnProgress != GDALDummyProgress)
     523             :         {
     524           1 :             while (psThreadData->counter < nDstYSize)
     525             :             {
     526           1 :                 psThreadData->cv.wait(lock);
     527           1 :                 if (!poWK->pfnProgress(poWK->dfProgressBase +
     528           1 :                                            poWK->dfProgressScale *
     529           1 :                                                (psThreadData->counter /
     530           1 :                                                 static_cast<double>(nDstYSize)),
     531             :                                        "", poWK->pProgress))
     532             :                 {
     533           1 :                     CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
     534           1 :                     psThreadData->stopFlag = true;
     535           1 :                     break;
     536             :                 }
     537             :             }
     538             :         }
     539             : 
     540           5 :         bStopFlag = psThreadData->stopFlag;
     541             :     }
     542             : 
     543             :     /* -------------------------------------------------------------------- */
     544             :     /*      Wait for all jobs to complete.                                  */
     545             :     /* -------------------------------------------------------------------- */
     546           5 :     psThreadData->poJobQueue->WaitCompletion();
     547             : 
     548           5 :     return bStopFlag ? CE_Failure : CE_None;
     549             : }
     550             : 
     551             : /************************************************************************/
     552             : /* ==================================================================== */
     553             : /*                            GDALWarpKernel                            */
     554             : /* ==================================================================== */
     555             : /************************************************************************/
     556             : 
     557             : /**
     558             :  * \class GDALWarpKernel "gdalwarper.h"
     559             :  *
     560             :  * Low level image warping class.
     561             :  *
     562             :  * This class is responsible for low level image warping for one
     563             :  * "chunk" of imagery.  The class is essentially a structure with all
     564             :  * data members public - primarily so that new special-case functions
     565             :  * can be added without changing the class declaration.
     566             :  *
     567             :  * Applications are normally intended to interact with warping facilities
     568             :  * through the GDALWarpOperation class, though the GDALWarpKernel can in
     569             :  * theory be used directly if great care is taken in setting up the
     570             :  * control data.
     571             :  *
     572             :  * <h3>Design Issues</h3>
     573             :  *
     574             :  * The intention is that PerformWarp() would analyze the setup in terms
     575             :  * of the datatype, resampling type, and validity/density mask usage and
     576             :  * pick one of many specific implementations of the warping algorithm over
     577             :  * a continuum of optimization vs. generality.  At one end there will be a
     578             :  * reference general purpose implementation of the algorithm that supports
     579             :  * any data type (working internally in double precision complex), all three
     580             :  * resampling types, and any or all of the validity/density masks.  At the
     581             :  * other end would be highly optimized algorithms for common cases like
     582             :  * nearest neighbour resampling on GDT_Byte data with no masks.
     583             :  *
     584             :  * The full set of optimized versions have not been decided but we should
     585             :  * expect to have at least:
     586             :  *  - One for each resampling algorithm for 8bit data with no masks.
     587             :  *  - One for each resampling algorithm for float data with no masks.
     588             :  *  - One for each resampling algorithm for float data with any/all masks
     589             :  *    (essentially the generic case for just float data).
     590             :  *  - One for each resampling algorithm for 8bit data with support for
     591             :  *    input validity masks (per band or per pixel).  This handles the common
     592             :  *    case of nodata masking.
     593             :  *  - One for each resampling algorithm for float data with support for
     594             :  *    input validity masks (per band or per pixel).  This handles the common
     595             :  *    case of nodata masking.
     596             :  *
     597             :  * Some of the specializations would operate on all bands in one pass
     598             :  * (especially the ones without masking would do this), while others might
     599             :  * process each band individually to reduce code complexity.
     600             :  *
     601             :  * <h3>Masking Semantics</h3>
     602             :  *
     603             :  * A detailed explanation of the semantics of the validity and density masks,
     604             :  * and their effects on resampling kernels is needed here.
     605             :  */
     606             : 
     607             : /************************************************************************/
     608             : /*                     GDALWarpKernel Data Members                      */
     609             : /************************************************************************/
     610             : 
     611             : /**
     612             :  * \var GDALResampleAlg GDALWarpKernel::eResample;
     613             :  *
     614             :  * Resampling algorithm.
     615             :  *
     616             :  * The resampling algorithm to use.  One of GRA_NearestNeighbour, GRA_Bilinear,
     617             :  * GRA_Cubic, GRA_CubicSpline, GRA_Lanczos, GRA_Average, GRA_RMS,
     618             :  * GRA_Mode or GRA_Sum.
     619             :  *
     620             :  * This field is required. GRA_NearestNeighbour may be used as a default
     621             :  * value.
     622             :  */
     623             : 
     624             : /**
     625             :  * \var GDALDataType GDALWarpKernel::eWorkingDataType;
     626             :  *
     627             :  * Working pixel data type.
     628             :  *
     629             :  * The datatype of pixels in the source image (papabySrcImage) and
     630             :  * destination image (papabyDstImage) buffers.  Note that operations on
     631             :  * some data types (such as GDT_Byte) may be much better optimized than other
     632             :  * less common cases.
     633             :  *
     634             :  * This field is required.  It may not be GDT_Unknown.
     635             :  */
     636             : 
     637             : /**
     638             :  * \var int GDALWarpKernel::nBands;
     639             :  *
     640             :  * Number of bands.
     641             :  *
     642             :  * The number of bands (layers) of imagery being warped.  Determines the
     643             :  * number of entries in the papabySrcImage, papanBandSrcValid,
     644             :  * and papabyDstImage arrays.
     645             :  *
     646             :  * This field is required.
     647             :  */
     648             : 
     649             : /**
     650             :  * \var int GDALWarpKernel::nSrcXSize;
     651             :  *
     652             :  * Source image width in pixels.
     653             :  *
     654             :  * This field is required.
     655             :  */
     656             : 
     657             : /**
     658             :  * \var int GDALWarpKernel::nSrcYSize;
     659             :  *
     660             :  * Source image height in pixels.
     661             :  *
     662             :  * This field is required.
     663             :  */
     664             : 
     665             : /**
     666             :  * \var double GDALWarpKernel::dfSrcXExtraSize;
     667             :  *
     668             :  * Number of pixels included in nSrcXSize that are present on the edges of
     669             :  * the area of interest to take into account the width of the kernel.
     670             :  *
     671             :  * This field is required.
     672             :  */
     673             : 
     674             : /**
     675             :  * \var double GDALWarpKernel::dfSrcYExtraSize;
     676             :  *
     677             :  * Number of pixels included in nSrcYSize that are present on the edges of
     678             :  * the area of interest to take into account the height of the kernel.
     679             :  *
     680             :  * This field is required.
     681             :  */
     682             : 
     683             : /**
     684             :  * \var GByte **GDALWarpKernel::papabySrcImage;
     685             :  *
     686             :  * Array of source image band data.
     687             :  *
     688             :  * This is an array (of size GDALWarpKernel::nBands) of pointers
     689             :  * to image data.  Each individual band of image data is organized as a single
     690             :  * block of image data in left to right, then bottom to top order.  The actual
     691             :  * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
     692             :  *
     693             :  * To access the pixel value for the (x=3, y=4) pixel (zero based) of
     694             :  * the second band with eWorkingDataType set to GDT_Float32 use code like
     695             :  * this:
     696             :  *
     697             :  * \code
     698             :  *   float dfPixelValue;
     699             :  *   int   nBand = 2-1;  // Band indexes are zero based.
     700             :  *   int   nPixel = 3; // Zero based.
     701             :  *   int   nLine = 4;  // Zero based.
     702             :  *
     703             :  *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
     704             :  *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
     705             :  *   assert( nBand >= 0 && nBand < poKern->nBands );
     706             :  *   dfPixelValue = ((float *) poKern->papabySrcImage[nBand])
     707             :  *                                  [nPixel + nLine * poKern->nSrcXSize];
     708             :  * \endcode
     709             :  *
     710             :  * This field is required.
     711             :  */
     712             : 
     713             : /**
     714             :  * \var GUInt32 **GDALWarpKernel::papanBandSrcValid;
     715             :  *
     716             :  * Per band validity mask for source pixels.
     717             :  *
     718             :  * Array of pixel validity mask layers for each source band.   Each of
     719             :  * the mask layers is the same size (in pixels) as the source image with
     720             :  * one bit per pixel.  Note that it is legal (and common) for this to be
     721             :  * NULL indicating that none of the pixels are invalidated, or for some
     722             :  * band validity masks to be NULL in which case all pixels of the band are
     723             :  * valid.  The following code can be used to test the validity of a particular
     724             :  * pixel.
     725             :  *
     726             :  * \code
     727             :  *   int   bIsValid = TRUE;
     728             :  *   int   nBand = 2-1;  // Band indexes are zero based.
     729             :  *   int   nPixel = 3; // Zero based.
     730             :  *   int   nLine = 4;  // Zero based.
     731             :  *
     732             :  *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
     733             :  *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
     734             :  *   assert( nBand >= 0 && nBand < poKern->nBands );
     735             :  *
     736             :  *   if( poKern->papanBandSrcValid != NULL
     737             :  *       && poKern->papanBandSrcValid[nBand] != NULL )
     738             :  *   {
     739             :  *       GUInt32 *panBandMask = poKern->papanBandSrcValid[nBand];
     740             :  *       int    iPixelOffset = nPixel + nLine * poKern->nSrcXSize;
     741             :  *
     742             :  *       bIsValid = CPLMaskGet(panBandMask, iPixelOffset);
     743             :  *   }
     744             :  * \endcode
     745             :  */
     746             : 
     747             : /**
     748             :  * \var GUInt32 *GDALWarpKernel::panUnifiedSrcValid;
     749             :  *
     750             :  * Per pixel validity mask for source pixels.
     751             :  *
     752             :  * A single validity mask layer that applies to the pixels of all source
     753             :  * bands.  It is accessed similarly to papanBandSrcValid, but without the
     754             :  * extra level of band indirection.
     755             :  *
     756             :  * This pointer may be NULL indicating that all pixels are valid.
     757             :  *
     758             :  * Note that if both panUnifiedSrcValid, and papanBandSrcValid are available,
     759             :  * the pixel isn't considered to be valid unless both arrays indicate it is
     760             :  * valid.
     761             :  */
     762             : 
     763             : /**
     764             :  * \var float *GDALWarpKernel::pafUnifiedSrcDensity;
     765             :  *
     766             :  * Per pixel density mask for source pixels.
     767             :  *
     768             :  * A single density mask layer that applies to the pixels of all source
     769             :  * bands.  It contains values between 0.0 and 1.0 indicating the degree to
     770             :  * which this pixel should be allowed to contribute to the output result.
     771             :  *
     772             :  * This pointer may be NULL indicating that all pixels have a density of 1.0.
     773             :  *
     774             :  * The density for a pixel may be accessed like this:
     775             :  *
     776             :  * \code
     777             :  *   float fDensity = 1.0;
     778             :  *   int nPixel = 3;  // Zero based.
     779             :  *   int nLine = 4;   // Zero based.
     780             :  *
     781             :  *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
     782             :  *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
     783             :  *   if( poKern->pafUnifiedSrcDensity != NULL )
     784             :  *     fDensity = poKern->pafUnifiedSrcDensity
     785             :  *                                  [nPixel + nLine * poKern->nSrcXSize];
     786             :  * \endcode
     787             :  */
     788             : 
     789             : /**
     790             :  * \var int GDALWarpKernel::nDstXSize;
     791             :  *
     792             :  * Width of destination image in pixels.
     793             :  *
     794             :  * This field is required.
     795             :  */
     796             : 
     797             : /**
     798             :  * \var int GDALWarpKernel::nDstYSize;
     799             :  *
     800             :  * Height of destination image in pixels.
     801             :  *
     802             :  * This field is required.
     803             :  */
     804             : 
     805             : /**
     806             :  * \var GByte **GDALWarpKernel::papabyDstImage;
     807             :  *
     808             :  * Array of destination image band data.
     809             :  *
     810             :  * This is an array (of size GDALWarpKernel::nBands) of pointers
     811             :  * to image data.  Each individual band of image data is organized as a single
     812             :  * block of image data in left to right, then bottom to top order.  The actual
     813             :  * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
     814             :  *
     815             :  * To access the pixel value for the (x=3, y=4) pixel (zero based) of
     816             :  * the second band with eWorkingDataType set to GDT_Float32 use code like
     817             :  * this:
     818             :  *
     819             :  * \code
     820             :  *   float dfPixelValue;
     821             :  *   int   nBand = 2-1;  // Band indexes are zero based.
     822             :  *   int   nPixel = 3; // Zero based.
     823             :  *   int   nLine = 4;  // Zero based.
     824             :  *
     825             :  *   assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
     826             :  *   assert( nLine >= 0 && nLine < poKern->nDstYSize );
     827             :  *   assert( nBand >= 0 && nBand < poKern->nBands );
     828             :  *   dfPixelValue = ((float *) poKern->papabyDstImage[nBand])
     829             :  *                                  [nPixel + nLine * poKern->nDstXSize];
     830             :  * \endcode
     831             :  *
     832             :  * This field is required.
     833             :  */
     834             : 
     835             : /**
     836             :  * \var GUInt32 *GDALWarpKernel::panDstValid;
     837             :  *
     838             :  * Per pixel validity mask for destination pixels.
     839             :  *
     840             :  * A single validity mask layer that applies to the pixels of all destination
     841             :  * bands.  It is accessed similarly to panUnifiedSrcValid, but based
     842             :  * on the size of the destination image.
     843             :  *
     844             :  * This pointer may be NULL indicating that all pixels are valid.
     845             :  */
     846             : 
     847             : /**
     848             :  * \var float *GDALWarpKernel::pafDstDensity;
     849             :  *
     850             :  * Per pixel density mask for destination pixels.
     851             :  *
     852             :  * A single density mask layer that applies to the pixels of all destination
     853             :  * bands.  It contains values between 0.0 and 1.0.
     854             :  *
     855             :  * This pointer may be NULL indicating that all pixels have a density of 1.0.
     856             :  *
     857             :  * The density for a pixel may be accessed like this:
     858             :  *
     859             :  * \code
     860             :  *   float fDensity = 1.0;
     861             :  *   int   nPixel = 3; // Zero based.
     862             :  *   int   nLine = 4;  // Zero based.
     863             :  *
     864             :  *   assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
     865             :  *   assert( nLine >= 0 && nLine < poKern->nDstYSize );
     866             :  *   if( poKern->pafDstDensity != NULL )
     867             :  *     fDensity = poKern->pafDstDensity[nPixel + nLine * poKern->nDstXSize];
     868             :  * \endcode
     869             :  */
     870             : 
     871             : /**
     872             :  * \var int GDALWarpKernel::nSrcXOff;
     873             :  *
     874             :  * X offset to source pixel coordinates for transformation.
     875             :  *
     876             :  * See pfnTransformer.
     877             :  *
     878             :  * This field is required.
     879             :  */
     880             : 
     881             : /**
     882             :  * \var int GDALWarpKernel::nSrcYOff;
     883             :  *
     884             :  * Y offset to source pixel coordinates for transformation.
     885             :  *
     886             :  * See pfnTransformer.
     887             :  *
     888             :  * This field is required.
     889             :  */
     890             : 
     891             : /**
     892             :  * \var int GDALWarpKernel::nDstXOff;
     893             :  *
     894             :  * X offset to destination pixel coordinates for transformation.
     895             :  *
     896             :  * See pfnTransformer.
     897             :  *
     898             :  * This field is required.
     899             :  */
     900             : 
     901             : /**
     902             :  * \var int GDALWarpKernel::nDstYOff;
     903             :  *
     904             :  * Y offset to destination pixel coordinates for transformation.
     905             :  *
     906             :  * See pfnTransformer.
     907             :  *
     908             :  * This field is required.
     909             :  */
     910             : 
     911             : /**
     912             :  * \var GDALTransformerFunc GDALWarpKernel::pfnTransformer;
     913             :  *
     914             :  * Source/destination location transformer.
     915             :  *
     916             :  * The function to call to transform coordinates between source image
     917             :  * pixel/line coordinates and destination image pixel/line coordinates.
     918             :  * See GDALTransformerFunc() for details of the semantics of this function.
     919             :  *
     920             :  * The GDALWarpKernel algorithm will only ever use this transformer in
     921             :  * "destination to source" mode (bDstToSrc=TRUE), and will always pass
     922             :  * partial or complete scanlines of points in the destination image as
     923             :  * input.  This means, among other things, that it is safe to use the
     924             :  * approximating transform GDALApproxTransform() as the transformation
     925             :  * function.
     926             :  *
     927             :  * Source and destination images may be subsets of a larger overall image.
     928             :  * The transformation algorithms will expect and return pixel/line coordinates
     929             :  * in terms of this larger image, so coordinates need to be offset by
     930             :  * the offsets specified in nSrcXOff, nSrcYOff, nDstXOff, and nDstYOff before
     931             :  * passing to pfnTransformer, and after return from it.
     932             :  *
     933             :  * The GDALWarpKernel::pTransformerArg value will be passed as the callback
     934             :  * data to this function when it is called.
     935             :  *
     936             :  * This field is required.
     937             :  */
     938             : 
     939             : /**
     940             :  * \var void *GDALWarpKernel::pTransformerArg;
     941             :  *
     942             :  * Callback data for pfnTransformer.
     943             :  *
     944             :  * This field may be NULL if not required for the pfnTransformer being used.
     945             :  */
     946             : 
     947             : /**
     948             :  * \var GDALProgressFunc GDALWarpKernel::pfnProgress;
     949             :  *
     950             :  * The function to call to report progress of the algorithm, and to check
     951             :  * for a requested termination of the operation.  It operates according to
     952             :  * GDALProgressFunc() semantics.
     953             :  *
     954             :  * Generally speaking the progress function will be invoked for each
     955             :  * scanline of the destination buffer that has been processed.
     956             :  *
     957             :  * This field may be NULL (internally set to GDALDummyProgress()).
     958             :  */
     959             : 
     960             : /**
     961             :  * \var void *GDALWarpKernel::pProgress;
     962             :  *
     963             :  * Callback data for pfnProgress.
     964             :  *
     965             :  * This field may be NULL if not required for the pfnProgress being used.
     966             :  */
     967             : 
     968             : /************************************************************************/
     969             : /*                           GDALWarpKernel()                           */
     970             : /************************************************************************/
     971             : 
     972        2321 : GDALWarpKernel::GDALWarpKernel()  // Default constructor: only initializes members, see list below.
     973             :     : papszWarpOptions(nullptr), eResample(GRA_NearestNeighbour),  // No options; nearest neighbour resampling.
     974             :       eWorkingDataType(GDT_Unknown), nBands(0), nSrcXSize(0), nSrcYSize(0),  // GDT_Unknown is documented as not allowed for a warp, so it must be set later.
     975             :       dfSrcXExtraSize(0.0), dfSrcYExtraSize(0.0), papabySrcImage(nullptr),
     976             :       papanBandSrcValid(nullptr), panUnifiedSrcValid(nullptr),  // Source masks start out null.
     977             :       pafUnifiedSrcDensity(nullptr), nDstXSize(0), nDstYSize(0),
     978             :       papabyDstImage(nullptr), panDstValid(nullptr), pafDstDensity(nullptr),  // Destination buffer and masks start out null.
     979             :       dfXScale(1.0), dfYScale(1.0), dfXFilter(0.0), dfYFilter(0.0), nXRadius(0),  // Scales start at 1.0, filter sizes and radii at 0.
     980             :       nYRadius(0), nFiltInitX(0), nFiltInitY(0), nSrcXOff(0), nSrcYOff(0),
     981             :       nDstXOff(0), nDstYOff(0), pfnTransformer(nullptr),  // No transformer installed by default.
     982             :       pTransformerArg(nullptr), pfnProgress(GDALDummyProgress),  // Progress callback defaults to GDALDummyProgress, never null.
     983             :       pProgress(nullptr), dfProgressBase(0.0), dfProgressScale(1.0),  // Progress range defaults to base 0.0, scale 1.0.
     984        2321 :       padfDstNoDataReal(nullptr), psThreadData(nullptr)
     985             : {  // Empty body: all work is done by the member initializer list.
     986        2321 : }
     987             : 
     988             : /************************************************************************/
     989             : /*                          ~GDALWarpKernel()                           */
     990             : /************************************************************************/
     991             : 
     992        2321 : GDALWarpKernel::~GDALWarpKernel()  // Destructor with an empty body.
     993             : {  // No member is freed or reset here. NOTE(review): presumably the buffers are released by whoever set them - confirm.
     994        2321 : }
     995             : 
     996             : /************************************************************************/
     997             : /*                            PerformWarp()                             */
     998             : /************************************************************************/
     999             : 
    1000             : /**
    1001             :  * \fn CPLErr GDALWarpKernel::PerformWarp();
    1002             :  *
    1003             :  * This method performs the warp described in the GDALWarpKernel.
    1004             :  *
    1005             :  * @return CE_None on success or CE_Failure if an error occurs.
    1006             :  */
    1007             : 
    1008        2319 : CPLErr GDALWarpKernel::PerformWarp()
    1009             : 
    1010             : {
    1011        2319 :     const CPLErr eErr = Validate();
    1012             : 
    1013        2319 :     if (eErr != CE_None)
    1014           1 :         return eErr;
    1015             : 
    1016             :     // See #2445 and #3079.
    1017        2318 :     if (nSrcXSize <= 0 || nSrcYSize <= 0)
    1018             :     {
    1019         303 :         if (!pfnProgress(dfProgressBase + dfProgressScale, "", pProgress))
    1020             :         {
    1021           0 :             CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
    1022           0 :             return CE_Failure;
    1023             :         }
    1024         303 :         return CE_None;
    1025             :     }
    1026             : 
    1027             :     /* -------------------------------------------------------------------- */
    1028             :     /*      Pre-calculate resampling scales and window sizes for filtering. */
    1029             :     /* -------------------------------------------------------------------- */
    1030             : 
    1031        2015 :     dfXScale = static_cast<double>(nDstXSize) / (nSrcXSize - dfSrcXExtraSize);
    1032        2015 :     dfYScale = static_cast<double>(nDstYSize) / (nSrcYSize - dfSrcYExtraSize);
    1033        2015 :     if (nSrcXSize >= nDstXSize && nSrcXSize <= nDstXSize + dfSrcXExtraSize)
    1034        1262 :         dfXScale = 1.0;
    1035        2015 :     if (nSrcYSize >= nDstYSize && nSrcYSize <= nDstYSize + dfSrcYExtraSize)
    1036        1015 :         dfYScale = 1.0;
    1037        2015 :     if (dfXScale < 1.0)
    1038             :     {
    1039         536 :         double dfXReciprocalScale = 1.0 / dfXScale;
    1040         536 :         const int nXReciprocalScale =
    1041         536 :             static_cast<int>(dfXReciprocalScale + 0.5);
    1042         536 :         if (fabs(dfXReciprocalScale - nXReciprocalScale) < 0.05)
    1043         419 :             dfXScale = 1.0 / nXReciprocalScale;
    1044             :     }
    1045        2015 :     if (dfYScale < 1.0)
    1046             :     {
    1047         503 :         double dfYReciprocalScale = 1.0 / dfYScale;
    1048         503 :         const int nYReciprocalScale =
    1049         503 :             static_cast<int>(dfYReciprocalScale + 0.5);
    1050         503 :         if (fabs(dfYReciprocalScale - nYReciprocalScale) < 0.05)
    1051         356 :             dfYScale = 1.0 / nYReciprocalScale;
    1052             :     }
    1053             : 
    1054             :     // XSCALE and YSCALE undocumented for now. Can help in some cases.
    1055             :     // Best would probably be a per-pixel scale computation.
    1056        2015 :     const char *pszXScale = CSLFetchNameValue(papszWarpOptions, "XSCALE");
    1057        2015 :     if (pszXScale != nullptr && !EQUAL(pszXScale, "FROM_GRID_SAMPLING"))
    1058           1 :         dfXScale = CPLAtof(pszXScale);
    1059        2015 :     const char *pszYScale = CSLFetchNameValue(papszWarpOptions, "YSCALE");
    1060        2015 :     if (pszYScale != nullptr)
    1061           1 :         dfYScale = CPLAtof(pszYScale);
    1062             : 
    1063             :     // If the xscale is significantly lower than the yscale, this is highly
    1064             :     // suspicious of a situation of wrapping a very large virtual file in
    1065             :     // geographic coordinates with left and right parts being close to the
    1066             :     // antimeridian. In that situation, the xscale computed by the above method
    1067             :     // is completely wrong. Prefer doing an average of a few sample points
    1068             :     // instead
    1069        2015 :     if ((dfYScale / dfXScale > 100 ||
    1070           1 :          (pszXScale != nullptr && EQUAL(pszXScale, "FROM_GRID_SAMPLING"))))
    1071             :     {
    1072             :         // Sample points along a grid
    1073           4 :         const int nPointsX = std::min(10, nDstXSize);
    1074           4 :         const int nPointsY = std::min(10, nDstYSize);
    1075           4 :         const int nPoints = 3 * nPointsX * nPointsY;
    1076           8 :         std::vector<double> padfX;
    1077           8 :         std::vector<double> padfY;
    1078           8 :         std::vector<double> padfZ(nPoints);
    1079           8 :         std::vector<int> pabSuccess(nPoints);
    1080          44 :         for (int iY = 0; iY < nPointsY; iY++)
    1081             :         {
    1082         440 :             for (int iX = 0; iX < nPointsX; iX++)
    1083             :             {
    1084         400 :                 const double dfX =
    1085             :                     nPointsX == 1
    1086         400 :                         ? 0.0
    1087         400 :                         : static_cast<double>(iX) * nDstXSize / (nPointsX - 1);
    1088         400 :                 const double dfY =
    1089             :                     nPointsY == 1
    1090         400 :                         ? 0.0
    1091         400 :                         : static_cast<double>(iY) * nDstYSize / (nPointsY - 1);
    1092             : 
    1093             :                 // Reproject each destination sample point and its neighbours
    1094             :                 // at (x+1,y) and (x,y+1), so as to get the local scale.
    1095         400 :                 padfX.push_back(dfX);
    1096         400 :                 padfY.push_back(dfY);
    1097             : 
    1098         400 :                 padfX.push_back((iX == nPointsX - 1) ? dfX - 1 : dfX + 1);
    1099         400 :                 padfY.push_back(dfY);
    1100             : 
    1101         400 :                 padfX.push_back(dfX);
    1102         400 :                 padfY.push_back((iY == nPointsY - 1) ? dfY - 1 : dfY + 1);
    1103             :             }
    1104             :         }
    1105           4 :         pfnTransformer(pTransformerArg, TRUE, nPoints, &padfX[0], &padfY[0],
    1106           4 :                        &padfZ[0], &pabSuccess[0]);
    1107             : 
    1108             :         // Compute the xscale at each sampling point
    1109           8 :         std::vector<double> adfXScales;
    1110         404 :         for (int i = 0; i < nPoints; i += 3)
    1111             :         {
    1112         400 :             if (pabSuccess[i] && pabSuccess[i + 1] && pabSuccess[i + 2])
    1113             :             {
    1114             :                 const double dfPointXScale =
    1115         400 :                     1.0 / std::max(std::abs(padfX[i + 1] - padfX[i]),
    1116         800 :                                    std::abs(padfX[i + 2] - padfX[i]));
    1117         400 :                 adfXScales.push_back(dfPointXScale);
    1118             :             }
    1119             :         }
    1120             : 
    1121             :         // Sort by increasing xscale
    1122           4 :         std::sort(adfXScales.begin(), adfXScales.end());
    1123             : 
    1124           4 :         if (!adfXScales.empty())
    1125             :         {
    1126             :             // Compute the average of the scales, but discard outlying small
    1127             :             // scales, in case some samples lie just along the discontinuity.
    1128           4 :             const double dfMaxPointXScale = adfXScales.back();
    1129           4 :             double dfSumPointXScale = 0;
    1130           4 :             int nCountPointScale = 0;
    1131         404 :             for (double dfPointXScale : adfXScales)
    1132             :             {
    1133         400 :                 if (dfPointXScale > dfMaxPointXScale / 10)
    1134             :                 {
    1135         398 :                     dfSumPointXScale += dfPointXScale;
    1136         398 :                     nCountPointScale++;
    1137             :                 }
    1138             :             }
    1139           4 :             if (nCountPointScale > 0)  // should always be true
    1140             :             {
    1141           4 :                 const double dfXScaleFromSampling =
    1142           4 :                     dfSumPointXScale / nCountPointScale;
    1143             : #if DEBUG_VERBOSE
    1144             :                 CPLDebug("WARP", "Correcting dfXScale from %f to %f", dfXScale,
    1145             :                          dfXScaleFromSampling);
    1146             : #endif
    1147           4 :                 dfXScale = dfXScaleFromSampling;
    1148             :             }
    1149             :         }
    1150             :     }
    1151             : 
    1152             : #if DEBUG_VERBOSE
    1153             :     CPLDebug("WARP", "dfXScale = %f, dfYScale = %f", dfXScale, dfYScale);
    1154             : #endif
    1155             : 
    1156        2015 :     const int bUse4SamplesFormula = dfXScale >= 0.95 && dfYScale >= 0.95;
    1157             : 
    1158             :     // Safety check for callers that would use GDALWarpKernel without using
    1159             :     // GDALWarpOperation.
    1160        1952 :     if ((eResample == GRA_CubicSpline || eResample == GRA_Lanczos ||
    1161        1889 :          ((eResample == GRA_Cubic || eResample == GRA_Bilinear) &&
    1162        4030 :           !bUse4SamplesFormula)) &&
    1163         388 :         atoi(CSLFetchNameValueDef(papszWarpOptions, "EXTRA_ELTS", "0")) !=
    1164             :             WARP_EXTRA_ELTS)
    1165             :     {
    1166           0 :         CPLError(CE_Failure, CPLE_AppDefined,
    1167             :                  "Source arrays must have WARP_EXTRA_ELTS extra elements at "
    1168             :                  "their end. "
    1169             :                  "See GDALWarpKernel class definition. If this condition is "
    1170             :                  "fulfilled, define a EXTRA_ELTS=%d warp options",
    1171             :                  WARP_EXTRA_ELTS);
    1172           0 :         return CE_Failure;
    1173             :     }
    1174             : 
    1175        2015 :     dfXFilter = anGWKFilterRadius[eResample];
    1176        2015 :     dfYFilter = anGWKFilterRadius[eResample];
    1177             : 
    1178        2015 :     nXRadius = dfXScale < 1.0 ? static_cast<int>(ceil(dfXFilter / dfXScale))
    1179        1561 :                               : static_cast<int>(dfXFilter);
    1180        2015 :     nYRadius = dfYScale < 1.0 ? static_cast<int>(ceil(dfYFilter / dfYScale))
    1181        1540 :                               : static_cast<int>(dfYFilter);
    1182             : 
    1183             :     // Filter window offset depends on the parity of the kernel radius.
    1184        2015 :     nFiltInitX = ((anGWKFilterRadius[eResample] + 1) % 2) - nXRadius;
    1185        2015 :     nFiltInitY = ((anGWKFilterRadius[eResample] + 1) % 2) - nYRadius;
    1186             : 
    1187        2015 :     bApplyVerticalShift =
    1188        2015 :         CPLFetchBool(papszWarpOptions, "APPLY_VERTICAL_SHIFT", false);
    1189        2015 :     dfMultFactorVerticalShift = CPLAtof(CSLFetchNameValueDef(
    1190        2015 :         papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT", "1.0"));
    1191             : 
    1192             :     /* -------------------------------------------------------------------- */
    1193             :     /*      Set up resampling functions.                                    */
    1194             :     /* -------------------------------------------------------------------- */
    1195        2015 :     if (CPLFetchBool(papszWarpOptions, "USE_GENERAL_CASE", false))
    1196          12 :         return GWKGeneralCase(this);
    1197             : 
    1198             : #if defined(HAVE_OPENCL)
    1199         559 :     if ((eWorkingDataType == GDT_Byte || eWorkingDataType == GDT_CInt16 ||
    1200         387 :          eWorkingDataType == GDT_UInt16 || eWorkingDataType == GDT_Int16 ||
    1201         258 :          eWorkingDataType == GDT_CFloat32 || eWorkingDataType == GDT_Float32) &&
    1202        1865 :         (eResample == GRA_Bilinear || eResample == GRA_Cubic ||
    1203        1427 :          eResample == GRA_CubicSpline || eResample == GRA_Lanczos) &&
    1204        4527 :         !bApplyVerticalShift &&
    1205             :         // OpenCL warping gives different results than the ones expected by autotest,
    1206             :         // so disable it by default even if found.
    1207        1042 :         CPLTestBool(
    1208         521 :             CSLFetchNameValueDef(papszWarpOptions, "USE_OPENCL",
    1209             :                                  CPLGetConfigOption("GDAL_USE_OPENCL", "NO"))))
    1210             :     {
    1211           0 :         if (pafUnifiedSrcDensity != nullptr)
    1212             :         {
    1213             :             // If pafUnifiedSrcDensity is only set to 1.0, then we can
    1214             :             // discard it.
    1215           0 :             bool bFoundNotOne = false;
    1216           0 :             for (GPtrDiff_t j = 0;
    1217           0 :                  j < static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize; j++)
    1218             :             {
    1219           0 :                 if (pafUnifiedSrcDensity[j] != 1.0)
    1220             :                 {
    1221           0 :                     bFoundNotOne = true;
    1222           0 :                     break;
    1223             :                 }
    1224             :             }
    1225           0 :             if (!bFoundNotOne)
    1226             :             {
    1227           0 :                 CPLFree(pafUnifiedSrcDensity);
    1228           0 :                 pafUnifiedSrcDensity = nullptr;
    1229             :             }
    1230             :         }
    1231             : 
    1232           0 :         if (pafUnifiedSrcDensity != nullptr)
    1233             :         {
    1234             :             // Typically if there's a cutline or an alpha band
    1235             :             static bool bHasWarned = false;
    1236           0 :             if (!bHasWarned)
    1237             :             {
    1238           0 :                 bHasWarned = true;
    1239           0 :                 CPLDebug("WARP", "pafUnifiedSrcDensity is not null, "
    1240             :                                  "hence OpenCL warper cannot be used");
    1241             :             }
    1242             :         }
    1243             :         else
    1244             :         {
    1245           0 :             const CPLErr eResult = GWKOpenCLCase(this);
    1246             : 
    1247             :             // CE_Warning tells us a suitable OpenCL environment was not available
    1248             :             // so we fall through to other CPU based methods.
    1249           0 :             if (eResult != CE_Warning)
    1250           0 :                 return eResult;
    1251             :         }
    1252             :     }
    1253             : #endif  // defined HAVE_OPENCL
    1254             : 
    1255        2003 :     const bool bNoMasksOrDstDensityOnly =
    1256        1999 :         papanBandSrcValid == nullptr && panUnifiedSrcValid == nullptr &&
    1257        4002 :         pafUnifiedSrcDensity == nullptr && panDstValid == nullptr;
    1258             : 
    1259        2003 :     if (eWorkingDataType == GDT_Byte && eResample == GRA_NearestNeighbour &&
    1260             :         bNoMasksOrDstDensityOnly)
    1261         860 :         return GWKNearestNoMasksOrDstDensityOnlyByte(this);
    1262             : 
    1263        1143 :     if (eWorkingDataType == GDT_Byte && eResample == GRA_Bilinear &&
    1264             :         bNoMasksOrDstDensityOnly)
    1265         125 :         return GWKBilinearNoMasksOrDstDensityOnlyByte(this);
    1266             : 
    1267        1018 :     if (eWorkingDataType == GDT_Byte && eResample == GRA_Cubic &&
    1268             :         bNoMasksOrDstDensityOnly)
    1269          72 :         return GWKCubicNoMasksOrDstDensityOnlyByte(this);
    1270             : 
    1271         946 :     if (eWorkingDataType == GDT_Byte && eResample == GRA_CubicSpline &&
    1272             :         bNoMasksOrDstDensityOnly)
    1273          12 :         return GWKCubicSplineNoMasksOrDstDensityOnlyByte(this);
    1274             : 
    1275         934 :     if (eWorkingDataType == GDT_Byte && eResample == GRA_NearestNeighbour)
    1276         276 :         return GWKNearestByte(this);
    1277             : 
    1278         658 :     if ((eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16) &&
    1279         129 :         eResample == GRA_NearestNeighbour && bNoMasksOrDstDensityOnly)
    1280          18 :         return GWKNearestNoMasksOrDstDensityOnlyShort(this);
    1281             : 
    1282         640 :     if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Cubic &&
    1283             :         bNoMasksOrDstDensityOnly)
    1284           5 :         return GWKCubicNoMasksOrDstDensityOnlyShort(this);
    1285             : 
    1286         635 :     if ((eWorkingDataType == GDT_Int16) && eResample == GRA_CubicSpline &&
    1287             :         bNoMasksOrDstDensityOnly)
    1288           6 :         return GWKCubicSplineNoMasksOrDstDensityOnlyShort(this);
    1289             : 
    1290         629 :     if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Bilinear &&
    1291             :         bNoMasksOrDstDensityOnly)
    1292          18 :         return GWKBilinearNoMasksOrDstDensityOnlyShort(this);
    1293             : 
    1294         611 :     if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Cubic &&
    1295             :         bNoMasksOrDstDensityOnly)
    1296          12 :         return GWKCubicNoMasksOrDstDensityOnlyUShort(this);
    1297             : 
    1298         599 :     if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_CubicSpline &&
    1299             :         bNoMasksOrDstDensityOnly)
    1300           5 :         return GWKCubicSplineNoMasksOrDstDensityOnlyUShort(this);
    1301             : 
    1302         594 :     if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Bilinear &&
    1303             :         bNoMasksOrDstDensityOnly)
    1304           6 :         return GWKBilinearNoMasksOrDstDensityOnlyUShort(this);
    1305             : 
    1306         588 :     if ((eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16) &&
    1307          59 :         eResample == GRA_NearestNeighbour)
    1308          27 :         return GWKNearestShort(this);
    1309             : 
    1310         561 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour &&
    1311             :         bNoMasksOrDstDensityOnly)
    1312          11 :         return GWKNearestNoMasksOrDstDensityOnlyFloat(this);
    1313             : 
    1314         550 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour)
    1315          36 :         return GWKNearestFloat(this);
    1316             : 
    1317         514 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_Bilinear &&
    1318             :         bNoMasksOrDstDensityOnly)
    1319           5 :         return GWKBilinearNoMasksOrDstDensityOnlyFloat(this);
    1320             : 
    1321         509 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_Cubic &&
    1322             :         bNoMasksOrDstDensityOnly)
    1323           9 :         return GWKCubicNoMasksOrDstDensityOnlyFloat(this);
    1324             : 
    1325             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    1326             :     if (eWorkingDataType == GDT_Float64 && eResample == GRA_Bilinear &&
    1327             :         bNoMasksOrDstDensityOnly)
    1328             :         return GWKBilinearNoMasksOrDstDensityOnlyDouble(this);
    1329             : 
    1330             :     if (eWorkingDataType == GDT_Float64 && eResample == GRA_Cubic &&
    1331             :         bNoMasksOrDstDensityOnly)
    1332             :         return GWKCubicNoMasksOrDstDensityOnlyDouble(this);
    1333             : #endif
    1334             : 
    1335         500 :     if (eResample == GRA_Average)
    1336          71 :         return GWKAverageOrMode(this);
    1337             : 
    1338         429 :     if (eResample == GRA_RMS)
    1339           9 :         return GWKAverageOrMode(this);
    1340             : 
    1341         420 :     if (eResample == GRA_Mode)
    1342          11 :         return GWKAverageOrMode(this);
    1343             : 
    1344         409 :     if (eResample == GRA_Max)
    1345           6 :         return GWKAverageOrMode(this);
    1346             : 
    1347         403 :     if (eResample == GRA_Min)
    1348           5 :         return GWKAverageOrMode(this);
    1349             : 
    1350         398 :     if (eResample == GRA_Med)
    1351           6 :         return GWKAverageOrMode(this);
    1352             : 
    1353         392 :     if (eResample == GRA_Q1)
    1354           5 :         return GWKAverageOrMode(this);
    1355             : 
    1356         387 :     if (eResample == GRA_Q3)
    1357           5 :         return GWKAverageOrMode(this);
    1358             : 
    1359         382 :     if (eResample == GRA_Sum)
    1360          18 :         return GWKSumPreserving(this);
    1361             : 
    1362         364 :     if (!GDALDataTypeIsComplex(eWorkingDataType))
    1363             :     {
    1364         133 :         return GWKRealCase(this);
    1365             :     }
    1366             : 
    1367         231 :     return GWKGeneralCase(this);
    1368             : }
    1369             : 
    1370             : /************************************************************************/
    1371             : /*                              Validate()                              */
    1372             : /************************************************************************/
    1373             : 
    1374             : /**
    1375             :  * \fn CPLErr GDALWarpKernel::Validate()
    1376             :  *
    1377             :  * Check the settings in the GDALWarpKernel, and issue a CPLError()
    1378             :  * (and return CE_Failure) if the configuration is considered to be
    1379             :  * invalid for some reason.
    1380             :  *
    1381             :  * This method will also do some standard defaulting such as setting
    1382             :  * pfnProgress to GDALDummyProgress() if it is NULL.
    1383             :  *
    1384             :  * @return CE_None on success or CE_Failure if an error is detected.
    1385             :  */
    1386             : 
    1387        2319 : CPLErr GDALWarpKernel::Validate()
    1388             : 
    1389             : {
    1390        2319 :     if (static_cast<size_t>(eResample) >=
    1391             :         (sizeof(anGWKFilterRadius) / sizeof(anGWKFilterRadius[0])))
    1392             :     {
    1393           0 :         CPLError(CE_Failure, CPLE_AppDefined,
    1394             :                  "Unsupported resampling method %d.",
    1395           0 :                  static_cast<int>(eResample));
    1396           0 :         return CE_Failure;
    1397             :     }
    1398             : 
    1399             :     // Tuples of values (e.g. "<R>,<G>,<B>" or "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>)") that must
    1400             :     // be ignored as contributing source pixels during resampling. Only taken into account by
    1401             :     // Average currently
    1402             :     const char *pszExcludedValues =
    1403        2319 :         CSLFetchNameValue(papszWarpOptions, "EXCLUDED_VALUES");
    1404        2319 :     if (pszExcludedValues)
    1405             :     {
    1406             :         const CPLStringList aosTokens(
    1407           8 :             CSLTokenizeString2(pszExcludedValues, "(,)", 0));
    1408           8 :         if ((aosTokens.size() % nBands) != 0)
    1409             :         {
    1410           1 :             CPLError(CE_Failure, CPLE_AppDefined,
    1411             :                      "EXCLUDED_VALUES should contain one or several tuples of "
    1412             :                      "%d values formatted like <R>,<G>,<B> or "
    1413             :                      "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>) if there are multiple "
    1414             :                      "tuples",
    1415             :                      nBands);
    1416           1 :             return CE_Failure;
    1417             :         }
    1418          14 :         std::vector<double> adfTuple;
    1419          28 :         for (int i = 0; i < aosTokens.size(); ++i)
    1420             :         {
    1421          21 :             adfTuple.push_back(CPLAtof(aosTokens[i]));
    1422          21 :             if (((i + 1) % nBands) == 0)
    1423             :             {
    1424           7 :                 m_aadfExcludedValues.push_back(adfTuple);
    1425           7 :                 adfTuple.clear();
    1426             :             }
    1427             :         }
    1428             :     }
    1429             : 
    1430        2318 :     return CE_None;
    1431             : }
    1432             : 
    1433             : /************************************************************************/
    1434             : /*                         GWKOverlayDensity()                          */
    1435             : /*                                                                      */
    1436             : /*      Compute the final density for the destination pixel.  This      */
    1437             : /*      is a function of the overlay density (passed in) and the        */
    1438             : /*      original density.                                               */
    1439             : /************************************************************************/
    1440             : 
    1441     7941280 : static void GWKOverlayDensity(const GDALWarpKernel *poWK, GPtrDiff_t iDstOffset,
    1442             :                               double dfDensity)
    1443             : {
    1444     7941280 :     if (dfDensity < 0.0001 || poWK->pafDstDensity == nullptr)
    1445     6750400 :         return;
    1446             : 
    1447     1190880 :     poWK->pafDstDensity[iDstOffset] = static_cast<float>(
    1448     1190880 :         1.0 - (1.0 - dfDensity) * (1.0 - poWK->pafDstDensity[iDstOffset]));
    1449             : }
    1450             : 
    1451             : /************************************************************************/
    1452             : /*                          GWKRoundValueT()                            */
    1453             : /************************************************************************/
    1454             : 
    1455             : template <class T, bool is_signed> struct sGWKRoundValueT
    1456             : {
    1457             :     static T eval(double);
    1458             : };
    1459             : 
    1460             : template <class T> struct sGWKRoundValueT<T, true> /* signed */
    1461             : {
    1462     2312700 :     static T eval(double dfValue)
    1463             :     {
    1464     2312700 :         return static_cast<T>(floor(dfValue + 0.5));
    1465             :     }
    1466             : };
    1467             : 
    1468             : template <class T> struct sGWKRoundValueT<T, false> /* unsigned */
    1469             : {
    1470    12960381 :     static T eval(double dfValue)
    1471             :     {
    1472    12960381 :         return static_cast<T>(dfValue + 0.5);
    1473             :     }
    1474             : };
    1475             : 
    1476    15244281 : template <class T> static T GWKRoundValueT(double dfValue)
    1477             : {
    1478    15244281 :     return sGWKRoundValueT<T, std::numeric_limits<T>::is_signed>::eval(dfValue);
    1479             : }
    1480             : 
    1481      269074 : template <> float GWKRoundValueT<float>(double dfValue)
    1482             : {
    1483      269074 :     return static_cast<float>(dfValue);
    1484             : }
    1485             : 
    1486             : #ifdef notused
    1487             : template <> double GWKRoundValueT<double>(double dfValue)
    1488             : {
    1489             :     return dfValue;
    1490             : }
    1491             : #endif
    1492             : 
    1493             : /************************************************************************/
    1494             : /*                            GWKClampValueT()                          */
    1495             : /************************************************************************/
    1496             : 
    1497    10463066 : template <class T> static CPL_INLINE T GWKClampValueT(double dfValue)
    1498             : {
    1499    10463066 :     if (dfValue < std::numeric_limits<T>::min())
    1500        3969 :         return std::numeric_limits<T>::min();
    1501    10450868 :     else if (dfValue > std::numeric_limits<T>::max())
    1502       18463 :         return std::numeric_limits<T>::max();
    1503             :     else
    1504    10429938 :         return GWKRoundValueT<T>(dfValue);
    1505             : }
    1506             : 
    1507      718914 : template <> float GWKClampValueT<float>(double dfValue)
    1508             : {
    1509      718914 :     return static_cast<float>(dfValue);
    1510             : }
    1511             : 
    1512             : #ifdef notused
    1513             : template <> double GWKClampValueT<double>(double dfValue)
    1514             : {
    1515             :     return dfValue;
    1516             : }
    1517             : #endif
    1518             : 
    1519             : /************************************************************************/
    1520             : /*                         GWKSetPixelValueRealT()                      */
    1521             : /************************************************************************/
    1522             : 
    1523             : template <class T>
    1524      159076 : static bool GWKSetPixelValueRealT(const GDALWarpKernel *poWK, int iBand,
    1525             :                                   GPtrDiff_t iDstOffset, double dfDensity,
    1526             :                                   T value)
    1527             : {
    1528      159076 :     T *pDst = reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
    1529             : 
    1530             :     /* -------------------------------------------------------------------- */
    1531             :     /*      If the source density is less than 100% we need to fetch the    */
    1532             :     /*      existing destination value, and mix it with the source to       */
    1533             :     /*      get the new "to apply" value.  Also compute composite           */
    1534             :     /*      density.                                                        */
    1535             :     /*                                                                      */
    1536             :     /*      We avoid mixing if density is very near one or risk mixing      */
    1537             :     /*      in very extreme nodata values and causing odd results (#1610)   */
    1538             :     /* -------------------------------------------------------------------- */
    1539      159076 :     if (dfDensity < 0.9999)
    1540             :     {
    1541      159076 :         if (dfDensity < 0.0001)
    1542           0 :             return true;
    1543             : 
    1544      159076 :         double dfDstDensity = 1.0;
    1545             : 
    1546      159076 :         if (poWK->pafDstDensity != nullptr)
    1547      157604 :             dfDstDensity = poWK->pafDstDensity[iDstOffset];
    1548        1472 :         else if (poWK->panDstValid != nullptr &&
    1549           0 :                  !CPLMaskGet(poWK->panDstValid, iDstOffset))
    1550           0 :             dfDstDensity = 0.0;
    1551             : 
    1552             :         // It seems like we also ought to be testing panDstValid[] here!
    1553             : 
    1554      159076 :         const double dfDstReal = pDst[iDstOffset];
    1555             : 
    1556             :         // The destination density is really only relative to the portion
    1557             :         // not occluded by the overlay.
    1558      159076 :         const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
    1559             : 
    1560      159076 :         const double dfReal = (value * dfDensity + dfDstReal * dfDstInfluence) /
    1561      159076 :                               (dfDensity + dfDstInfluence);
    1562             : 
    1563             :         // ----------------------------------------------------------------
    1564             :         // Actually apply the destination value.
    1565             :         //
    1566             :         // Avoid using the destination nodata value for integer datatypes
    1567             :         // if by chance it is equal to the computed pixel value. That
    1568             :         // adjustment is performed after this if/else, where the stored
    1569             :         // value is compared against padfDstNoDataReal[iBand] and nudged
    1570             :         // off it when the two are equal.
    1571             :         // ----------------------------------------------------------------
    1572      159076 :         pDst[iDstOffset] = GWKClampValueT<T>(dfReal);
    1573             :     }
    1574             :     else
    1575             :     {
    1576           0 :         pDst[iDstOffset] = value;
    1577             :     }
    1578             : 
    1579      159076 :     if (poWK->padfDstNoDataReal != nullptr &&
    1580           0 :         poWK->padfDstNoDataReal[iBand] == static_cast<double>(pDst[iDstOffset]))
    1581             :     {
    1582           0 :         if (pDst[iDstOffset] == std::numeric_limits<T>::min())
    1583           0 :             pDst[iDstOffset] = std::numeric_limits<T>::min() + 1;
    1584             :         else
    1585           0 :             pDst[iDstOffset]--;
    1586             :     }
    1587             : 
    1588      159076 :     return true;
    1589             : }
    1590             : 
    1591             : /************************************************************************/
    1592             : /*                          GWKSetPixelValue()                          */
    1593             : /************************************************************************/
    1594             : 
    1595     3867630 : static bool GWKSetPixelValue(const GDALWarpKernel *poWK, int iBand,
    1596             :                              GPtrDiff_t iDstOffset, double dfDensity,
    1597             :                              double dfReal, double dfImag)
    1598             : 
    1599             : {
    1600     3867630 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    1601             : 
    1602             :     /* -------------------------------------------------------------------- */
    1603             :     /*      If the source density is less than 100% we need to fetch the    */
    1604             :     /*      existing destination value, and mix it with the source to       */
    1605             :     /*      get the new "to apply" value.  Also compute composite           */
    1606             :     /*      density.                                                        */
    1607             :     /*                                                                      */
    1608             :     /*      We avoid mixing if density is very near one or risk mixing      */
    1609             :     /*      in very extreme nodata values and causing odd results (#1610)   */
    1610             :     /* -------------------------------------------------------------------- */
    1611     3867630 :     if (dfDensity < 0.9999)
    1612             :     {
    1613         800 :         if (dfDensity < 0.0001)
    1614           0 :             return true;
    1615             : 
    1616         800 :         double dfDstDensity = 1.0;
    1617         800 :         if (poWK->pafDstDensity != nullptr)
    1618         800 :             dfDstDensity = poWK->pafDstDensity[iDstOffset];
    1619           0 :         else if (poWK->panDstValid != nullptr &&
    1620           0 :                  !CPLMaskGet(poWK->panDstValid, iDstOffset))
    1621           0 :             dfDstDensity = 0.0;
    1622             : 
    1623         800 :         double dfDstReal = 0.0;
    1624         800 :         double dfDstImag = 0.0;
    1625             :         // It seems like we also ought to be testing panDstValid[] here!
    1626             : 
    1627             :         // TODO(schwehr): Factor out this repeated type of set.
    1628         800 :         switch (poWK->eWorkingDataType)
    1629             :         {
    1630           0 :             case GDT_Byte:
    1631           0 :                 dfDstReal = pabyDst[iDstOffset];
    1632           0 :                 dfDstImag = 0.0;
    1633           0 :                 break;
    1634             : 
    1635           0 :             case GDT_Int8:
    1636           0 :                 dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
    1637           0 :                 dfDstImag = 0.0;
    1638           0 :                 break;
    1639             : 
    1640         400 :             case GDT_Int16:
    1641         400 :                 dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
    1642         400 :                 dfDstImag = 0.0;
    1643         400 :                 break;
    1644             : 
    1645         400 :             case GDT_UInt16:
    1646         400 :                 dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
    1647         400 :                 dfDstImag = 0.0;
    1648         400 :                 break;
    1649             : 
    1650           0 :             case GDT_Int32:
    1651           0 :                 dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
    1652           0 :                 dfDstImag = 0.0;
    1653           0 :                 break;
    1654             : 
    1655           0 :             case GDT_UInt32:
    1656           0 :                 dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
    1657           0 :                 dfDstImag = 0.0;
    1658           0 :                 break;
    1659             : 
    1660           0 :             case GDT_Int64:
    1661           0 :                 dfDstReal = static_cast<double>(
    1662           0 :                     reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
    1663           0 :                 dfDstImag = 0.0;
    1664           0 :                 break;
    1665             : 
    1666           0 :             case GDT_UInt64:
    1667           0 :                 dfDstReal = static_cast<double>(
    1668           0 :                     reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
    1669           0 :                 dfDstImag = 0.0;
    1670           0 :                 break;
    1671             : 
    1672           0 :             case GDT_Float32:
    1673           0 :                 dfDstReal = reinterpret_cast<float *>(pabyDst)[iDstOffset];
    1674           0 :                 dfDstImag = 0.0;
    1675           0 :                 break;
    1676             : 
    1677           0 :             case GDT_Float64:
    1678           0 :                 dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
    1679           0 :                 dfDstImag = 0.0;
    1680           0 :                 break;
    1681             : 
    1682           0 :             case GDT_CInt16:
    1683           0 :                 dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2];
    1684           0 :                 dfDstImag =
    1685           0 :                     reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2 + 1];
    1686           0 :                 break;
    1687             : 
    1688           0 :             case GDT_CInt32:
    1689           0 :                 dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2];
    1690           0 :                 dfDstImag =
    1691           0 :                     reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2 + 1];
    1692           0 :                 break;
    1693             : 
    1694           0 :             case GDT_CFloat32:
    1695           0 :                 dfDstReal = reinterpret_cast<float *>(pabyDst)[iDstOffset * 2];
    1696           0 :                 dfDstImag =
    1697           0 :                     reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1];
    1698           0 :                 break;
    1699             : 
    1700           0 :             case GDT_CFloat64:
    1701           0 :                 dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset * 2];
    1702           0 :                 dfDstImag =
    1703           0 :                     reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1];
    1704           0 :                 break;
    1705             : 
    1706           0 :             case GDT_Unknown:
    1707             :             case GDT_TypeCount:
    1708           0 :                 CPLAssert(false);
    1709             :                 return false;
    1710             :         }
    1711             : 
    1712             :         // The destination density is really only relative to the portion
    1713             :         // not occluded by the overlay.
    1714         800 :         const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
    1715             : 
    1716         800 :         dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
    1717         800 :                  (dfDensity + dfDstInfluence);
    1718             : 
    1719         800 :         dfImag = (dfImag * dfDensity + dfDstImag * dfDstInfluence) /
    1720         800 :                  (dfDensity + dfDstInfluence);
    1721             :     }
    1722             : 
    1723             : /* -------------------------------------------------------------------- */
    1724             : /*      Actually apply the destination value.                           */
    1725             : /*                                                                      */
    1726             : /*      Avoid using the destination nodata value for integer datatypes  */
    1727             : /*      if by chance it is equal to the computed pixel value.           */
    1728             : /* -------------------------------------------------------------------- */
    1729             : 
    1730             : // TODO(schwehr): Can we make this a template?
    1731             : #define CLAMP(type)                                                            \
    1732             :     do                                                                         \
    1733             :     {                                                                          \
    1734             :         type *_pDst = reinterpret_cast<type *>(pabyDst);                       \
    1735             :         if (dfReal < static_cast<double>(std::numeric_limits<type>::min()))    \
    1736             :             _pDst[iDstOffset] =                                                \
    1737             :                 static_cast<type>(std::numeric_limits<type>::min());           \
    1738             :         else if (dfReal >                                                      \
    1739             :                  static_cast<double>(std::numeric_limits<type>::max()))        \
    1740             :             _pDst[iDstOffset] =                                                \
    1741             :                 static_cast<type>(std::numeric_limits<type>::max());           \
    1742             :         else                                                                   \
    1743             :             _pDst[iDstOffset] = (std::numeric_limits<type>::is_signed)         \
    1744             :                                     ? static_cast<type>(floor(dfReal + 0.5))   \
    1745             :                                     : static_cast<type>(dfReal + 0.5);         \
    1746             :         if (poWK->padfDstNoDataReal != nullptr &&                              \
    1747             :             poWK->padfDstNoDataReal[iBand] ==                                  \
    1748             :                 static_cast<double>(_pDst[iDstOffset]))                        \
    1749             :         {                                                                      \
    1750             :             if (_pDst[iDstOffset] ==                                           \
    1751             :                 static_cast<type>(std::numeric_limits<type>::min()))           \
    1752             :                 _pDst[iDstOffset] =                                            \
    1753             :                     static_cast<type>(std::numeric_limits<type>::min() + 1);   \
    1754             :             else                                                               \
    1755             :                 _pDst[iDstOffset]--;                                           \
    1756             :         }                                                                      \
    1757             :     } while (false)
    1758             : 
    1759     3867630 :     switch (poWK->eWorkingDataType)
    1760             :     {
    1761     3141450 :         case GDT_Byte:
    1762     3141450 :             CLAMP(GByte);
    1763     3141450 :             break;
    1764             : 
    1765           0 :         case GDT_Int8:
    1766           0 :             CLAMP(GInt8);
    1767           0 :             break;
    1768             : 
    1769        7465 :         case GDT_Int16:
    1770        7465 :             CLAMP(GInt16);
    1771        7465 :             break;
    1772             : 
    1773         463 :         case GDT_UInt16:
    1774         463 :             CLAMP(GUInt16);
    1775         463 :             break;
    1776             : 
    1777          63 :         case GDT_UInt32:
    1778          63 :             CLAMP(GUInt32);
    1779          63 :             break;
    1780             : 
    1781        3463 :         case GDT_Int32:
    1782        3463 :             CLAMP(GInt32);
    1783        3463 :             break;
    1784             : 
    1785           0 :         case GDT_UInt64:
    1786           0 :             CLAMP(std::uint64_t);
    1787           0 :             break;
    1788             : 
    1789           0 :         case GDT_Int64:
    1790           0 :             CLAMP(std::int64_t);
    1791           0 :             break;
    1792             : 
    1793      478957 :         case GDT_Float32:
    1794      478957 :             reinterpret_cast<float *>(pabyDst)[iDstOffset] =
    1795      478957 :                 static_cast<float>(dfReal);
    1796      478957 :             break;
    1797             : 
    1798         147 :         case GDT_Float64:
    1799         147 :             reinterpret_cast<double *>(pabyDst)[iDstOffset] = dfReal;
    1800         147 :             break;
    1801             : 
    1802      234178 :         case GDT_CInt16:
    1803             :         {
    1804             :             typedef GInt16 T;
    1805      234178 :             if (dfReal < static_cast<double>(std::numeric_limits<T>::min()))
    1806           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1807           0 :                     std::numeric_limits<T>::min();
    1808      234178 :             else if (dfReal >
    1809      234178 :                      static_cast<double>(std::numeric_limits<T>::max()))
    1810           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1811           0 :                     std::numeric_limits<T>::max();
    1812             :             else
    1813      234178 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1814      234178 :                     static_cast<T>(floor(dfReal + 0.5));
    1815      234178 :             if (dfImag < static_cast<double>(std::numeric_limits<T>::min()))
    1816           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1817           0 :                     std::numeric_limits<T>::min();
    1818      234178 :             else if (dfImag >
    1819      234178 :                      static_cast<double>(std::numeric_limits<T>::max()))
    1820           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1821           0 :                     std::numeric_limits<T>::max();
    1822             :             else
    1823      234178 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1824      234178 :                     static_cast<T>(floor(dfImag + 0.5));
    1825      234178 :             break;
    1826             :         }
    1827             : 
    1828         478 :         case GDT_CInt32:
    1829             :         {
    1830             :             typedef GInt32 T;
    1831         478 :             if (dfReal < static_cast<double>(std::numeric_limits<T>::min()))
    1832           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1833           0 :                     std::numeric_limits<T>::min();
    1834         478 :             else if (dfReal >
    1835         478 :                      static_cast<double>(std::numeric_limits<T>::max()))
    1836           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1837           0 :                     std::numeric_limits<T>::max();
    1838             :             else
    1839         478 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1840         478 :                     static_cast<T>(floor(dfReal + 0.5));
    1841         478 :             if (dfImag < static_cast<double>(std::numeric_limits<T>::min()))
    1842           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1843           0 :                     std::numeric_limits<T>::min();
    1844         478 :             else if (dfImag >
    1845         478 :                      static_cast<double>(std::numeric_limits<T>::max()))
    1846           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1847           0 :                     std::numeric_limits<T>::max();
    1848             :             else
    1849         478 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1850         478 :                     static_cast<T>(floor(dfImag + 0.5));
    1851         478 :             break;
    1852             :         }
    1853             : 
    1854         490 :         case GDT_CFloat32:
    1855         490 :             reinterpret_cast<float *>(pabyDst)[iDstOffset * 2] =
    1856         490 :                 static_cast<float>(dfReal);
    1857         490 :             reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1] =
    1858         490 :                 static_cast<float>(dfImag);
    1859         490 :             break;
    1860             : 
    1861         478 :         case GDT_CFloat64:
    1862         478 :             reinterpret_cast<double *>(pabyDst)[iDstOffset * 2] = dfReal;
    1863         478 :             reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1] = dfImag;
    1864         478 :             break;
    1865             : 
    1866           0 :         case GDT_Unknown:
    1867             :         case GDT_TypeCount:
    1868           0 :             return false;
    1869             :     }
    1870             : 
    1871     3867630 :     return true;
    1872             : }
    1873             : 
    1874             : /************************************************************************/
    1875             : /*                       GWKSetPixelValueReal()                         */
    1876             : /************************************************************************/
    1877             : 
    1878      923761 : static bool GWKSetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
    1879             :                                  GPtrDiff_t iDstOffset, double dfDensity,
    1880             :                                  double dfReal)
    1881             : 
    1882             : {
    1883      923761 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    1884             : 
    1885             :     /* -------------------------------------------------------------------- */
    1886             :     /*      If the source density is less than 100% we need to fetch the    */
    1887             :     /*      existing destination value, and mix it with the source to       */
    1888             :     /*      get the new "to apply" value.  Also compute composite           */
    1889             :     /*      density.                                                        */
    1890             :     /*                                                                      */
    1891             :     /*      We avoid mixing if density is very near one or risk mixing      */
    1892             :     /*      in very extreme nodata values and causing odd results (#1610)   */
    1893             :     /* -------------------------------------------------------------------- */
    1894      923761 :     if (dfDensity < 0.9999)
    1895             :     {
    1896         600 :         if (dfDensity < 0.0001)
    1897           0 :             return true;
    1898             : 
    1899         600 :         double dfDstReal = 0.0;
    1900         600 :         double dfDstDensity = 1.0;
    1901             : 
    1902         600 :         if (poWK->pafDstDensity != nullptr)
    1903         600 :             dfDstDensity = poWK->pafDstDensity[iDstOffset];
    1904           0 :         else if (poWK->panDstValid != nullptr &&
    1905           0 :                  !CPLMaskGet(poWK->panDstValid, iDstOffset))
    1906           0 :             dfDstDensity = 0.0;
    1907             : 
    1908             :         // It seems like we also ought to be testing panDstValid[] here!
    1909             : 
    1910         600 :         switch (poWK->eWorkingDataType)
    1911             :         {
    1912           0 :             case GDT_Byte:
    1913           0 :                 dfDstReal = pabyDst[iDstOffset];
    1914           0 :                 break;
    1915             : 
    1916           0 :             case GDT_Int8:
    1917           0 :                 dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
    1918           0 :                 break;
    1919             : 
    1920         300 :             case GDT_Int16:
    1921         300 :                 dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
    1922         300 :                 break;
    1923             : 
    1924         300 :             case GDT_UInt16:
    1925         300 :                 dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
    1926         300 :                 break;
    1927             : 
    1928           0 :             case GDT_Int32:
    1929           0 :                 dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
    1930           0 :                 break;
    1931             : 
    1932           0 :             case GDT_UInt32:
    1933           0 :                 dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
    1934           0 :                 break;
    1935             : 
    1936           0 :             case GDT_Int64:
    1937           0 :                 dfDstReal = static_cast<double>(
    1938           0 :                     reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
    1939           0 :                 break;
    1940             : 
    1941           0 :             case GDT_UInt64:
    1942           0 :                 dfDstReal = static_cast<double>(
    1943           0 :                     reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
    1944           0 :                 break;
    1945             : 
    1946           0 :             case GDT_Float32:
    1947           0 :                 dfDstReal = reinterpret_cast<float *>(pabyDst)[iDstOffset];
    1948           0 :                 break;
    1949             : 
    1950           0 :             case GDT_Float64:
    1951           0 :                 dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
    1952           0 :                 break;
    1953             : 
    1954           0 :             case GDT_CInt16:
    1955             :             case GDT_CInt32:
    1956             :             case GDT_CFloat32:
    1957             :             case GDT_CFloat64:
    1958             :             case GDT_Unknown:
    1959             :             case GDT_TypeCount:
    1960           0 :                 CPLAssert(false);
    1961             :                 return false;
    1962             :         }
    1963             : 
    1964             :         // The destination density is really only relative to the portion
    1965             :         // not occluded by the overlay.
    1966         600 :         const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
    1967             : 
    1968         600 :         dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
    1969         600 :                  (dfDensity + dfDstInfluence);
    1970             :     }
    1971             : 
    1972             :     /* -------------------------------------------------------------------- */
    1973             :     /*      Actually apply the destination value.                           */
    1974             :     /*                                                                      */
    1975             :     /*      Avoid using the destination nodata value for integer datatypes  */
    1976             :     /*      if by chance it is equal to the computed pixel value.           */
    1977             :     /* -------------------------------------------------------------------- */
    1978             : 
    1979      923761 :     switch (poWK->eWorkingDataType)
    1980             :     {
    1981      916736 :         case GDT_Byte:
    1982      916736 :             CLAMP(GByte);
    1983      916736 :             break;
    1984             : 
    1985           0 :         case GDT_Int8:
    1986           0 :             CLAMP(GInt8);
    1987           0 :             break;
    1988             : 
    1989        1085 :         case GDT_Int16:
    1990        1085 :             CLAMP(GInt16);
    1991        1085 :             break;
    1992             : 
    1993         363 :         case GDT_UInt16:
    1994         363 :             CLAMP(GUInt16);
    1995         363 :             break;
    1996             : 
    1997         315 :         case GDT_UInt32:
    1998         315 :             CLAMP(GUInt32);
    1999         315 :             break;
    2000             : 
    2001        1318 :         case GDT_Int32:
    2002        1318 :             CLAMP(GInt32);
    2003        1318 :             break;
    2004             : 
    2005           0 :         case GDT_UInt64:
    2006           0 :             CLAMP(std::uint64_t);
    2007           0 :             break;
    2008             : 
    2009         100 :         case GDT_Int64:
    2010         100 :             CLAMP(std::int64_t);
    2011         100 :             break;
    2012             : 
    2013        3426 :         case GDT_Float32:
    2014        3426 :             reinterpret_cast<float *>(pabyDst)[iDstOffset] =
    2015        3426 :                 static_cast<float>(dfReal);
    2016        3426 :             break;
    2017             : 
    2018         418 :         case GDT_Float64:
    2019         418 :             reinterpret_cast<double *>(pabyDst)[iDstOffset] = dfReal;
    2020         418 :             break;
    2021             : 
    2022           0 :         case GDT_CInt16:
    2023             :         case GDT_CInt32:
    2024             :         case GDT_CFloat32:
    2025             :         case GDT_CFloat64:
    2026           0 :             return false;
    2027             : 
    2028           0 :         case GDT_Unknown:
    2029             :         case GDT_TypeCount:
    2030           0 :             CPLAssert(false);
    2031             :             return false;
    2032             :     }
    2033             : 
    2034      923761 :     return true;
    2035             : }
    2036             : 
    2037             : /************************************************************************/
    2038             : /*                          GWKGetPixelValue()                          */
    2039             : /************************************************************************/
    2040             : 
    2041             : /* It is assumed that panUnifiedSrcValid has been checked before */
    2042             : 
    2043    29336000 : static bool GWKGetPixelValue(const GDALWarpKernel *poWK, int iBand,
    2044             :                              GPtrDiff_t iSrcOffset, double *pdfDensity,
    2045             :                              double *pdfReal, double *pdfImag)
    2046             : 
    2047             : {
    2048    29336000 :     GByte *pabySrc = poWK->papabySrcImage[iBand];
    2049             : 
    2050    58672100 :     if (poWK->papanBandSrcValid != nullptr &&
    2051    29336000 :         poWK->papanBandSrcValid[iBand] != nullptr &&
    2052           0 :         !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
    2053             :     {
    2054           0 :         *pdfDensity = 0.0;
    2055           0 :         return false;
    2056             :     }
    2057             : 
    2058    29336000 :     *pdfReal = 0.0;
    2059    29336000 :     *pdfImag = 0.0;
    2060             : 
    2061             :     // TODO(schwehr): Fix casting.
    2062    29336000 :     switch (poWK->eWorkingDataType)
    2063             :     {
    2064    28245600 :         case GDT_Byte:
    2065    28245600 :             *pdfReal = pabySrc[iSrcOffset];
    2066    28245600 :             *pdfImag = 0.0;
    2067    28245600 :             break;
    2068             : 
    2069           0 :         case GDT_Int8:
    2070           0 :             *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
    2071           0 :             *pdfImag = 0.0;
    2072           0 :             break;
    2073             : 
    2074       28181 :         case GDT_Int16:
    2075       28181 :             *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
    2076       28181 :             *pdfImag = 0.0;
    2077       28181 :             break;
    2078             : 
    2079         163 :         case GDT_UInt16:
    2080         163 :             *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
    2081         163 :             *pdfImag = 0.0;
    2082         163 :             break;
    2083             : 
    2084       13663 :         case GDT_Int32:
    2085       13663 :             *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
    2086       13663 :             *pdfImag = 0.0;
    2087       13663 :             break;
    2088             : 
    2089          63 :         case GDT_UInt32:
    2090          63 :             *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
    2091          63 :             *pdfImag = 0.0;
    2092          63 :             break;
    2093             : 
    2094           0 :         case GDT_Int64:
    2095           0 :             *pdfReal = static_cast<double>(
    2096           0 :                 reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
    2097           0 :             *pdfImag = 0.0;
    2098           0 :             break;
    2099             : 
    2100           0 :         case GDT_UInt64:
    2101           0 :             *pdfReal = static_cast<double>(
    2102           0 :                 reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
    2103           0 :             *pdfImag = 0.0;
    2104           0 :             break;
    2105             : 
    2106     1047220 :         case GDT_Float32:
    2107     1047220 :             *pdfReal = reinterpret_cast<float *>(pabySrc)[iSrcOffset];
    2108     1047220 :             *pdfImag = 0.0;
    2109     1047220 :             break;
    2110             : 
    2111         582 :         case GDT_Float64:
    2112         582 :             *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
    2113         582 :             *pdfImag = 0.0;
    2114         582 :             break;
    2115             : 
    2116         130 :         case GDT_CInt16:
    2117         130 :             *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2];
    2118         130 :             *pdfImag = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2 + 1];
    2119         130 :             break;
    2120             : 
    2121         130 :         case GDT_CInt32:
    2122         130 :             *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2];
    2123         130 :             *pdfImag = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2 + 1];
    2124         130 :             break;
    2125             : 
    2126         178 :         case GDT_CFloat32:
    2127         178 :             *pdfReal = reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2];
    2128         178 :             *pdfImag = reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2 + 1];
    2129         178 :             break;
    2130             : 
    2131         130 :         case GDT_CFloat64:
    2132         130 :             *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2];
    2133         130 :             *pdfImag = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2 + 1];
    2134         130 :             break;
    2135             : 
    2136           0 :         case GDT_Unknown:
    2137             :         case GDT_TypeCount:
    2138           0 :             CPLAssert(false);
    2139             :             *pdfDensity = 0.0;
    2140             :             return false;
    2141             :     }
    2142             : 
    2143    29336000 :     if (poWK->pafUnifiedSrcDensity != nullptr)
    2144     3015160 :         *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    2145             :     else
    2146    26320900 :         *pdfDensity = 1.0;
    2147             : 
    2148    29336000 :     return *pdfDensity != 0.0;
    2149             : }
    2150             : 
    2151             : /************************************************************************/
    2152             : /*                       GWKGetPixelValueReal()                         */
    2153             : /************************************************************************/
    2154             : 
    2155        1012 : static bool GWKGetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
    2156             :                                  GPtrDiff_t iSrcOffset, double *pdfDensity,
    2157             :                                  double *pdfReal)
    2158             : 
    2159             : {
    2160        1012 :     GByte *pabySrc = poWK->papabySrcImage[iBand];
    2161             : 
    2162        2026 :     if (poWK->papanBandSrcValid != nullptr &&
    2163        1014 :         poWK->papanBandSrcValid[iBand] != nullptr &&
    2164           2 :         !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
    2165             :     {
    2166           0 :         *pdfDensity = 0.0;
    2167           0 :         return false;
    2168             :     }
    2169             : 
    2170        1012 :     switch (poWK->eWorkingDataType)
    2171             :     {
    2172           1 :         case GDT_Byte:
    2173           1 :             *pdfReal = pabySrc[iSrcOffset];
    2174           1 :             break;
    2175             : 
    2176           0 :         case GDT_Int8:
    2177           0 :             *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
    2178           0 :             break;
    2179             : 
    2180           1 :         case GDT_Int16:
    2181           1 :             *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
    2182           1 :             break;
    2183             : 
    2184           1 :         case GDT_UInt16:
    2185           1 :             *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
    2186           1 :             break;
    2187             : 
    2188         870 :         case GDT_Int32:
    2189         870 :             *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
    2190         870 :             break;
    2191             : 
    2192          67 :         case GDT_UInt32:
    2193          67 :             *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
    2194          67 :             break;
    2195             : 
    2196           0 :         case GDT_Int64:
    2197           0 :             *pdfReal = static_cast<double>(
    2198           0 :                 reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
    2199           0 :             break;
    2200             : 
    2201           0 :         case GDT_UInt64:
    2202           0 :             *pdfReal = static_cast<double>(
    2203           0 :                 reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
    2204           0 :             break;
    2205             : 
    2206           2 :         case GDT_Float32:
    2207           2 :             *pdfReal = reinterpret_cast<float *>(pabySrc)[iSrcOffset];
    2208           2 :             break;
    2209             : 
    2210          70 :         case GDT_Float64:
    2211          70 :             *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
    2212          70 :             break;
    2213             : 
    2214           0 :         case GDT_CInt16:
    2215             :         case GDT_CInt32:
    2216             :         case GDT_CFloat32:
    2217             :         case GDT_CFloat64:
    2218             :         case GDT_Unknown:
    2219             :         case GDT_TypeCount:
    2220           0 :             CPLAssert(false);
    2221             :             return false;
    2222             :     }
    2223             : 
    2224        1012 :     if (poWK->pafUnifiedSrcDensity != nullptr)
    2225           0 :         *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    2226             :     else
    2227        1012 :         *pdfDensity = 1.0;
    2228             : 
    2229        1012 :     return *pdfDensity != 0.0;
    2230             : }
    2231             : 
    2232             : /************************************************************************/
    2233             : /*                          GWKGetPixelRow()                            */
    2234             : /************************************************************************/
    2235             : 
    2236             : /* It is assumed that adfImag[] is set to 0 by caller code for non-complex */
    2237             : /* data-types. */
    2238             : 
    2239     2353850 : static bool GWKGetPixelRow(const GDALWarpKernel *poWK, int iBand,
    2240             :                            GPtrDiff_t iSrcOffset, int nHalfSrcLen,
    2241             :                            double *padfDensity, double adfReal[],
    2242             :                            double *padfImag)
    2243             : {
    2244             :     // We know that nSrcLen is even, so we can *always* unroll loops 2x.
    2245     2353850 :     const int nSrcLen = nHalfSrcLen * 2;
    2246     2353850 :     bool bHasValid = false;
    2247             : 
    2248     2353850 :     if (padfDensity != nullptr)
    2249             :     {
    2250             :         // Init the density.
    2251     3345770 :         for (int i = 0; i < nSrcLen; i += 2)
    2252             :         {
    2253     2189510 :             padfDensity[i] = 1.0;
    2254     2189510 :             padfDensity[i + 1] = 1.0;
    2255             :         }
    2256             : 
    2257     1156260 :         if (poWK->panUnifiedSrcValid != nullptr)
    2258             :         {
    2259     3281460 :             for (int i = 0; i < nSrcLen; i += 2)
    2260             :             {
    2261     2142070 :                 if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i))
    2262     2067740 :                     bHasValid = true;
    2263             :                 else
    2264       74323 :                     padfDensity[i] = 0.0;
    2265             : 
    2266     2142070 :                 if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i + 1))
    2267     2068400 :                     bHasValid = true;
    2268             :                 else
    2269       73668 :                     padfDensity[i + 1] = 0.0;
    2270             :             }
    2271             : 
    2272             :             // Reset or fail as needed.
    2273     1139400 :             if (bHasValid)
    2274     1116590 :                 bHasValid = false;
    2275             :             else
    2276       22806 :                 return false;
    2277             :         }
    2278             : 
    2279     1133450 :         if (poWK->papanBandSrcValid != nullptr &&
    2280           0 :             poWK->papanBandSrcValid[iBand] != nullptr)
    2281             :         {
    2282           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2283             :             {
    2284           0 :                 if (CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset + i))
    2285           0 :                     bHasValid = true;
    2286             :                 else
    2287           0 :                     padfDensity[i] = 0.0;
    2288             : 
    2289           0 :                 if (CPLMaskGet(poWK->papanBandSrcValid[iBand],
    2290           0 :                                iSrcOffset + i + 1))
    2291           0 :                     bHasValid = true;
    2292             :                 else
    2293           0 :                     padfDensity[i + 1] = 0.0;
    2294             :             }
    2295             : 
    2296             :             // Reset or fail as needed.
    2297           0 :             if (bHasValid)
    2298           0 :                 bHasValid = false;
    2299             :             else
    2300           0 :                 return false;
    2301             :         }
    2302             :     }
    2303             : 
    2304             :     // TODO(schwehr): Fix casting.
    2305             :     // Fetch data.
    2306     2331040 :     switch (poWK->eWorkingDataType)
    2307             :     {
    2308     1121060 :         case GDT_Byte:
    2309             :         {
    2310     1121060 :             GByte *pSrc =
    2311     1121060 :                 reinterpret_cast<GByte *>(poWK->papabySrcImage[iBand]);
    2312     1121060 :             pSrc += iSrcOffset;
    2313     3243800 :             for (int i = 0; i < nSrcLen; i += 2)
    2314             :             {
    2315     2122740 :                 adfReal[i] = pSrc[i];
    2316     2122740 :                 adfReal[i + 1] = pSrc[i + 1];
    2317             :             }
    2318     1121060 :             break;
    2319             :         }
    2320             : 
    2321           0 :         case GDT_Int8:
    2322             :         {
    2323           0 :             GInt8 *pSrc =
    2324           0 :                 reinterpret_cast<GInt8 *>(poWK->papabySrcImage[iBand]);
    2325           0 :             pSrc += iSrcOffset;
    2326           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2327             :             {
    2328           0 :                 adfReal[i] = pSrc[i];
    2329           0 :                 adfReal[i + 1] = pSrc[i + 1];
    2330             :             }
    2331           0 :             break;
    2332             :         }
    2333             : 
    2334        5558 :         case GDT_Int16:
    2335             :         {
    2336        5558 :             GInt16 *pSrc =
    2337        5558 :                 reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
    2338        5558 :             pSrc += iSrcOffset;
    2339       21380 :             for (int i = 0; i < nSrcLen; i += 2)
    2340             :             {
    2341       15822 :                 adfReal[i] = pSrc[i];
    2342       15822 :                 adfReal[i + 1] = pSrc[i + 1];
    2343             :             }
    2344        5558 :             break;
    2345             :         }
    2346             : 
    2347        4114 :         case GDT_UInt16:
    2348             :         {
    2349        4114 :             GUInt16 *pSrc =
    2350        4114 :                 reinterpret_cast<GUInt16 *>(poWK->papabySrcImage[iBand]);
    2351        4114 :             pSrc += iSrcOffset;
    2352       18492 :             for (int i = 0; i < nSrcLen; i += 2)
    2353             :             {
    2354       14378 :                 adfReal[i] = pSrc[i];
    2355       14378 :                 adfReal[i + 1] = pSrc[i + 1];
    2356             :             }
    2357        4114 :             break;
    2358             :         }
    2359             : 
    2360        1130 :         case GDT_Int32:
    2361             :         {
    2362        1130 :             GInt32 *pSrc =
    2363        1130 :                 reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
    2364        1130 :             pSrc += iSrcOffset;
    2365        2992 :             for (int i = 0; i < nSrcLen; i += 2)
    2366             :             {
    2367        1862 :                 adfReal[i] = pSrc[i];
    2368        1862 :                 adfReal[i + 1] = pSrc[i + 1];
    2369             :             }
    2370        1130 :             break;
    2371             :         }
    2372             : 
    2373         750 :         case GDT_UInt32:
    2374             :         {
    2375         750 :             GUInt32 *pSrc =
    2376         750 :                 reinterpret_cast<GUInt32 *>(poWK->papabySrcImage[iBand]);
    2377         750 :             pSrc += iSrcOffset;
    2378        2232 :             for (int i = 0; i < nSrcLen; i += 2)
    2379             :             {
    2380        1482 :                 adfReal[i] = pSrc[i];
    2381        1482 :                 adfReal[i + 1] = pSrc[i + 1];
    2382             :             }
    2383         750 :             break;
    2384             :         }
    2385             : 
    2386         190 :         case GDT_Int64:
    2387             :         {
    2388         190 :             auto pSrc =
    2389         190 :                 reinterpret_cast<std::int64_t *>(poWK->papabySrcImage[iBand]);
    2390         190 :             pSrc += iSrcOffset;
    2391         380 :             for (int i = 0; i < nSrcLen; i += 2)
    2392             :             {
    2393         190 :                 adfReal[i] = static_cast<double>(pSrc[i]);
    2394         190 :                 adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
    2395             :             }
    2396         190 :             break;
    2397             :         }
    2398             : 
    2399           0 :         case GDT_UInt64:
    2400             :         {
    2401           0 :             auto pSrc =
    2402           0 :                 reinterpret_cast<std::uint64_t *>(poWK->papabySrcImage[iBand]);
    2403           0 :             pSrc += iSrcOffset;
    2404           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2405             :             {
    2406           0 :                 adfReal[i] = static_cast<double>(pSrc[i]);
    2407           0 :                 adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
    2408             :             }
    2409           0 :             break;
    2410             :         }
    2411             : 
    2412       25074 :         case GDT_Float32:
    2413             :         {
    2414       25074 :             float *pSrc =
    2415       25074 :                 reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
    2416       25074 :             pSrc += iSrcOffset;
    2417      121347 :             for (int i = 0; i < nSrcLen; i += 2)
    2418             :             {
    2419       96273 :                 adfReal[i] = pSrc[i];
    2420       96273 :                 adfReal[i + 1] = pSrc[i + 1];
    2421             :             }
    2422       25074 :             break;
    2423             :         }
    2424             : 
    2425         940 :         case GDT_Float64:
    2426             :         {
    2427         940 :             double *pSrc =
    2428         940 :                 reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
    2429         940 :             pSrc += iSrcOffset;
    2430        2612 :             for (int i = 0; i < nSrcLen; i += 2)
    2431             :             {
    2432        1672 :                 adfReal[i] = pSrc[i];
    2433        1672 :                 adfReal[i + 1] = pSrc[i + 1];
    2434             :             }
    2435         940 :             break;
    2436             :         }
    2437             : 
    2438     1169410 :         case GDT_CInt16:
    2439             :         {
    2440     1169410 :             GInt16 *pSrc =
    2441     1169410 :                 reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
    2442     1169410 :             pSrc += 2 * iSrcOffset;
    2443     4676400 :             for (int i = 0; i < nSrcLen; i += 2)
    2444             :             {
    2445     3506990 :                 adfReal[i] = pSrc[2 * i];
    2446     3506990 :                 padfImag[i] = pSrc[2 * i + 1];
    2447             : 
    2448     3506990 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2449     3506990 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2450             :             }
    2451     1169410 :             break;
    2452             :         }
    2453             : 
    2454         940 :         case GDT_CInt32:
    2455             :         {
    2456         940 :             GInt32 *pSrc =
    2457         940 :                 reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
    2458         940 :             pSrc += 2 * iSrcOffset;
    2459        2612 :             for (int i = 0; i < nSrcLen; i += 2)
    2460             :             {
    2461        1672 :                 adfReal[i] = pSrc[2 * i];
    2462        1672 :                 padfImag[i] = pSrc[2 * i + 1];
    2463             : 
    2464        1672 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2465        1672 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2466             :             }
    2467         940 :             break;
    2468             :         }
    2469             : 
    2470         940 :         case GDT_CFloat32:
    2471             :         {
    2472         940 :             float *pSrc =
    2473         940 :                 reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
    2474         940 :             pSrc += 2 * iSrcOffset;
    2475        2612 :             for (int i = 0; i < nSrcLen; i += 2)
    2476             :             {
    2477        1672 :                 adfReal[i] = pSrc[2 * i];
    2478        1672 :                 padfImag[i] = pSrc[2 * i + 1];
    2479             : 
    2480        1672 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2481        1672 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2482             :             }
    2483         940 :             break;
    2484             :         }
    2485             : 
    2486         940 :         case GDT_CFloat64:
    2487             :         {
    2488         940 :             double *pSrc =
    2489         940 :                 reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
    2490         940 :             pSrc += 2 * iSrcOffset;
    2491        2612 :             for (int i = 0; i < nSrcLen; i += 2)
    2492             :             {
    2493        1672 :                 adfReal[i] = pSrc[2 * i];
    2494        1672 :                 padfImag[i] = pSrc[2 * i + 1];
    2495             : 
    2496        1672 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2497        1672 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2498             :             }
    2499         940 :             break;
    2500             :         }
    2501             : 
    2502           0 :         case GDT_Unknown:
    2503             :         case GDT_TypeCount:
    2504           0 :             CPLAssert(false);
    2505             :             if (padfDensity)
    2506             :                 memset(padfDensity, 0, nSrcLen * sizeof(double));
    2507             :             return false;
    2508             :     }
    2509             : 
    2510     2331040 :     if (padfDensity == nullptr)
    2511     1197590 :         return true;
    2512             : 
    2513     1133450 :     if (poWK->pafUnifiedSrcDensity == nullptr)
    2514             :     {
    2515     3234200 :         for (int i = 0; i < nSrcLen; i += 2)
    2516             :         {
    2517             :             // Take into account earlier calcs.
    2518     2112850 :             if (padfDensity[i] > SRC_DENSITY_THRESHOLD)
    2519             :             {
    2520     2072950 :                 padfDensity[i] = 1.0;
    2521     2072950 :                 bHasValid = true;
    2522             :             }
    2523             : 
    2524     2112850 :             if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD)
    2525             :             {
    2526     2073600 :                 padfDensity[i + 1] = 1.0;
    2527     2073600 :                 bHasValid = true;
    2528             :             }
    2529             :         }
    2530             :     }
    2531             :     else
    2532             :     {
    2533       54348 :         for (int i = 0; i < nSrcLen; i += 2)
    2534             :         {
    2535       42243 :             if (padfDensity[i] > SRC_DENSITY_THRESHOLD)
    2536       42243 :                 padfDensity[i] = poWK->pafUnifiedSrcDensity[iSrcOffset + i];
    2537       42243 :             if (padfDensity[i] > SRC_DENSITY_THRESHOLD)
    2538       41704 :                 bHasValid = true;
    2539             : 
    2540       42243 :             if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD)
    2541       42243 :                 padfDensity[i + 1] =
    2542       42243 :                     poWK->pafUnifiedSrcDensity[iSrcOffset + i + 1];
    2543       42243 :             if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD)
    2544       41594 :                 bHasValid = true;
    2545             :         }
    2546             :     }
    2547             : 
    2548     1133450 :     return bHasValid;
    2549             : }
    2550             : 
    2551             : /************************************************************************/
    2552             : /*                          GWKGetPixelT()                              */
    2553             : /************************************************************************/
    2554             : 
    2555             : template <class T>
    2556     7332114 : static bool GWKGetPixelT(const GDALWarpKernel *poWK, int iBand,
    2557             :                          GPtrDiff_t iSrcOffset, double *pdfDensity, T *pValue)
    2558             : 
    2559             : {
    2560     7332114 :     T *pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
    2561             : 
    2562    16802154 :     if ((poWK->panUnifiedSrcValid != nullptr &&
    2563    14664208 :          !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset)) ||
    2564     7332114 :         (poWK->papanBandSrcValid != nullptr &&
    2565          21 :          poWK->papanBandSrcValid[iBand] != nullptr &&
    2566          21 :          !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset)))
    2567             :     {
    2568           9 :         *pdfDensity = 0.0;
    2569           9 :         return false;
    2570             :     }
    2571             : 
    2572     7332104 :     *pValue = pSrc[iSrcOffset];
    2573             : 
    2574     7332104 :     if (poWK->pafUnifiedSrcDensity == nullptr)
    2575     6997351 :         *pdfDensity = 1.0;
    2576             :     else
    2577      334754 :         *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    2578             : 
    2579     7332104 :     return *pdfDensity != 0.0;
    2580             : }
    2581             : 
    2582             : /************************************************************************/
    2583             : /*                        GWKBilinearResample()                         */
    2584             : /*     Set of bilinear interpolators                                    */
    2585             : /************************************************************************/
    2586             : 
    2587       72664 : static bool GWKBilinearResample4Sample(const GDALWarpKernel *poWK, int iBand,
    2588             :                                        double dfSrcX, double dfSrcY,
    2589             :                                        double *pdfDensity, double *pdfReal,
    2590             :                                        double *pdfImag)
    2591             : 
    2592             : {
    2593             :     // Save as local variables to avoid following pointers.
    2594       72664 :     const int nSrcXSize = poWK->nSrcXSize;
    2595       72664 :     const int nSrcYSize = poWK->nSrcYSize;
    2596             : 
    2597       72664 :     int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    2598       72664 :     int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    2599       72664 :     double dfRatioX = 1.5 - (dfSrcX - iSrcX);
    2600       72664 :     double dfRatioY = 1.5 - (dfSrcY - iSrcY);
    2601       72664 :     bool bShifted = false;
    2602             : 
    2603       72664 :     if (iSrcX == -1)
    2604             :     {
    2605         292 :         iSrcX = 0;
    2606         292 :         dfRatioX = 1;
    2607             :     }
    2608       72664 :     if (iSrcY == -1)
    2609             :     {
    2610        7686 :         iSrcY = 0;
    2611        7686 :         dfRatioY = 1;
    2612             :     }
    2613       72664 :     GPtrDiff_t iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    2614             : 
    2615             :     // Shift so we don't overrun the array.
    2616       72664 :     if (static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize == iSrcOffset + 1 ||
    2617       72614 :         static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize ==
    2618       72614 :             iSrcOffset + nSrcXSize + 1)
    2619             :     {
    2620         100 :         bShifted = true;
    2621         100 :         --iSrcOffset;
    2622             :     }
    2623             : 
    2624       72664 :     double adfDensity[2] = {0.0, 0.0};
    2625       72664 :     double adfReal[2] = {0.0, 0.0};
    2626       72664 :     double adfImag[2] = {0.0, 0.0};
    2627       72664 :     double dfAccumulatorReal = 0.0;
    2628       72664 :     double dfAccumulatorImag = 0.0;
    2629       72664 :     double dfAccumulatorDensity = 0.0;
    2630       72664 :     double dfAccumulatorDivisor = 0.0;
    2631             : 
    2632       72664 :     const GPtrDiff_t nSrcPixels =
    2633       72664 :         static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize;
    2634             :     // Get pixel row.
    2635       72664 :     if (iSrcY >= 0 && iSrcY < nSrcYSize && iSrcOffset >= 0 &&
    2636      145328 :         iSrcOffset < nSrcPixels &&
    2637       72664 :         GWKGetPixelRow(poWK, iBand, iSrcOffset, 1, adfDensity, adfReal,
    2638             :                        adfImag))
    2639             :     {
    2640       67008 :         double dfMult1 = dfRatioX * dfRatioY;
    2641       67008 :         double dfMult2 = (1.0 - dfRatioX) * dfRatioY;
    2642             : 
    2643             :         // Shifting corrected.
    2644       67008 :         if (bShifted)
    2645             :         {
    2646         100 :             adfReal[0] = adfReal[1];
    2647         100 :             adfImag[0] = adfImag[1];
    2648         100 :             adfDensity[0] = adfDensity[1];
    2649             :         }
    2650             : 
    2651             :         // Upper Left Pixel.
    2652       67008 :         if (iSrcX >= 0 && iSrcX < nSrcXSize &&
    2653       67008 :             adfDensity[0] > SRC_DENSITY_THRESHOLD)
    2654             :         {
    2655       61578 :             dfAccumulatorDivisor += dfMult1;
    2656             : 
    2657       61578 :             dfAccumulatorReal += adfReal[0] * dfMult1;
    2658       61578 :             dfAccumulatorImag += adfImag[0] * dfMult1;
    2659       61578 :             dfAccumulatorDensity += adfDensity[0] * dfMult1;
    2660             :         }
    2661             : 
    2662             :         // Upper Right Pixel.
    2663       67008 :         if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
    2664       66427 :             adfDensity[1] > SRC_DENSITY_THRESHOLD)
    2665             :         {
    2666       61153 :             dfAccumulatorDivisor += dfMult2;
    2667             : 
    2668       61153 :             dfAccumulatorReal += adfReal[1] * dfMult2;
    2669       61153 :             dfAccumulatorImag += adfImag[1] * dfMult2;
    2670       61153 :             dfAccumulatorDensity += adfDensity[1] * dfMult2;
    2671             :         }
    2672             :     }
    2673             : 
    2674             :     // Get pixel row.
    2675       72664 :     if (iSrcY + 1 >= 0 && iSrcY + 1 < nSrcYSize &&
    2676      213910 :         iSrcOffset + nSrcXSize >= 0 && iSrcOffset + nSrcXSize < nSrcPixels &&
    2677       68582 :         GWKGetPixelRow(poWK, iBand, iSrcOffset + nSrcXSize, 1, adfDensity,
    2678             :                        adfReal, adfImag))
    2679             :     {
    2680       63023 :         double dfMult1 = dfRatioX * (1.0 - dfRatioY);
    2681       63023 :         double dfMult2 = (1.0 - dfRatioX) * (1.0 - dfRatioY);
    2682             : 
    2683             :         // Shifting corrected
    2684       63023 :         if (bShifted)
    2685             :         {
    2686          50 :             adfReal[0] = adfReal[1];
    2687          50 :             adfImag[0] = adfImag[1];
    2688          50 :             adfDensity[0] = adfDensity[1];
    2689             :         }
    2690             : 
    2691             :         // Lower Left Pixel
    2692       63023 :         if (iSrcX >= 0 && iSrcX < nSrcXSize &&
    2693       63023 :             adfDensity[0] > SRC_DENSITY_THRESHOLD)
    2694             :         {
    2695       57744 :             dfAccumulatorDivisor += dfMult1;
    2696             : 
    2697       57744 :             dfAccumulatorReal += adfReal[0] * dfMult1;
    2698       57744 :             dfAccumulatorImag += adfImag[0] * dfMult1;
    2699       57744 :             dfAccumulatorDensity += adfDensity[0] * dfMult1;
    2700             :         }
    2701             : 
    2702             :         // Lower Right Pixel.
    2703       63023 :         if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
    2704       62492 :             adfDensity[1] > SRC_DENSITY_THRESHOLD)
    2705             :         {
    2706       57515 :             dfAccumulatorDivisor += dfMult2;
    2707             : 
    2708       57515 :             dfAccumulatorReal += adfReal[1] * dfMult2;
    2709       57515 :             dfAccumulatorImag += adfImag[1] * dfMult2;
    2710       57515 :             dfAccumulatorDensity += adfDensity[1] * dfMult2;
    2711             :         }
    2712             :     }
    2713             : 
    2714             :     /* -------------------------------------------------------------------- */
    2715             :     /*      Return result.                                                  */
    2716             :     /* -------------------------------------------------------------------- */
    2717       72664 :     if (dfAccumulatorDivisor == 1.0)
    2718             :     {
    2719       41607 :         *pdfReal = dfAccumulatorReal;
    2720       41607 :         *pdfImag = dfAccumulatorImag;
    2721       41607 :         *pdfDensity = dfAccumulatorDensity;
    2722       41607 :         return false;
    2723             :     }
    2724       31057 :     else if (dfAccumulatorDivisor < 0.00001)
    2725             :     {
    2726           0 :         *pdfReal = 0.0;
    2727           0 :         *pdfImag = 0.0;
    2728           0 :         *pdfDensity = 0.0;
    2729           0 :         return false;
    2730             :     }
    2731             :     else
    2732             :     {
    2733       31057 :         *pdfReal = dfAccumulatorReal / dfAccumulatorDivisor;
    2734       31057 :         *pdfImag = dfAccumulatorImag / dfAccumulatorDivisor;
    2735       31057 :         *pdfDensity = dfAccumulatorDensity / dfAccumulatorDivisor;
    2736       31057 :         return true;
    2737             :     }
    2738             : }
    2739             : 
    2740             : template <class T>
    2741     5115304 : static bool GWKBilinearResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
    2742             :                                                int iBand, double dfSrcX,
    2743             :                                                double dfSrcY, T *pValue)
    2744             : 
    2745             : {
    2746             : 
    2747     5115304 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    2748     5115304 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    2749     5115304 :     GPtrDiff_t iSrcOffset =
    2750     5115304 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    2751     5115304 :     const double dfRatioX = 1.5 - (dfSrcX - iSrcX);
    2752     5115304 :     const double dfRatioY = 1.5 - (dfSrcY - iSrcY);
    2753             : 
    2754     5115304 :     const T *const pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
    2755             : 
    2756     5115304 :     if (iSrcX >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
    2757     5012197 :         iSrcY + 1 < poWK->nSrcYSize)
    2758             :     {
    2759     4988028 :         const double dfAccumulator =
    2760     4988028 :             (pSrc[iSrcOffset] * dfRatioX +
    2761     4988028 :              pSrc[iSrcOffset + 1] * (1.0 - dfRatioX)) *
    2762             :                 dfRatioY +
    2763     4988028 :             (pSrc[iSrcOffset + poWK->nSrcXSize] * dfRatioX +
    2764     4988028 :              pSrc[iSrcOffset + 1 + poWK->nSrcXSize] * (1.0 - dfRatioX)) *
    2765     4988028 :                 (1.0 - dfRatioY);
    2766             : 
    2767     4988028 :         *pValue = GWKRoundValueT<T>(dfAccumulator);
    2768             : 
    2769     4988028 :         return true;
    2770             :     }
    2771             : 
    2772      127289 :     double dfAccumulatorDivisor = 0.0;
    2773      127289 :     double dfAccumulator = 0.0;
    2774             : 
    2775             :     // Upper Left Pixel.
    2776      127289 :     if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY >= 0 &&
    2777       53406 :         iSrcY < poWK->nSrcYSize)
    2778             :     {
    2779       53406 :         const double dfMult = dfRatioX * dfRatioY;
    2780             : 
    2781       53406 :         dfAccumulatorDivisor += dfMult;
    2782             : 
    2783       53406 :         dfAccumulator += pSrc[iSrcOffset] * dfMult;
    2784             :     }
    2785             : 
    2786             :     // Upper Right Pixel.
    2787      127289 :     if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
    2788       61346 :         iSrcY < poWK->nSrcYSize)
    2789             :     {
    2790       61346 :         const double dfMult = (1.0 - dfRatioX) * dfRatioY;
    2791             : 
    2792       61346 :         dfAccumulatorDivisor += dfMult;
    2793             : 
    2794       61346 :         dfAccumulator += pSrc[iSrcOffset + 1] * dfMult;
    2795             :     }
    2796             : 
    2797             :     // Lower Right Pixel.
    2798      127289 :     if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
    2799       97438 :         iSrcY + 1 < poWK->nSrcYSize)
    2800             :     {
    2801       72877 :         const double dfMult = (1.0 - dfRatioX) * (1.0 - dfRatioY);
    2802             : 
    2803       72877 :         dfAccumulatorDivisor += dfMult;
    2804             : 
    2805       72877 :         dfAccumulator += pSrc[iSrcOffset + 1 + poWK->nSrcXSize] * dfMult;
    2806             :     }
    2807             : 
    2808             :     // Lower Left Pixel.
    2809      127289 :     if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
    2810       89475 :         iSrcY + 1 < poWK->nSrcYSize)
    2811             :     {
    2812       64706 :         const double dfMult = dfRatioX * (1.0 - dfRatioY);
    2813             : 
    2814       64706 :         dfAccumulatorDivisor += dfMult;
    2815             : 
    2816       64706 :         dfAccumulator += pSrc[iSrcOffset + poWK->nSrcXSize] * dfMult;
    2817             :     }
    2818             : 
    2819             :     /* -------------------------------------------------------------------- */
    2820             :     /*      Return result.                                                  */
    2821             :     /* -------------------------------------------------------------------- */
    2822      127289 :     double dfValue = 0.0;
    2823             : 
    2824      127289 :     if (dfAccumulatorDivisor < 0.00001)
    2825             :     {
    2826           0 :         *pValue = 0;
    2827           0 :         return false;
    2828             :     }
    2829      127289 :     else if (dfAccumulatorDivisor == 1.0)
    2830             :     {
    2831        8767 :         dfValue = dfAccumulator;
    2832             :     }
    2833             :     else
    2834             :     {
    2835      118522 :         dfValue = dfAccumulator / dfAccumulatorDivisor;
    2836             :     }
    2837             : 
    2838      127289 :     *pValue = GWKRoundValueT<T>(dfValue);
    2839             : 
    2840      127289 :     return true;
    2841             : }
    2842             : 
    2843             : /************************************************************************/
    2844             : /*                        GWKCubicResample()                            */
    2845             : /*     Set of bicubic interpolators using cubic convolution.            */
    2846             : /************************************************************************/
    2847             : 
    2848             : // http://verona.fi-p.unam.mx/boris/practicas/CubConvInterp.pdf Formula 18
    2849             : // or http://en.wikipedia.org/wiki/Cubic_Hermite_spline : CINTx(p_1,p0,p1,p2)
    2850             : // http://en.wikipedia.org/wiki/Bicubic_interpolation: matrix notation
    2851             : // Cubic through samples f0..f3: yields f1 when all distances are 0, f2 when all are 1.
    2852             : template <typename T>
    2853     1602850 : static inline T CubicConvolution(T distance1, T distance2, T distance3, T f0,
    2854             :                                  T f1, T f2, T f3)
    2855             : {  // The caller in this file passes t, t^2 and t^3 as distance1..3.
    2856     1602850 :     return (f1 + T(0.5) * (distance1 * (f2 - f0) +  // t term
    2857     1602850 :                            distance2 * (2 * f0 - 5 * f1 + 4 * f2 - f3) +  // t^2 term
    2858     1602850 :                            distance3 * (3 * (f1 - f2) + f3 - f0)));  // t^3 term
    2859             : }
    2860             : 
    2861             : /************************************************************************/
    2862             : /*                       GWKCubicComputeWeights()                       */
    2863             : /************************************************************************/
    2864             : 
    2865             : // Fills coeffs[0..3] with the cubic convolution weights for fractional offset x.
    2866             : // The weights sum to 1 (up to rounding): coeffs[2] = 1.0 - (coeffs[0] + coeffs[1] + coeffs[3]).
    2867             : template <typename T>
    2868     2267674 : static inline void GWKCubicComputeWeights(T x, T coeffs[4])
    2869             : {
    2870     2267674 :     const T halfX = T(0.5) * x;
    2871     2267674 :     const T threeX = T(3.0) * x;
    2872     2267674 :     const T halfX2 = halfX * x;
    2873             :     // Expanded polynomial of each weight is given on its line.
    2874     2267674 :     coeffs[0] = halfX * (-1 + x * (2 - x));  // (-x + 2x^2 - x^3) / 2
    2875     2267674 :     coeffs[1] = 1 + halfX2 * (-5 + threeX);  // (2 - 5x^2 + 3x^3) / 2
    2876     2267674 :     coeffs[2] = halfX * (1 + x * (4 - threeX));  // (x + 4x^2 - 3x^3) / 2
    2877     2267674 :     coeffs[3] = halfX2 * (-1 + x);  // (-x^2 + x^3) / 2
    2878     2267674 : }
    2879             : 
    2880             : // Dot product of the first 4 elements of v1 and v2. TODO(schwehr): Use an inline function.
    2881             : #define CONVOL4(v1, v2)                                                        \
    2882             :     ((v1)[0] * (v2)[0] + (v1)[1] * (v2)[1] + (v1)[2] * (v2)[2] +               \
    2883             :      (v1)[3] * (v2)[3])
    2884             : 
    2885             : #if 0  // Disabled alternative formulation; nothing in this region is compiled.
    2886             : // Optimal (in theory...) for max 2 convolutions: 14 multiplications
    2887             : // instead of 17.
    2888             : // TODO(schwehr): Use an inline function.
    2889             : #define GWKCubicComputeWeights_Optim2MAX(dfX_, adfCoeffs, dfHalfX)             \
    2890             :     {                                                                          \
    2891             :         const double dfX = dfX_;                                               \
    2892             :         dfHalfX = 0.5 * dfX;                                                   \
    2893             :         const double dfThreeX = 3.0 * dfX;                                     \
    2894             :         const double dfXMinus1 = dfX - 1;                                      \
    2895             :                                                                                \
    2896             :         adfCoeffs[0] = -1 + dfX * (2 - dfX);                                   \
    2897             :         adfCoeffs[1] = dfX * (-5 + dfThreeX);                                  \
    2898             :         /*adfCoeffs[2] = 1 + dfX * (4 - dfThreeX);*/                           \
    2899             :         adfCoeffs[2] = -dfXMinus1 - adfCoeffs[1];                              \
    2900             :         /*adfCoeffs[3] = dfX * (-1 + dfX); */                                  \
    2901             :         adfCoeffs[3] = dfXMinus1 - adfCoeffs[0];                               \
    2902             :     }
    2903             : 
    2904             : // Applies the weights above to v; the common 0.5 * x factor is passed as dfHalfX.
    2905             : #define CONVOL4_Optim2MAX(adfCoeffs, v, dfHalfX)                               \
    2906             :     ((v)[1] + (dfHalfX) * ((adfCoeffs)[0] * (v)[0] + (adfCoeffs)[1] * (v)[1] + \
    2907             :                            (adfCoeffs)[2] * (v)[2] + (adfCoeffs)[3] * (v)[3]))
    2908             : #endif
    2909             : 
    2910      299879 : static bool GWKCubicResample4Sample(const GDALWarpKernel *poWK, int iBand,
    2911             :                                     double dfSrcX, double dfSrcY,
    2912             :                                     double *pdfDensity, double *pdfReal,
    2913             :                                     double *pdfImag)
    2914             : // Bicubic sample of density, real and imaginary parts at (dfSrcX, dfSrcY), with bilinear fallback.
    2915             : {
    2916      299879 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    2917      299879 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    2918      299879 :     GPtrDiff_t iSrcOffset =
    2919      299879 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    2920      299879 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    2921      299879 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    2922      299879 :     double adfDensity[4] = {};
    2923      299879 :     double adfReal[4] = {};
    2924      299879 :     double adfImag[4] = {};
    2925             :     // The 4x4 kernel spans columns iSrcX-1..iSrcX+2 and rows iSrcY-1..iSrcY+2.
    2926             :     // Fall back to bilinear interpolation when that window is not fully inside the source image.
    2927      299879 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    2928      284412 :         iSrcY + 2 >= poWK->nSrcYSize)
    2929       24136 :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    2930       24136 :                                           pdfDensity, pdfReal, pdfImag);
    2931             : 
    2932      275743 :     double adfValueDens[4] = {};
    2933      275743 :     double adfValueReal[4] = {};
    2934      275743 :     double adfValueImag[4] = {};
    2935             : 
    2936      275743 :     double adfCoeffsX[4] = {};
    2937      275743 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
    2938             :     // Horizontal pass: for each kernel row i = -1..2, read the row starting at column iSrcX - 1.
    2939     1232410 :     for (GPtrDiff_t i = -1; i < 3; i++)
    2940             :     {
    2941     1003120 :         if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
    2942      991507 :                             2, adfDensity, adfReal, adfImag) ||  // NOTE(review): 2 presumably counts pixel pairs (entries [0..3] are used) - confirm
    2943      991507 :             adfDensity[0] < SRC_DENSITY_THRESHOLD ||
    2944      973867 :             adfDensity[1] < SRC_DENSITY_THRESHOLD ||
    2945     2960190 :             adfDensity[2] < SRC_DENSITY_THRESHOLD ||
    2946      965566 :             adfDensity[3] < SRC_DENSITY_THRESHOLD)
    2947             :         {
    2948       46449 :             return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    2949       46449 :                                               pdfDensity, pdfReal, pdfImag);
    2950             :         }
    2951             : 
    2952      956668 :         adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
    2953      956668 :         adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
    2954      956668 :         adfValueImag[i + 1] = CONVOL4(adfCoeffsX, adfImag);
    2955             :     }
    2956             : 
    2957             :     /* -------------------------------------------------------------------- */
    2958             :     /*      For now, if we have any pixels missing in the kernel area,      */
    2959             :     /*      we fall back to using bilinear interpolation.  Ideally we       */
    2960             :     /*      should do "weight adjustment" of our results similarly to       */
    2961             :     /*      what is done for the cubic spline and Lanczos interpolators.    */
    2962             :     /* -------------------------------------------------------------------- */
    2963             : 
    2964      229294 :     double adfCoeffsY[4] = {};
    2965      229294 :     GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
    2966             :     // Vertical pass: combine the four per-row results with the Y weights.
    2967      229294 :     *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
    2968      229294 :     *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
    2969      229294 :     *pdfImag = CONVOL4(adfCoeffsY, adfValueImag);
    2970             : 
    2971      229294 :     return true;
    2972             : }
    2973             : 
    2974             : #if defined(__x86_64) || defined(_M_X64)
    2975             : 
    2976             : /************************************************************************/
    2977             : /*                           XMMLoad4Values()                           */
    2978             : /*                                                                      */
    2979             : /*  Load 4 packed byte or uint16, cast them to float and put them in a  */
    2980             : /*  m128 register.                                                      */
    2981             : /************************************************************************/
    2982             : 
    2983      949092 : static CPL_INLINE __m128 XMMLoad4Values(const GByte *ptr)
    2984             : {  // Reads the 4 bytes at ptr and returns them converted to 4 packed floats.
    2985             :     unsigned int i;
    2986      949092 :     memcpy(&i, ptr, 4);  // copy the 4 source bytes into i
    2987     1898180 :     __m128i xmm_i = _mm_cvtsi32_si128(i);
    2988             :     // Zero-extend the 4 packed unsigned 8-bit integers in xmm_i to packed
    2989             :     // 32-bit integers.
    2990             : #if __SSE4_1__
    2991             :     xmm_i = _mm_cvtepu8_epi32(xmm_i);
    2992             : #else  // Without SSE4.1: widen 8 -> 16 -> 32 bits by interleaving with zeros.
    2993     1898180 :     xmm_i = _mm_unpacklo_epi8(xmm_i, _mm_setzero_si128());
    2994     1898180 :     xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
    2995             : #endif
    2996     1898180 :     return _mm_cvtepi32_ps(xmm_i);  // int32 -> float
    2997             : }
    2998             : 
    2999        5292 : static CPL_INLINE __m128 XMMLoad4Values(const GUInt16 *ptr)
    3000             : {  // Reads the 4 GUInt16 values at ptr and returns them converted to 4 packed floats.
    3001             :     GUInt64 i;
    3002        5292 :     memcpy(&i, ptr, 8);  // copy the 8 source bytes into i
    3003       10584 :     __m128i xmm_i = _mm_cvtsi64_si128(i);
    3004             :     // Zero-extend the 4 packed unsigned 16-bit integers in xmm_i to packed
    3005             :     // 32-bit integers.
    3006             : #if __SSE4_1__
    3007             :     xmm_i = _mm_cvtepu16_epi32(xmm_i);
    3008             : #else  // Without SSE4.1: widen 16 -> 32 bits by interleaving with zeros.
    3009       10584 :     xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
    3010             : #endif
    3011       10584 :     return _mm_cvtepi32_ps(xmm_i);  // int32 -> float
    3012             : }
    3013             : 
    3014             : /************************************************************************/
    3015             : /*                           XMMHorizontalAdd()                         */
    3016             : /*                                                                      */
    3017             : /*  Return the sum of the 4 floating points of the register.            */
    3018             : /************************************************************************/
    3019             : 
    3020             : #if __SSE3__  // SSE3 variant, built around _mm_movehdup_ps.
    3021             : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
    3022             : {  // Lane comments below list elements from highest (v3) to lowest (v0).
    3023             :     __m128 shuf = _mm_movehdup_ps(v);   // (v3   , v3   , v1   , v1)
    3024             :     __m128 sums = _mm_add_ps(v, shuf);  // (v3+v3, v3+v2, v1+v1, v1+v0)
    3025             :     shuf = _mm_movehl_ps(shuf, sums);   // (v3   , v3   , v3+v3, v3+v2)
    3026             :     sums = _mm_add_ss(sums, shuf);      // (v1+v0)+(v3+v2)
    3027             :     return _mm_cvtss_f32(sums);
    3028             : }
    3029             : #else  // Variant without SSE3 instructions.
    3030      238596 : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
    3031             : {  // Lane comments below list elements from highest (v3) to lowest (v0).
    3032      238596 :     __m128 shuf = _mm_movehl_ps(v, v);     // (v3   , v2   , v3   , v2)
    3033      238596 :     __m128 sums = _mm_add_ps(v, shuf);     // (v3+v3, v2+v2, v3+v1, v2+v0)
    3034      238596 :     shuf = _mm_shuffle_ps(sums, sums, 1);  // (v2+v0, v2+v0, v2+v0, v3+v1)
    3035      238596 :     sums = _mm_add_ss(sums, shuf);         // (v2+v0)+(v3+v1)
    3036      238596 :     return _mm_cvtss_f32(sums);
    3037             : }
    3038             : #endif
    3039             : 
    3040             : #endif  // (defined(__x86_64) || defined(_M_X64))
    3041             : 
    3042             : /************************************************************************/
    3043             : /*            GWKCubicResampleSrcMaskIsDensity4SampleRealT()            */
    3044             : /************************************************************************/
    3045             : 
    3046             : // Note: if USE_SSE_CUBIC_IMPL is defined, only instantiate that for Byte and
    3047             : // UInt16, because the SSE code makes a few assumptions about those types.
    3048             : // We do not define USE_SSE_CUBIC_IMPL since in practice, it gives zero
    3049             : // perf benefit.
    3050             : 
    3051             : template <class T>
    3052         361 : static CPL_INLINE bool GWKCubicResampleSrcMaskIsDensity4SampleRealT(
    3053             :     const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
    3054             :     double *pdfDensity, double *pdfReal)
    3055             : {  // Bicubic sample of the band values (pixel type T) and of pafUnifiedSrcDensity.
    3056         361 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3057         361 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3058         361 :     const GPtrDiff_t iSrcOffset =
    3059         361 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3060             : 
    3061             :     // Fall back to bilinear interpolation when the 4x4 kernel window is not fully inside the source.
    3062         361 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3063         361 :         iSrcY + 2 >= poWK->nSrcYSize)
    3064             :     {
    3065           0 :         double adfImagIgnored[4] = {};  // receives the unused imaginary output
    3066           0 :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3067           0 :                                           pdfDensity, pdfReal, adfImagIgnored);
    3068             :     }
    3069             :     // Optional float32 SSE path, only compiled when USE_SSE_CUBIC_IMPL is defined on x86_64.
    3070             : #if defined(USE_SSE_CUBIC_IMPL) && (defined(__x86_64) || defined(_M_X64))
    3071             :     const float fDeltaX = static_cast<float>(dfSrcX) - 0.5f - iSrcX;
    3072             :     const float fDeltaY = static_cast<float>(dfSrcY) - 0.5f - iSrcY;
    3073             :     // NOTE(review): the offset added to afTemp below is scaled by sizeof(float); confirm it 16-byte aligns and stays in bounds.
    3074             :     // TODO(schwehr): Explain the magic numbers.
    3075             :     float afTemp[4 + 4 + 4 + 1];
    3076             :     float *pafAligned =
    3077             :         reinterpret_cast<float *>(afTemp + ((size_t)afTemp & 0xf));
    3078             :     float *pafCoeffs = pafAligned;
    3079             :     float *pafDensity = pafAligned + 4;
    3080             :     float *pafValue = pafAligned + 8;
    3081             :     // X weights, same formulas as GWKCubicComputeWeights() but in float32.
    3082             :     const float fHalfDeltaX = 0.5f * fDeltaX;
    3083             :     const float fThreeDeltaX = 3.0f * fDeltaX;
    3084             :     const float fHalfDeltaX2 = fHalfDeltaX * fDeltaX;
    3085             : 
    3086             :     pafCoeffs[0] = fHalfDeltaX * (-1 + fDeltaX * (2 - fDeltaX));
    3087             :     pafCoeffs[1] = 1 + fHalfDeltaX2 * (-5 + fThreeDeltaX);
    3088             :     pafCoeffs[2] = fHalfDeltaX * (1 + fDeltaX * (4 - fThreeDeltaX));
    3089             :     pafCoeffs[3] = fHalfDeltaX2 * (-1 + fDeltaX);
    3090             :     __m128 xmmCoeffs = _mm_load_ps(pafCoeffs);
    3091             :     const __m128 xmmThreshold = _mm_load1_ps(&SRC_DENSITY_THRESHOLD);
    3092             :     // Horizontal pass over the 4 kernel rows; also records whether any density is below the threshold.
    3093             :     __m128 xmmMaskLowDensity = _mm_setzero_ps();
    3094             :     for (GPtrDiff_t i = -1, iOffset = iSrcOffset - poWK->nSrcXSize - 1; i < 3;
    3095             :          i++, iOffset += poWK->nSrcXSize)
    3096             :     {
    3097             :         const __m128 xmmDensity =
    3098             :             _mm_loadu_ps(poWK->pafUnifiedSrcDensity + iOffset);
    3099             :         xmmMaskLowDensity = _mm_or_ps(xmmMaskLowDensity,
    3100             :                                       _mm_cmplt_ps(xmmDensity, xmmThreshold));
    3101             :         pafDensity[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
    3102             : 
    3103             :         const __m128 xmmValues =
    3104             :             XMMLoad4Values(((T *)poWK->papabySrcImage[iBand]) + iOffset);
    3105             :         pafValue[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValues));
    3106             :     }
    3107             :     if (_mm_movemask_ps(xmmMaskLowDensity))
    3108             :     {
    3109             :         double adfImagIgnored[4] = {};
    3110             :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3111             :                                           pdfDensity, pdfReal, adfImagIgnored);
    3112             :     }
    3113             :     // Y weights, then the vertical pass over the four row results.
    3114             :     const float fHalfDeltaY = 0.5f * fDeltaY;
    3115             :     const float fThreeDeltaY = 3.0f * fDeltaY;
    3116             :     const float fHalfDeltaY2 = fHalfDeltaY * fDeltaY;
    3117             : 
    3118             :     pafCoeffs[0] = fHalfDeltaY * (-1 + fDeltaY * (2 - fDeltaY));
    3119             :     pafCoeffs[1] = 1 + fHalfDeltaY2 * (-5 + fThreeDeltaY);
    3120             :     pafCoeffs[2] = fHalfDeltaY * (1 + fDeltaY * (4 - fThreeDeltaY));
    3121             :     pafCoeffs[3] = fHalfDeltaY2 * (-1 + fDeltaY);
    3122             : 
    3123             :     xmmCoeffs = _mm_load_ps(pafCoeffs);
    3124             : 
    3125             :     const __m128 xmmDensity = _mm_load_ps(pafDensity);
    3126             :     const __m128 xmmValue = _mm_load_ps(pafValue);
    3127             :     *pdfDensity = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
    3128             :     *pdfReal = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValue));
    3129             : 
    3130             :     // We did all above computations on float32 whereas the general case is
    3131             :     // float64. Not sure if one is fundamentally more correct than the other
    3132             :     // one, but we want our optimization to give the same result as the
    3133             :     // general case as much as possible, so if the resulting value is
    3134             :     // close to some_int_value + 0.5, redo the computation with the general
    3135             :     // case.
    3136             :     // Note: supporting types other than Byte or UInt16 will need changes here.
    3137             :     if (fabs(*pdfReal - static_cast<int>(*pdfReal) - 0.5) > .007)
    3138             :         return true;
    3139             : 
    3140             : #endif  // defined(USE_SSE_CUBIC_IMPL) && (defined(__x86_64) || defined(_M_X64))
    3141             :     // General float64 computation.
    3142         361 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3143         361 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3144             : 
    3145         361 :     double adfValueDens[4] = {};
    3146         361 :     double adfValueReal[4] = {};
    3147             : 
    3148         361 :     double adfCoeffsX[4] = {};
    3149         361 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
    3150             : 
    3151         361 :     double adfCoeffsY[4] = {};
    3152         361 :     GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
    3153             :     // Horizontal pass: one convolution per kernel row, starting at column iSrcX - 1.
    3154        1433 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3155             :     {
    3156        1177 :         const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
    3157             : #if !(defined(USE_SSE_CUBIC_IMPL) && (defined(__x86_64) || defined(_M_X64)))  // the SSE path above already performs this density check
    3158        1177 :         if (poWK->pafUnifiedSrcDensity[iOffset + 0] < SRC_DENSITY_THRESHOLD ||
    3159        1089 :             poWK->pafUnifiedSrcDensity[iOffset + 1] < SRC_DENSITY_THRESHOLD ||
    3160        1089 :             poWK->pafUnifiedSrcDensity[iOffset + 2] < SRC_DENSITY_THRESHOLD ||
    3161        1089 :             poWK->pafUnifiedSrcDensity[iOffset + 3] < SRC_DENSITY_THRESHOLD)
    3162             :         {
    3163         105 :             double adfImagIgnored[4] = {};
    3164         105 :             return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3165             :                                               pdfDensity, pdfReal,
    3166         105 :                                               adfImagIgnored);
    3167             :         }
    3168             : #endif
    3169             : 
    3170        1072 :         adfValueDens[i + 1] =
    3171        1072 :             CONVOL4(adfCoeffsX, poWK->pafUnifiedSrcDensity + iOffset);
    3172             : 
    3173        1072 :         adfValueReal[i + 1] = CONVOL4(
    3174             :             adfCoeffsX,
    3175             :             reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
    3176             :     }
    3177             :     // Vertical pass: combine the four per-row results with the Y weights.
    3178         256 :     *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
    3179         256 :     *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
    3180             : 
    3181         256 :     return true;
    3182             : }
    3183             : 
    3184             : /************************************************************************/
    3185             : /*              GWKCubicResampleSrcMaskIsDensity4SampleReal()             */
    3186             : /*     Bi-cubic when source has and only has pafUnifiedSrcDensity.      */
    3187             : /************************************************************************/
    3188             : 
    3189           0 : static bool GWKCubicResampleSrcMaskIsDensity4SampleReal(
    3190             :     const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
    3191             :     double *pdfDensity, double *pdfReal)
    3192             : // Bicubic sample of band value and density, reading each kernel row through GWKGetPixelRow().
    3193             : {
    3194           0 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3195           0 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3196           0 :     const GPtrDiff_t iSrcOffset =
    3197           0 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3198           0 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3199           0 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3200             : 
    3201             :     // Fall back to bilinear interpolation when the 4x4 kernel window is not fully inside the source.
    3202           0 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3203           0 :         iSrcY + 2 >= poWK->nSrcYSize)
    3204             :     {
    3205           0 :         double adfImagIgnored[4] = {};
    3206           0 :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3207           0 :                                           pdfDensity, pdfReal, adfImagIgnored);
    3208             :     }
    3209             : 
    3210           0 :     double adfCoeffsX[4] = {};
    3211           0 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
    3212             : 
    3213           0 :     double adfCoeffsY[4] = {};
    3214           0 :     GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
    3215             : 
    3216           0 :     double adfValueDens[4] = {};
    3217           0 :     double adfValueReal[4] = {};
    3218           0 :     double adfDensity[4] = {};
    3219           0 :     double adfReal[4] = {};
    3220           0 :     double adfImagIgnored[4] = {};  // imaginary values are fetched but never used
    3221             :     // Horizontal pass: any failed read or low-density pixel in a row triggers the bilinear fallback.
    3222           0 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3223             :     {
    3224           0 :         if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
    3225           0 :                             2, adfDensity, adfReal, adfImagIgnored) ||
    3226           0 :             adfDensity[0] < SRC_DENSITY_THRESHOLD ||
    3227           0 :             adfDensity[1] < SRC_DENSITY_THRESHOLD ||
    3228           0 :             adfDensity[2] < SRC_DENSITY_THRESHOLD ||
    3229           0 :             adfDensity[3] < SRC_DENSITY_THRESHOLD)
    3230             :         {
    3231           0 :             return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3232             :                                               pdfDensity, pdfReal,
    3233           0 :                                               adfImagIgnored);
    3234             :         }
    3235             : 
    3236           0 :         adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
    3237           0 :         adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
    3238             :     }
    3239             :     // Vertical pass: combine the four per-row results with the Y weights.
    3240           0 :     *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
    3241           0 :     *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
    3242             : 
    3243           0 :     return true;
    3244             : }
    3245             : 
    3246             : template <class T>
    3247     1906603 : static bool GWKCubicResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
    3248             :                                             int iBand, double dfSrcX,
    3249             :                                             double dfSrcY, T *pValue)
    3250             : // Bicubic sample of band iBand (pixel type T) with no density handling; result is stored via GWKClampValueT<T>.
    3251             : {
    3252     1906603 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3253     1906603 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3254     1906603 :     const GPtrDiff_t iSrcOffset =
    3255     1906603 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3256     1906603 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3257     1906603 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3258     1906603 :     const double dfDeltaY2 = dfDeltaY * dfDeltaY;
    3259     1906603 :     const double dfDeltaY3 = dfDeltaY2 * dfDeltaY;
    3260             : 
    3261             :     // Fall back to bilinear interpolation when the 4x4 kernel window is not fully inside the source.
    3262     1906603 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3263     1662527 :         iSrcY + 2 >= poWK->nSrcYSize)
    3264      303751 :         return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    3265      303751 :                                                   pValue);
    3266             : 
    3267     1602852 :     double adfCoeffs[4] = {};
    3268     1602852 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffs);
    3269             : 
    3270     1602852 :     double adfValue[4] = {};
    3271             :     // Horizontal pass: convolve each of the 4 kernel rows with the X weights.
    3272     8014250 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3273             :     {
    3274     6411406 :         const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
    3275             : 
    3276     6411406 :         adfValue[i + 1] = CONVOL4(
    3277             :             adfCoeffs,
    3278             :             reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
    3279             :     }
    3280             :     // Vertical pass: cubic through the four row results, using dfDeltaY and its powers.
    3281             :     const double dfValue =
    3282     1602852 :         CubicConvolution(dfDeltaY, dfDeltaY2, dfDeltaY3, adfValue[0],
    3283             :                          adfValue[1], adfValue[2], adfValue[3]);
    3284             : 
    3285     1602852 :     *pValue = GWKClampValueT<T>(dfValue);
    3286             : 
    3287     1602852 :     return true;
    3288             : }
    3289             : 
    3290             : /************************************************************************/
    3291             : /*                          GWKLanczosSinc()                            */
    3292             : /************************************************************************/
    3293             : 
    3294             : /*
    3295             :  * Lanczos windowed sinc interpolation kernel with radius r.
    3296             :  *        /
    3297             :  *        | sinc(x) * sinc(x/r), if |x| < r
    3298             :  * L(x) = | 1, if x = 0                     ,
    3299             :  *        | 0, otherwise
    3300             :  *        \
    3301             :  *
    3302             :  * where sinc(x) = sin(PI * x) / (PI * x).
    3303             :  */
    3304             : 
    3305        1056 : static double GWKLanczosSinc(double dfX)
    3306             : {
    3307        1056 :     if (dfX == 0.0)
    3308           0 :         return 1.0;
    3309             : 
    3310        1056 :     const double dfPIX = M_PI * dfX;
    3311        1056 :     const double dfPIXoverR = dfPIX / 3;
    3312        1056 :     const double dfPIX2overR = dfPIX * dfPIXoverR;
    3313             :     // Given that sin(3x) = 3 sin(x) - 4 sin^3 (x)
    3314             :     // we can compute sin(dfSinPIX) from sin(dfPIXoverR)
    3315        1056 :     const double dfSinPIXoverR = sin(dfPIXoverR);
    3316        1056 :     const double dfSinPIXoverRSquared = dfSinPIXoverR * dfSinPIXoverR;
    3317        1056 :     const double dfSinPIXMulSinPIXoverR =
    3318        1056 :         (3 - 4 * dfSinPIXoverRSquared) * dfSinPIXoverRSquared;
    3319        1056 :     return dfSinPIXMulSinPIXoverR / dfPIX2overR;
    3320             : }
    3321             : 
    3322      106242 : static double GWKLanczosSinc4Values(double *padfValues)
    3323             : {
    3324      531210 :     for (int i = 0; i < 4; i++)
    3325             :     {
    3326      424968 :         if (padfValues[i] == 0.0)
    3327             :         {
    3328           0 :             padfValues[i] = 1.0;
    3329             :         }
    3330             :         else
    3331             :         {
    3332      424968 :             const double dfPIX = M_PI * padfValues[i];
    3333      424968 :             const double dfPIXoverR = dfPIX / 3;
    3334      424968 :             const double dfPIX2overR = dfPIX * dfPIXoverR;
    3335             :             // Given that sin(3x) = 3 sin(x) - 4 sin^3 (x)
    3336             :             // we can compute sin(dfSinPIX) from sin(dfPIXoverR)
    3337      424968 :             const double dfSinPIXoverR = sin(dfPIXoverR);
    3338      424968 :             const double dfSinPIXoverRSquared = dfSinPIXoverR * dfSinPIXoverR;
    3339      424968 :             const double dfSinPIXMulSinPIXoverR =
    3340      424968 :                 (3 - 4 * dfSinPIXoverRSquared) * dfSinPIXoverRSquared;
    3341      424968 :             padfValues[i] = dfSinPIXMulSinPIXoverR / dfPIX2overR;
    3342             :         }
    3343             :     }
    3344      106242 :     return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
    3345             : }
    3346             : 
    3347             : /************************************************************************/
    3348             : /*                           GWKBilinear()                              */
    3349             : /************************************************************************/
    3350             : 
    3351     6668810 : static double GWKBilinear(double dfX)
    3352             : {
    3353     6668810 :     double dfAbsX = fabs(dfX);
    3354     6668810 :     if (dfAbsX <= 1.0)
    3355     6197680 :         return 1 - dfAbsX;
    3356             :     else
    3357      471127 :         return 0.0;
    3358             : }
    3359             : 
    3360      396102 : static double GWKBilinear4Values(double *padfValues)
    3361             : {
    3362      396102 :     double dfAbsX0 = fabs(padfValues[0]);
    3363      396102 :     double dfAbsX1 = fabs(padfValues[1]);
    3364      396102 :     double dfAbsX2 = fabs(padfValues[2]);
    3365      396102 :     double dfAbsX3 = fabs(padfValues[3]);
    3366      396102 :     if (dfAbsX0 <= 1.0)
    3367      290173 :         padfValues[0] = 1 - dfAbsX0;
    3368             :     else
    3369      105929 :         padfValues[0] = 0.0;
    3370      396102 :     if (dfAbsX1 <= 1.0)
    3371      396102 :         padfValues[1] = 1 - dfAbsX1;
    3372             :     else
    3373           0 :         padfValues[1] = 0.0;
    3374      396102 :     if (dfAbsX2 <= 1.0)
    3375      396102 :         padfValues[2] = 1 - dfAbsX2;
    3376             :     else
    3377           0 :         padfValues[2] = 0.0;
    3378      396102 :     if (dfAbsX3 <= 1.0)
    3379      290066 :         padfValues[3] = 1 - dfAbsX3;
    3380             :     else
    3381      106036 :         padfValues[3] = 0.0;
    3382      396102 :     return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
    3383             : }
    3384             : 
    3385             : /************************************************************************/
    3386             : /*                            GWKCubic()                                */
    3387             : /************************************************************************/
    3388             : 
    3389     4339010 : static double GWKCubic(double dfX)
    3390             : {
    3391     4339010 :     return CubicKernel(dfX);
    3392             : }
    3393             : 
    3394     7065530 : static double GWKCubic4Values(double *padfValues)
    3395             : {
    3396     7065530 :     const double dfAbsX_0 = fabs(padfValues[0]);
    3397     7065530 :     const double dfAbsX_1 = fabs(padfValues[1]);
    3398     7065530 :     const double dfAbsX_2 = fabs(padfValues[2]);
    3399     7065530 :     const double dfAbsX_3 = fabs(padfValues[3]);
    3400     7065530 :     const double dfX2_0 = padfValues[0] * padfValues[0];
    3401     7065530 :     const double dfX2_1 = padfValues[1] * padfValues[1];
    3402     7065530 :     const double dfX2_2 = padfValues[2] * padfValues[2];
    3403     7065530 :     const double dfX2_3 = padfValues[3] * padfValues[3];
    3404             : 
    3405     7065530 :     double dfVal0 = 0.0;
    3406     7065530 :     if (dfAbsX_0 <= 1.0)
    3407     1028070 :         dfVal0 = dfX2_0 * (1.5 * dfAbsX_0 - 2.5) + 1.0;
    3408     6037460 :     else if (dfAbsX_0 <= 2.0)
    3409     4287900 :         dfVal0 = dfX2_0 * (-0.5 * dfAbsX_0 + 2.5) - 4.0 * dfAbsX_0 + 2.0;
    3410             : 
    3411     7065530 :     double dfVal1 = 0.0;
    3412     7065530 :     if (dfAbsX_1 <= 1.0)
    3413     4100200 :         dfVal1 = dfX2_1 * (1.5 * dfAbsX_1 - 2.5) + 1.0;
    3414     2965330 :     else if (dfAbsX_1 <= 2.0)
    3415     2964030 :         dfVal1 = dfX2_1 * (-0.5 * dfAbsX_1 + 2.5) - 4.0 * dfAbsX_1 + 2.0;
    3416             : 
    3417     7065530 :     double dfVal2 = 0.0;
    3418     7065530 :     if (dfAbsX_2 <= 1.0)
    3419     5917860 :         dfVal2 = dfX2_2 * (1.5 * dfAbsX_2 - 2.5) + 1.0;
    3420     1147660 :     else if (dfAbsX_2 <= 2.0)
    3421     1149100 :         dfVal2 = dfX2_2 * (-0.5 * dfAbsX_2 + 2.5) - 4.0 * dfAbsX_2 + 2.0;
    3422             : 
    3423     7065530 :     double dfVal3 = 0.0;
    3424     7065530 :     if (dfAbsX_3 <= 1.0)
    3425     3162770 :         dfVal3 = dfX2_3 * (1.5 * dfAbsX_3 - 2.5) + 1.0;
    3426     3902760 :     else if (dfAbsX_3 <= 2.0)
    3427     3648640 :         dfVal3 = dfX2_3 * (-0.5 * dfAbsX_3 + 2.5) - 4.0 * dfAbsX_3 + 2.0;
    3428             : 
    3429     7065530 :     padfValues[0] = dfVal0;
    3430     7065530 :     padfValues[1] = dfVal1;
    3431     7065530 :     padfValues[2] = dfVal2;
    3432     7065530 :     padfValues[3] = dfVal3;
    3433     7065530 :     return dfVal0 + dfVal1 + dfVal2 + dfVal3;
    3434             : }
    3435             : 
    3436             : /************************************************************************/
    3437             : /*                           GWKBSpline()                               */
    3438             : /************************************************************************/
    3439             : 
    3440             : // https://www.cs.utexas.edu/~fussell/courses/cs384g-fall2013/lectures/mitchell/Mitchell.pdf
    3441             : // Equation 8 with (B,C)=(1,0)
    3442             : // 1/6 * ( 3 * |x|^3 -  6 * |x|^2 + 4) |x| < 1
    3443             : // 1/6 * ( -|x|^3 + 6 |x|^2  - 12|x| + 8) |x| >= 1 and |x| < 2
    3444             : 
    3445      138696 : static double GWKBSpline(double x)
    3446             : {
    3447      138696 :     const double xp2 = x + 2.0;
    3448      138696 :     const double xp1 = x + 1.0;
    3449      138696 :     const double xm1 = x - 1.0;
    3450             : 
    3451             :     // This will most likely be used, so we'll compute it ahead of time to
    3452             :     // avoid stalling the processor.
    3453      138696 :     const double xp2c = xp2 * xp2 * xp2;
    3454             : 
    3455             :     // Note that the test is computed only if it is needed.
    3456             :     // TODO(schwehr): Make this easier to follow.
    3457             :     return xp2 > 0.0
    3458      277392 :                ? ((xp1 > 0.0)
    3459      138696 :                       ? ((x > 0.0)
    3460      124338 :                              ? ((xm1 > 0.0) ? -4.0 * xm1 * xm1 * xm1 : 0.0) +
    3461       89912 :                                    6.0 * x * x * x
    3462             :                              : 0.0) +
    3463      124338 :                             -4.0 * xp1 * xp1 * xp1
    3464             :                       : 0.0) +
    3465             :                      xp2c
    3466      138696 :                : 0.0;  // * 0.166666666666666666666
    3467             : }
    3468             : 
    3469     2220360 : static double GWKBSpline4Values(double *padfValues)
    3470             : {
    3471    11101800 :     for (int i = 0; i < 4; i++)
    3472             :     {
    3473     8881440 :         const double x = padfValues[i];
    3474     8881440 :         const double xp2 = x + 2.0;
    3475     8881440 :         const double xp1 = x + 1.0;
    3476     8881440 :         const double xm1 = x - 1.0;
    3477             : 
    3478             :         // This will most likely be used, so we'll compute it ahead of time to
    3479             :         // avoid stalling the processor.
    3480     8881440 :         const double xp2c = xp2 * xp2 * xp2;
    3481             : 
    3482             :         // Note that the test is computed only if it is needed.
    3483             :         // TODO(schwehr): Make this easier to follow.
    3484     8881440 :         padfValues[i] =
    3485             :             (xp2 > 0.0)
    3486    17762900 :                 ? ((xp1 > 0.0)
    3487     8881440 :                        ? ((x > 0.0)
    3488     6660880 :                               ? ((xm1 > 0.0) ? -4.0 * xm1 * xm1 * xm1 : 0.0) +
    3489     4437750 :                                     6.0 * x * x * x
    3490             :                               : 0.0) +
    3491     6660880 :                              -4.0 * xp1 * xp1 * xp1
    3492             :                        : 0.0) +
    3493             :                       xp2c
    3494             :                 : 0.0;  // * 0.166666666666666666666
    3495             :     }
    3496     2220360 :     return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
    3497             : }
    3498             : /************************************************************************/
    3499             : /*                       GWKResampleWrkStruct                           */
    3500             : /************************************************************************/
    3501             : 
    3502             : typedef struct _GWKResampleWrkStruct GWKResampleWrkStruct;
    3503             : 
    3504             : typedef bool (*pfnGWKResampleType)(const GDALWarpKernel *poWK, int iBand,
    3505             :                                    double dfSrcX, double dfSrcY,
    3506             :                                    double *pdfDensity, double *pdfReal,
    3507             :                                    double *pdfImag,
    3508             :                                    GWKResampleWrkStruct *psWrkStruct);
    3509             : 
    3510             : struct _GWKResampleWrkStruct
    3511             : {
    3512             :     pfnGWKResampleType pfnGWKResample;
    3513             : 
    3514             :     // Space for saved X weights.
    3515             :     double *padfWeightsX;
    3516             :     bool *pabCalcX;
    3517             : 
    3518             :     double *padfWeightsY;       // Only used by GWKResampleOptimizedLanczos.
    3519             :     int iLastSrcX;              // Only used by GWKResampleOptimizedLanczos.
    3520             :     int iLastSrcY;              // Only used by GWKResampleOptimizedLanczos.
    3521             :     double dfLastDeltaX;        // Only used by GWKResampleOptimizedLanczos.
    3522             :     double dfLastDeltaY;        // Only used by GWKResampleOptimizedLanczos.
    3523             :     double dfCosPiXScale;       // Only used by GWKResampleOptimizedLanczos.
    3524             :     double dfSinPiXScale;       // Only used by GWKResampleOptimizedLanczos.
    3525             :     double dfCosPiXScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3526             :     double dfSinPiXScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3527             :     double dfCosPiYScale;       // Only used by GWKResampleOptimizedLanczos.
    3528             :     double dfSinPiYScale;       // Only used by GWKResampleOptimizedLanczos.
    3529             :     double dfCosPiYScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3530             :     double dfSinPiYScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3531             : 
    3532             :     // Space for saving a row of pixels.
    3533             :     double *padfRowDensity;
    3534             :     double *padfRowReal;
    3535             :     double *padfRowImag;
    3536             : };
    3537             : 
    3538             : /************************************************************************/
    3539             : /*                    GWKResampleCreateWrkStruct()                      */
    3540             : /************************************************************************/
    3541             : 
    3542             : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
    3543             :                         double dfSrcY, double *pdfDensity, double *pdfReal,
    3544             :                         double *pdfImag, GWKResampleWrkStruct *psWrkStruct);
    3545             : 
    3546             : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
    3547             :                                         double dfSrcX, double dfSrcY,
    3548             :                                         double *pdfDensity, double *pdfReal,
    3549             :                                         double *pdfImag,
    3550             :                                         GWKResampleWrkStruct *psWrkStruct);
    3551             : 
    3552         341 : static GWKResampleWrkStruct *GWKResampleCreateWrkStruct(GDALWarpKernel *poWK)
    3553             : {
    3554         341 :     const int nXDist = (poWK->nXRadius + 1) * 2;
    3555         341 :     const int nYDist = (poWK->nYRadius + 1) * 2;
    3556             : 
    3557             :     GWKResampleWrkStruct *psWrkStruct = static_cast<GWKResampleWrkStruct *>(
    3558         341 :         CPLCalloc(1, sizeof(GWKResampleWrkStruct)));
    3559             : 
    3560             :     // Alloc space for saved X weights.
    3561         341 :     psWrkStruct->padfWeightsX =
    3562         341 :         static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3563         341 :     psWrkStruct->pabCalcX =
    3564         341 :         static_cast<bool *>(CPLMalloc(nXDist * sizeof(bool)));
    3565             : 
    3566         341 :     psWrkStruct->padfWeightsY =
    3567         341 :         static_cast<double *>(CPLCalloc(nYDist, sizeof(double)));
    3568         341 :     psWrkStruct->iLastSrcX = -10;
    3569         341 :     psWrkStruct->iLastSrcY = -10;
    3570         341 :     psWrkStruct->dfLastDeltaX = -10;
    3571         341 :     psWrkStruct->dfLastDeltaY = -10;
    3572             : 
    3573             :     // Alloc space for saving a row of pixels.
    3574         341 :     if (poWK->pafUnifiedSrcDensity == nullptr &&
    3575         314 :         poWK->panUnifiedSrcValid == nullptr &&
    3576         302 :         poWK->papanBandSrcValid == nullptr)
    3577             :     {
    3578         302 :         psWrkStruct->padfRowDensity = nullptr;
    3579             :     }
    3580             :     else
    3581             :     {
    3582          39 :         psWrkStruct->padfRowDensity =
    3583          39 :             static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3584             :     }
    3585         341 :     psWrkStruct->padfRowReal =
    3586         341 :         static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3587         341 :     psWrkStruct->padfRowImag =
    3588         341 :         static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3589             : 
    3590         341 :     if (poWK->eResample == GRA_Lanczos)
    3591             :     {
    3592          63 :         psWrkStruct->pfnGWKResample = GWKResampleOptimizedLanczos;
    3593             : 
    3594          63 :         if (poWK->dfXScale < 1)
    3595             :         {
    3596           4 :             psWrkStruct->dfCosPiXScaleOver3 = cos(M_PI / 3 * poWK->dfXScale);
    3597           4 :             psWrkStruct->dfSinPiXScaleOver3 =
    3598           4 :                 sqrt(1 - psWrkStruct->dfCosPiXScaleOver3 *
    3599           4 :                              psWrkStruct->dfCosPiXScaleOver3);
    3600             :             // "Naive":
    3601             :             // const double dfCosPiXScale = cos(  M_PI * dfXScale );
    3602             :             // const double dfSinPiXScale = sin(  M_PI * dfXScale );
    3603             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
    3604           4 :             psWrkStruct->dfCosPiXScale = (4 * psWrkStruct->dfCosPiXScaleOver3 *
    3605           4 :                                               psWrkStruct->dfCosPiXScaleOver3 -
    3606           4 :                                           3) *
    3607           4 :                                          psWrkStruct->dfCosPiXScaleOver3;
    3608           4 :             psWrkStruct->dfSinPiXScale = sqrt(
    3609           4 :                 1 - psWrkStruct->dfCosPiXScale * psWrkStruct->dfCosPiXScale);
    3610             :         }
    3611             : 
    3612          63 :         if (poWK->dfYScale < 1)
    3613             :         {
    3614          11 :             psWrkStruct->dfCosPiYScaleOver3 = cos(M_PI / 3 * poWK->dfYScale);
    3615          11 :             psWrkStruct->dfSinPiYScaleOver3 =
    3616          11 :                 sqrt(1 - psWrkStruct->dfCosPiYScaleOver3 *
    3617          11 :                              psWrkStruct->dfCosPiYScaleOver3);
    3618             :             // "Naive":
    3619             :             // const double dfCosPiYScale = cos(  M_PI * dfYScale );
    3620             :             // const double dfSinPiYScale = sin(  M_PI * dfYScale );
    3621             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
    3622          11 :             psWrkStruct->dfCosPiYScale = (4 * psWrkStruct->dfCosPiYScaleOver3 *
    3623          11 :                                               psWrkStruct->dfCosPiYScaleOver3 -
    3624          11 :                                           3) *
    3625          11 :                                          psWrkStruct->dfCosPiYScaleOver3;
    3626          11 :             psWrkStruct->dfSinPiYScale = sqrt(
    3627          11 :                 1 - psWrkStruct->dfCosPiYScale * psWrkStruct->dfCosPiYScale);
    3628             :         }
    3629             :     }
    3630             :     else
    3631         278 :         psWrkStruct->pfnGWKResample = GWKResample;
    3632             : 
    3633         341 :     return psWrkStruct;
    3634             : }
    3635             : 
    3636             : /************************************************************************/
    3637             : /*                    GWKResampleDeleteWrkStruct()                      */
    3638             : /************************************************************************/
    3639             : 
    3640         341 : static void GWKResampleDeleteWrkStruct(GWKResampleWrkStruct *psWrkStruct)
    3641             : {
    3642         341 :     CPLFree(psWrkStruct->padfWeightsX);
    3643         341 :     CPLFree(psWrkStruct->padfWeightsY);
    3644         341 :     CPLFree(psWrkStruct->pabCalcX);
    3645         341 :     CPLFree(psWrkStruct->padfRowDensity);
    3646         341 :     CPLFree(psWrkStruct->padfRowReal);
    3647         341 :     CPLFree(psWrkStruct->padfRowImag);
    3648         341 :     CPLFree(psWrkStruct);
    3649         341 : }
    3650             : 
    3651             : /************************************************************************/
    3652             : /*                           GWKResample()                              */
    3653             : /************************************************************************/
    3654             : 
    3655      239383 : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
    3656             :                         double dfSrcY, double *pdfDensity, double *pdfReal,
    3657             :                         double *pdfImag, GWKResampleWrkStruct *psWrkStruct)
    3658             : 
    3659             : {
    3660             :     // Save as local variables to avoid following pointers in loops.
    3661      239383 :     const int nSrcXSize = poWK->nSrcXSize;
    3662      239383 :     const int nSrcYSize = poWK->nSrcYSize;
    3663             : 
    3664      239383 :     double dfAccumulatorReal = 0.0;
    3665      239383 :     double dfAccumulatorImag = 0.0;
    3666      239383 :     double dfAccumulatorDensity = 0.0;
    3667      239383 :     double dfAccumulatorWeight = 0.0;
    3668      239383 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    3669      239383 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    3670      239383 :     const GPtrDiff_t iSrcOffset =
    3671      239383 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    3672      239383 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3673      239383 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3674             : 
    3675      239383 :     const double dfXScale = poWK->dfXScale;
    3676      239383 :     const double dfYScale = poWK->dfYScale;
    3677             : 
    3678      239383 :     const int nXDist = (poWK->nXRadius + 1) * 2;
    3679             : 
    3680             :     // Space for saved X weights.
    3681      239383 :     double *padfWeightsX = psWrkStruct->padfWeightsX;
    3682      239383 :     bool *pabCalcX = psWrkStruct->pabCalcX;
    3683             : 
    3684             :     // Space for saving a row of pixels.
    3685      239383 :     double *padfRowDensity = psWrkStruct->padfRowDensity;
    3686      239383 :     double *padfRowReal = psWrkStruct->padfRowReal;
    3687      239383 :     double *padfRowImag = psWrkStruct->padfRowImag;
    3688             : 
    3689             :     // Mark as needing calculation (don't calculate the weights yet,
    3690             :     // because a mask may render it unnecessary).
    3691      239383 :     memset(pabCalcX, false, nXDist * sizeof(bool));
    3692             : 
    3693      239383 :     FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample];
    3694      239383 :     CPLAssert(pfnGetWeight);
    3695             : 
    3696             :     // Skip sampling over edge of image.
    3697      239383 :     int j = poWK->nFiltInitY;
    3698      239383 :     int jMax = poWK->nYRadius;
    3699      239383 :     if (iSrcY + j < 0)
    3700         566 :         j = -iSrcY;
    3701      239383 :     if (iSrcY + jMax >= nSrcYSize)
    3702         662 :         jMax = nSrcYSize - iSrcY - 1;
    3703             : 
    3704      239383 :     int iMin = poWK->nFiltInitX;
    3705      239383 :     int iMax = poWK->nXRadius;
    3706      239383 :     if (iSrcX + iMin < 0)
    3707         566 :         iMin = -iSrcX;
    3708      239383 :     if (iSrcX + iMax >= nSrcXSize)
    3709         659 :         iMax = nSrcXSize - iSrcX - 1;
    3710             : 
    3711      239383 :     const int bXScaleBelow1 = (dfXScale < 1.0);
    3712      239383 :     const int bYScaleBelow1 = (dfYScale < 1.0);
    3713             : 
    3714      239383 :     GPtrDiff_t iRowOffset =
    3715      239383 :         iSrcOffset + static_cast<GPtrDiff_t>(j - 1) * nSrcXSize + iMin;
    3716             : 
    3717             :     // Loop over pixel rows in the kernel.
    3718     1445930 :     for (; j <= jMax; ++j)
    3719             :     {
    3720     1206540 :         iRowOffset += nSrcXSize;
    3721             : 
    3722             :         // Get pixel values.
    3723             :         // We can potentially read extra elements after the "normal" end of the
    3724             :         // source arrays, but the contract of papabySrcImage[iBand],
    3725             :         // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
    3726             :         // is to have WARP_EXTRA_ELTS reserved at their end.
    3727     1206540 :         if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
    3728             :                             padfRowDensity, padfRowReal, padfRowImag))
    3729          72 :             continue;
    3730             : 
    3731             :         // Calculate the Y weight.
    3732             :         double dfWeight1 = (bYScaleBelow1)
    3733     1206470 :                                ? pfnGetWeight((j - dfDeltaY) * dfYScale)
    3734        1600 :                                : pfnGetWeight(j - dfDeltaY);
    3735             : 
    3736             :         // Iterate over pixels in row.
    3737     1206470 :         double dfAccumulatorRealLocal = 0.0;
    3738     1206470 :         double dfAccumulatorImagLocal = 0.0;
    3739     1206470 :         double dfAccumulatorDensityLocal = 0.0;
    3740     1206470 :         double dfAccumulatorWeightLocal = 0.0;
    3741             : 
    3742     7317420 :         for (int i = iMin; i <= iMax; ++i)
    3743             :         {
    3744             :             // Skip sampling if pixel has zero density.
    3745     6110940 :             if (padfRowDensity != nullptr &&
    3746       77277 :                 padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD)
    3747         546 :                 continue;
    3748             : 
    3749     6110400 :             double dfWeight2 = 0.0;
    3750             : 
    3751             :             // Make or use a cached set of weights for this row.
    3752     6110400 :             if (pabCalcX[i - iMin])
    3753             :             {
    3754             :                 // Use saved weight value instead of recomputing it.
    3755     4903920 :                 dfWeight2 = padfWeightsX[i - iMin];
    3756             :             }
    3757             :             else
    3758             :             {
    3759             :                 // Calculate & save the X weight.
    3760     1206480 :                 padfWeightsX[i - iMin] = dfWeight2 =
    3761     1206480 :                     (bXScaleBelow1) ? pfnGetWeight((i - dfDeltaX) * dfXScale)
    3762        1600 :                                     : pfnGetWeight(i - dfDeltaX);
    3763             : 
    3764     1206480 :                 pabCalcX[i - iMin] = true;
    3765             :             }
    3766             : 
    3767             :             // Accumulate!
    3768     6110400 :             dfAccumulatorRealLocal += padfRowReal[i - iMin] * dfWeight2;
    3769     6110400 :             dfAccumulatorImagLocal += padfRowImag[i - iMin] * dfWeight2;
    3770     6110400 :             if (padfRowDensity != nullptr)
    3771       76731 :                 dfAccumulatorDensityLocal +=
    3772       76731 :                     padfRowDensity[i - iMin] * dfWeight2;
    3773     6110400 :             dfAccumulatorWeightLocal += dfWeight2;
    3774             :         }
    3775             : 
    3776     1206470 :         dfAccumulatorReal += dfAccumulatorRealLocal * dfWeight1;
    3777     1206470 :         dfAccumulatorImag += dfAccumulatorImagLocal * dfWeight1;
    3778     1206470 :         dfAccumulatorDensity += dfAccumulatorDensityLocal * dfWeight1;
    3779     1206470 :         dfAccumulatorWeight += dfAccumulatorWeightLocal * dfWeight1;
    3780             :     }
    3781             : 
    3782      239383 :     if (dfAccumulatorWeight < 0.000001 ||
    3783        1887 :         (padfRowDensity != nullptr && dfAccumulatorDensity < 0.000001))
    3784             :     {
    3785           0 :         *pdfDensity = 0.0;
    3786           0 :         return false;
    3787             :     }
    3788             : 
    3789             :     // Calculate the output taking into account weighting.
    3790      239383 :     if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
    3791             :     {
    3792      239380 :         *pdfReal = dfAccumulatorReal / dfAccumulatorWeight;
    3793      239380 :         *pdfImag = dfAccumulatorImag / dfAccumulatorWeight;
    3794      239380 :         if (padfRowDensity != nullptr)
    3795        1884 :             *pdfDensity = dfAccumulatorDensity / dfAccumulatorWeight;
    3796             :         else
    3797      237496 :             *pdfDensity = 1.0;
    3798             :     }
    3799             :     else
    3800             :     {
    3801           3 :         *pdfReal = dfAccumulatorReal;
    3802           3 :         *pdfImag = dfAccumulatorImag;
    3803           3 :         if (padfRowDensity != nullptr)
    3804           3 :             *pdfDensity = dfAccumulatorDensity;
    3805             :         else
    3806           0 :             *pdfDensity = 1.0;
    3807             :     }
    3808             : 
    3809      239383 :     return true;
    3810             : }
    3811             : 
    3812             : /************************************************************************/
    3813             : /*                      GWKResampleOptimizedLanczos()                   */
    3814             : /************************************************************************/
    3815             : 
                     : /**
                     :  * Compute the 3-lobe Lanczos resampled value of band iBand at the source
                     :  * position (dfSrcX, dfSrcY).
                     :  *
                     :  * The kernel is applied as separable X and Y weights stored in
                     :  * psWrkStruct->padfWeightsX / padfWeightsY. The weights of an axis are only
                     :  * recomputed when the integer source pixel or the sub-pixel offset differ
                     :  * from the ones recorded by the previous call (iLastSrcX / dfLastDeltaX and
                     :  * iLastSrcY / dfLastDeltaY in psWrkStruct).
                     :  *
                     :  * For every axis two code paths exist:
                     :  * - scale < 1: the kernel argument is multiplied by the scale and the
                     :  *   sines are propagated from one tap to the next with the angle addition
                     :  *   formulas, using the cos/sin values cached in psWrkStruct.
                     :  * - otherwise: taps are one unit apart, so only three distinct numerators
                     :  *   exist (padfCst[]).
                     :  * In both cases the weight is 3 sin(pi x) sin(pi x / 3) / (pi^2 x^2), and
                     :  * 1.0 for x == 0.
                     :  *
                     :  * @param poWK        Warp kernel. Source dimensions, scales, filter extents,
                     :  *                    working data type and source/mask buffers are read.
                     :  * @param iBand       Index of the band in poWK->papabySrcImage.
                     :  * @param dfSrcX      Source X coordinate (0.5 is subtracted before flooring).
                     :  * @param dfSrcY      Source Y coordinate (0.5 is subtracted before flooring).
                     :  * @param pdfDensity  [out] 0.0 on failure. On success 1.0, or the weighted
                     :  *                    density when a row density buffer is available.
                     :  * @param pdfReal     [out] Weighted real part. Only meaningful on success.
                     :  * @param pdfImag     [out] Weighted imaginary part. NOT written by the Byte
                     :  *                    fast path.
                     :  * @param psWrkStruct Scratch buffers and weight cache. Modified.
                     :  * @return false when the accumulated weight is below 1e-6 or, when density
                     :  *         is used, when the accumulated density is below 1e-6 or fewer than
                     :  *         half of the kernel pixels are valid. true otherwise.
                     :  */
    3816      617144 : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
    3817             :                                         double dfSrcX, double dfSrcY,
    3818             :                                         double *pdfDensity, double *pdfReal,
    3819             :                                         double *pdfImag,
    3820             :                                         GWKResampleWrkStruct *psWrkStruct)
    3821             : 
    3822             : {
    3823             :     // Save as local variables to avoid following pointers in loops.
    3824      617144 :     const int nSrcXSize = poWK->nSrcXSize;
    3825      617144 :     const int nSrcYSize = poWK->nSrcYSize;
    3826             : 
    3827      617144 :     double dfAccumulatorReal = 0.0;
    3828      617144 :     double dfAccumulatorImag = 0.0;
    3829      617144 :     double dfAccumulatorDensity = 0.0;
    3830      617144 :     double dfAccumulatorWeight = 0.0;
    3831      617144 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    3832      617144 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    3833      617144 :     const GPtrDiff_t iSrcOffset =
    3834      617144 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
                     :     // Sub-pixel offsets, in [0, 1) by construction of iSrcX / iSrcY.
    3835      617144 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3836      617144 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3837             : 
    3838      617144 :     const double dfXScale = poWK->dfXScale;
    3839      617144 :     const double dfYScale = poWK->dfYScale;
    3840             : 
    3841             :     // Space for saved X weights.
                     :     // The pointers are shifted so that they can be indexed directly with the
                     :     // (possibly negative) kernel offsets i / j, which start at nFiltInitX /
                     :     // nFiltInitY.
    3842      617144 :     double *const padfWeightsXShifted =
    3843      617144 :         psWrkStruct->padfWeightsX - poWK->nFiltInitX;
    3844      617144 :     double *const padfWeightsYShifted =
    3845      617144 :         psWrkStruct->padfWeightsY - poWK->nFiltInitY;
    3846             : 
    3847             :     // Space for saving a row of pixels.
    3848      617144 :     double *const padfRowDensity = psWrkStruct->padfRowDensity;
    3849      617144 :     double *const padfRowReal = psWrkStruct->padfRowReal;
    3850      617144 :     double *const padfRowImag = psWrkStruct->padfRowImag;
    3851             : 
    3852             :     // Skip sampling over edge of image.
    3853      617144 :     int jMin = poWK->nFiltInitY;
    3854      617144 :     int jMax = poWK->nYRadius;
    3855      617144 :     if (iSrcY + jMin < 0)
    3856       16522 :         jMin = -iSrcY;
    3857      617144 :     if (iSrcY + jMax >= nSrcYSize)
    3858        5782 :         jMax = nSrcYSize - iSrcY - 1;
    3859             : 
    3860      617144 :     int iMin = poWK->nFiltInitX;
    3861      617144 :     int iMax = poWK->nXRadius;
    3862      617144 :     if (iSrcX + iMin < 0)
    3863       15797 :         iMin = -iSrcX;
    3864      617144 :     if (iSrcX + iMax >= nSrcXSize)
    3865        4657 :         iMax = nSrcXSize - iSrcX - 1;
    3866             : 
    3867      617144 :     if (dfXScale < 1.0)
    3868             :     {
                     :         // Restrict the taps to the [-3, 3] support of the scaled kernel.
    3869      403041 :         while ((iMin - dfDeltaX) * dfXScale < -3.0)
    3870      200179 :             iMin++;
    3871      202862 :         while ((iMax - dfDeltaX) * dfXScale > 3.0)
    3872           0 :             iMax--;
    3873             : 
    3874             :         // clang-format off
    3875             :         /*
    3876             :         Naive version:
    3877             :         for (int i = iMin; i <= iMax; ++i)
    3878             :         {
    3879             :             psWrkStruct->padfWeightsXShifted[i] =
    3880             :                 GWKLanczosSinc((i - dfDeltaX) * dfXScale);
    3881             :         }
    3882             : 
    3883             :         but given that:
    3884             : 
    3885             :         GWKLanczosSinc(x):
    3886             :             if (dfX == 0.0)
    3887             :                 return 1.0;
    3888             : 
    3889             :             const double dfPIX = M_PI * dfX;
    3890             :             const double dfPIXoverR = dfPIX / 3;
    3891             :             const double dfPIX2overR = dfPIX * dfPIXoverR;
    3892             :             return sin(dfPIX) * sin(dfPIXoverR) / dfPIX2overR;
    3893             : 
    3894             :         and
    3895             :             sin (a + b) = sin a cos b + cos a sin b.
    3896             :             cos (a + b) = cos a cos b - sin a sin b.
    3897             : 
    3898             :         we can skip any sin() computation within the loop
    3899             :         */
    3900             :         // clang-format on
    3901             : 
    3902      202862 :         if (iSrcX != psWrkStruct->iLastSrcX ||
    3903      131072 :             dfDeltaX != psWrkStruct->dfLastDeltaX)
    3904             :         {
    3905       71790 :             double dfX = (iMin - dfDeltaX) * dfXScale;
    3906             : 
    3907       71790 :             double dfPIXover3 = M_PI / 3 * dfX;
    3908       71790 :             double dfCosOver3 = cos(dfPIXover3);
    3909       71790 :             double dfSinOver3 = sin(dfPIXover3);
    3910             : 
    3911             :             // "Naive":
    3912             :             // double dfSin = sin( M_PI * dfX );
    3913             :             // double dfCos = cos( M_PI * dfX );
    3914             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
    3915       71790 :             double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
    3916       71790 :             double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
    3917             : 
                     :             // NOTE(review): presumably cos/sin of M_PI * dfXScale and of
                     :             // M_PI / 3 * dfXScale, precomputed by the caller - confirm.
    3918       71790 :             const double dfCosPiXScaleOver3 = psWrkStruct->dfCosPiXScaleOver3;
    3919       71790 :             const double dfSinPiXScaleOver3 = psWrkStruct->dfSinPiXScaleOver3;
    3920       71790 :             const double dfCosPiXScale = psWrkStruct->dfCosPiXScale;
    3921       71790 :             const double dfSinPiXScale = psWrkStruct->dfSinPiXScale;
                     :             // sin(pi x) sin(pi x / 3) / (pi^2 x^2 / 3) =
                     :             //     (3 / pi^2) sin(pi x) sin(pi x / 3) / x^2
                     :             // The factor must be 3 / pi^2 (and not 3 * pi^2) so that these
                     :             // weights are on the same scale as the 1.0 returned for dfX == 0
                     :             // and as the weights of the dfXScale >= 1 branch.
    3922       71790 :             constexpr double THREE_OVER_PI2 = 3.0 / (M_PI * M_PI);
    3923       71790 :             padfWeightsXShifted[iMin] =
    3924       71790 :                 dfX == 0
                     :                     ? 1.0
                     :                     : THREE_OVER_PI2 * dfSin * dfSinOver3 / (dfX * dfX);
    3925     1636480 :             for (int i = iMin + 1; i <= iMax; ++i)
    3926             :             {
    3927     1564690 :                 dfX += dfXScale;
    3928     1564690 :                 const double dfNewSin =
    3929     1564690 :                     dfSin * dfCosPiXScale + dfCos * dfSinPiXScale;
    3930     1564690 :                 const double dfNewSinOver3 = dfSinOver3 * dfCosPiXScaleOver3 +
    3931     1564690 :                                              dfCosOver3 * dfSinPiXScaleOver3;
    3932     1564690 :                 padfWeightsXShifted[i] =
    3933             :                     dfX == 0
    3934     1564690 :                         ? 1.0
    3935     1564690 :                         : THREE_OVER_PI2 * dfNewSin * dfNewSinOver3 /
                     :                               (dfX * dfX);
    3936     1564690 :                 const double dfNewCos =
    3937     1564690 :                     dfCos * dfCosPiXScale - dfSin * dfSinPiXScale;
    3938     1564690 :                 const double dfNewCosOver3 = dfCosOver3 * dfCosPiXScaleOver3 -
    3939     1564690 :                                              dfSinOver3 * dfSinPiXScaleOver3;
    3940     1564690 :                 dfSin = dfNewSin;
    3941     1564690 :                 dfCos = dfNewCos;
    3942     1564690 :                 dfSinOver3 = dfNewSinOver3;
    3943     1564690 :                 dfCosOver3 = dfNewCosOver3;
    3944             :             }
    3945             : 
    3946       71790 :             psWrkStruct->iLastSrcX = iSrcX;
    3947       71790 :             psWrkStruct->dfLastDeltaX = dfDeltaX;
    3948             :         }
    3949             :     }
    3950             :     else
    3951             :     {
    3952      757542 :         while (iMin - dfDeltaX < -3.0)
    3953      343260 :             iMin++;
    3954      414282 :         while (iMax - dfDeltaX > 3.0)
    3955           0 :             iMax--;
    3956             : 
    3957      414282 :         if (iSrcX != psWrkStruct->iLastSrcX ||
    3958      209580 :             dfDeltaX != psWrkStruct->dfLastDeltaX)
    3959             :         {
    3960             :             // Optimisation of GWKLanczosSinc(i - dfDeltaX) based on the
    3961             :             // following trigonometric formulas.
    3962             : 
    3963             :             // TODO(schwehr): Move this somewhere where it can be rendered at
    3964             :             // LaTeX.
    3965             :             // clang-format off
    3966             :             // sin(M_PI * (dfBase + k)) = sin(M_PI * dfBase) * cos(M_PI * k) +
    3967             :             //                            cos(M_PI * dfBase) * sin(M_PI * k)
    3968             :             // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k) + dfCosPIBase * sin(M_PI * k)
    3969             :             // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k)
    3970             :             // sin(M_PI * (dfBase + k)) = dfSinPIBase * (((k % 2) == 0) ? 1 : -1)
    3971             : 
    3972             :             // sin(M_PI / dfR * (dfBase + k)) = sin(M_PI / dfR * dfBase) * cos(M_PI / dfR * k) +
    3973             :             //                                  cos(M_PI / dfR * dfBase) * sin(M_PI / dfR * k)
    3974             :             // sin(M_PI / dfR * (dfBase + k)) = dfSinPIBaseOverR * cos(M_PI / dfR * k) + dfCosPIBaseOverR * sin(M_PI / dfR * k)
    3975             :             // clang-format on
    3976             : 
    3977      414282 :             const double dfSinPIDeltaXOver3 = sin((-M_PI / 3.0) * dfDeltaX);
    3978      414282 :             const double dfSin2PIDeltaXOver3 =
    3979             :                 dfSinPIDeltaXOver3 * dfSinPIDeltaXOver3;
    3980             :             // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaX < PI/2.
    3981      414282 :             const double dfCosPIDeltaXOver3 = sqrt(1.0 - dfSin2PIDeltaXOver3);
    3982      414282 :             const double dfSinPIDeltaX =
    3983      414282 :                 (3.0 - 4 * dfSin2PIDeltaXOver3) * dfSinPIDeltaXOver3;
    3984      414282 :             const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
    3985      414282 :             const double dfInvPI2Over3xSinPIDeltaX =
    3986             :                 dfInvPI2Over3 * dfSinPIDeltaX;
    3987      414282 :             const double dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 =
    3988      414282 :                 -0.5 * dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3;
    3989      414282 :             const double dfSinPIOver3 = 0.8660254037844386;
    3990      414282 :             const double dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3 =
    3991      414282 :                 dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaX * dfCosPIDeltaXOver3;
                     :             // Numerators of the weights, selected by the tap index modulo 3.
    3992             :             const double padfCst[] = {
    3993      414282 :                 dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3,
    3994      414282 :                 dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 -
    3995             :                     dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3,
    3996      414282 :                 dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 +
    3997      414282 :                     dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3};
    3998             : 
    3999     2936860 :             for (int i = iMin; i <= iMax; ++i)
    4000             :             {
    4001     2522570 :                 const double dfX = i - dfDeltaX;
    4002     2522570 :                 if (dfX == 0.0)
    4003       58282 :                     padfWeightsXShifted[i] = 1.0;
    4004             :                 else
    4005     2464290 :                     padfWeightsXShifted[i] = padfCst[(i + 3) % 3] / (dfX * dfX);
    4006             : #if DEBUG_VERBOSE
    4007             :                     // TODO(schwehr): AlmostEqual.
    4008             :                     // CPLAssert(fabs(padfWeightsX[i-poWK->nFiltInitX] -
    4009             :                     //               GWKLanczosSinc(dfX, 3.0)) < 1e-10);
    4010             : #endif
    4011             :             }
    4012             : 
    4013      414282 :             psWrkStruct->iLastSrcX = iSrcX;
    4014      414282 :             psWrkStruct->dfLastDeltaX = dfDeltaX;
    4015             :         }
    4016             :     }
    4017             : 
    4018      617144 :     if (dfYScale < 1.0)
    4019             :     {
    4020      403116 :         while ((jMin - dfDeltaY) * dfYScale < -3.0)
    4021      200254 :             jMin++;
    4022      202862 :         while ((jMax - dfDeltaY) * dfYScale > 3.0)
    4023           0 :             jMax--;
    4024             : 
    4025             :         // clang-format off
    4026             :         /*
    4027             :         Naive version:
    4028             :         for (int j = jMin; j <= jMax; ++j)
    4029             :         {
    4030             :             padfWeightsYShifted[j] =
    4031             :                 GWKLanczosSinc((j - dfDeltaY) * dfYScale);
    4032             :         }
    4033             :         */
    4034             :         // clang-format on
    4035             : 
    4036      202862 :         if (iSrcY != psWrkStruct->iLastSrcY ||
    4037      202479 :             dfDeltaY != psWrkStruct->dfLastDeltaY)
    4038             :         {
    4039         383 :             double dfY = (jMin - dfDeltaY) * dfYScale;
    4040             : 
    4041         383 :             double dfPIYover3 = M_PI / 3 * dfY;
    4042         383 :             double dfCosOver3 = cos(dfPIYover3);
    4043         383 :             double dfSinOver3 = sin(dfPIYover3);
    4044             : 
    4045             :             // "Naive":
    4046             :             // double dfSin = sin( M_PI * dfY );
    4047             :             // double dfCos = cos( M_PI * dfY );
    4048             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
    4049         383 :             double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
    4050         383 :             double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
    4051             : 
    4052         383 :             const double dfCosPiYScaleOver3 = psWrkStruct->dfCosPiYScaleOver3;
    4053         383 :             const double dfSinPiYScaleOver3 = psWrkStruct->dfSinPiYScaleOver3;
    4054         383 :             const double dfCosPiYScale = psWrkStruct->dfCosPiYScale;
    4055         383 :             const double dfSinPiYScale = psWrkStruct->dfSinPiYScale;
                     :             // Same normalisation as for the X weights: the Lanczos kernel
                     :             // factor is 3 / pi^2, consistent with the 1.0 used for dfY == 0.
    4056         383 :             constexpr double THREE_OVER_PI2 = 3.0 / (M_PI * M_PI);
    4057         383 :             padfWeightsYShifted[jMin] =
    4058         383 :                 dfY == 0
                     :                     ? 1.0
                     :                     : THREE_OVER_PI2 * dfSin * dfSinOver3 / (dfY * dfY);
    4059        7318 :             for (int j = jMin + 1; j <= jMax; ++j)
    4060             :             {
    4061        6935 :                 dfY += dfYScale;
    4062        6935 :                 const double dfNewSin =
    4063        6935 :                     dfSin * dfCosPiYScale + dfCos * dfSinPiYScale;
    4064        6935 :                 const double dfNewSinOver3 = dfSinOver3 * dfCosPiYScaleOver3 +
    4065        6935 :                                              dfCosOver3 * dfSinPiYScaleOver3;
    4066        6935 :                 padfWeightsYShifted[j] =
    4067             :                     dfY == 0
    4068        6935 :                         ? 1.0
    4069        6935 :                         : THREE_OVER_PI2 * dfNewSin * dfNewSinOver3 /
                     :                               (dfY * dfY);
    4070        6935 :                 const double dfNewCos =
    4071        6935 :                     dfCos * dfCosPiYScale - dfSin * dfSinPiYScale;
    4072        6935 :                 const double dfNewCosOver3 = dfCosOver3 * dfCosPiYScaleOver3 -
    4073        6935 :                                              dfSinOver3 * dfSinPiYScaleOver3;
    4074        6935 :                 dfSin = dfNewSin;
    4075        6935 :                 dfCos = dfNewCos;
    4076        6935 :                 dfSinOver3 = dfNewSinOver3;
    4077        6935 :                 dfCosOver3 = dfNewCosOver3;
    4078             :             }
    4079             : 
    4080         383 :             psWrkStruct->iLastSrcY = iSrcY;
    4081         383 :             psWrkStruct->dfLastDeltaY = dfDeltaY;
    4082             :         }
    4083             :     }
    4084             :     else
    4085             :     {
    4086      684742 :         while (jMin - dfDeltaY < -3.0)
    4087      270460 :             jMin++;
    4088      414282 :         while (jMax - dfDeltaY > 3.0)
    4089           0 :             jMax--;
    4090             : 
    4091      414282 :         if (iSrcY != psWrkStruct->iLastSrcY ||
    4092      413663 :             dfDeltaY != psWrkStruct->dfLastDeltaY)
    4093             :         {
    4094        1132 :             const double dfSinPIDeltaYOver3 = sin((-M_PI / 3.0) * dfDeltaY);
    4095        1132 :             const double dfSin2PIDeltaYOver3 =
    4096             :                 dfSinPIDeltaYOver3 * dfSinPIDeltaYOver3;
    4097             :             // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaY < PI/2.
    4098        1132 :             const double dfCosPIDeltaYOver3 = sqrt(1.0 - dfSin2PIDeltaYOver3);
    4099        1132 :             const double dfSinPIDeltaY =
    4100        1132 :                 (3.0 - 4.0 * dfSin2PIDeltaYOver3) * dfSinPIDeltaYOver3;
    4101        1132 :             const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
    4102        1132 :             const double dfInvPI2Over3xSinPIDeltaY =
    4103             :                 dfInvPI2Over3 * dfSinPIDeltaY;
    4104        1132 :             const double dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 =
    4105        1132 :                 -0.5 * dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3;
    4106        1132 :             const double dfSinPIOver3 = 0.8660254037844386;
    4107        1132 :             const double dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3 =
    4108        1132 :                 dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaY * dfCosPIDeltaYOver3;
    4109             :             const double padfCst[] = {
    4110        1132 :                 dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3,
    4111        1132 :                 dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 -
    4112             :                     dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3,
    4113        1132 :                 dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 +
    4114        1132 :                     dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3};
    4115             : 
    4116        7917 :             for (int j = jMin; j <= jMax; ++j)
    4117             :             {
    4118        6785 :                 const double dfY = j - dfDeltaY;
    4119        6785 :                 if (dfY == 0.0)
    4120         460 :                     padfWeightsYShifted[j] = 1.0;
    4121             :                 else
    4122        6325 :                     padfWeightsYShifted[j] = padfCst[(j + 3) % 3] / (dfY * dfY);
    4123             : #if DEBUG_VERBOSE
    4124             :                     // TODO(schwehr): AlmostEqual.
    4125             :                     // CPLAssert(fabs(padfWeightsYShifted[j] -
    4126             :                     //               GWKLanczosSinc(dfY, 3.0)) < 1e-10);
    4127             : #endif
    4128             :             }
    4129             : 
    4130        1132 :             psWrkStruct->iLastSrcY = iSrcY;
    4131        1132 :             psWrkStruct->dfLastDeltaY = dfDeltaY;
    4132             :         }
    4133             :     }
    4134             : 
    4135             :     // If we have no density information, we can simply compute the
    4136             :     // accumulated weight.
                     :     // The kernel is separable, so the sum of all 2D weights is the product
                     :     // of the sum of the X weights by the sum of the Y weights.
    4137      617144 :     if (padfRowDensity == nullptr)
    4138             :     {
    4139      617144 :         double dfRowAccWeight = 0.0;
    4140     7903490 :         for (int i = iMin; i <= iMax; ++i)
    4141             :         {
    4142     7286350 :             dfRowAccWeight += padfWeightsXShifted[i];
    4143             :         }
    4144      617144 :         double dfColAccWeight = 0.0;
    4145     7958040 :         for (int j = jMin; j <= jMax; ++j)
    4146             :         {
    4147     7340900 :             dfColAccWeight += padfWeightsYShifted[j];
    4148             :         }
    4149      617144 :         dfAccumulatorWeight = dfRowAccWeight * dfColAccWeight;
    4150             :     }
    4151             : 
    4152             :     // Loop over pixel rows in the kernel.
    4153             : 
    4154      617144 :     if (poWK->eWorkingDataType == GDT_Byte && !poWK->panUnifiedSrcValid &&
    4155      616524 :         !poWK->papanBandSrcValid && !poWK->pafUnifiedSrcDensity &&
    4156             :         !padfRowDensity)
    4157             :     {
    4158             :         // Optimization for Byte case without any masking/alpha
                     :         // Note: this path reads the source buffer directly and never writes
                     :         // *pdfImag.
    4159             : 
    4160      616524 :         if (dfAccumulatorWeight < 0.000001)
    4161             :         {
    4162           0 :             *pdfDensity = 0.0;
    4163           0 :             return false;
    4164             :         }
    4165             : 
    4166      616524 :         const GByte *pSrc =
    4167      616524 :             reinterpret_cast<const GByte *>(poWK->papabySrcImage[iBand]);
    4168      616524 :         pSrc += iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
    4169             : 
    4170             : #if defined(__x86_64) || defined(_M_X64)
    4171      616524 :         if (iMax - iMin + 1 == 6)
    4172             :         {
    4173             :             // This is just an optimized version of the general case in
    4174             :             // the else clause.
    4175             : 
    4176      346854 :             pSrc += iMin;
    4177      346854 :             int j = jMin;
    4178             :             const auto fourXWeights =
    4179      346854 :                 XMMReg4Double::Load4Val(padfWeightsXShifted + iMin);
    4180             : 
    4181             :             // Process 2 lines at the same time.
    4182     1375860 :             for (; j < jMax; j += 2)
    4183             :             {
    4184             :                 const XMMReg4Double v_acc =
    4185     1029000 :                     XMMReg4Double::Load4Val(pSrc) * fourXWeights;
    4186             :                 const XMMReg4Double v_acc2 =
    4187     1029000 :                     XMMReg4Double::Load4Val(pSrc + nSrcXSize) * fourXWeights;
    4188     1029000 :                 const double dfRowAcc = v_acc.GetHorizSum();
    4189     1029000 :                 const double dfRowAccEnd =
    4190     1029000 :                     pSrc[4] * padfWeightsXShifted[iMin + 4] +
    4191     1029000 :                     pSrc[5] * padfWeightsXShifted[iMin + 5];
    4192     1029000 :                 dfAccumulatorReal +=
    4193     1029000 :                     (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
    4194     1029000 :                 const double dfRowAcc2 = v_acc2.GetHorizSum();
    4195     1029000 :                 const double dfRowAcc2End =
    4196     1029000 :                     pSrc[nSrcXSize + 4] * padfWeightsXShifted[iMin + 4] +
    4197     1029000 :                     pSrc[nSrcXSize + 5] * padfWeightsXShifted[iMin + 5];
    4198     1029000 :                 dfAccumulatorReal +=
    4199     1029000 :                     (dfRowAcc2 + dfRowAcc2End) * padfWeightsYShifted[j + 1];
    4200     1029000 :                 pSrc += 2 * nSrcXSize;
    4201             :             }
    4202      346854 :             if (j == jMax)
    4203             :             {
    4204             :                 // Process last line if there's an odd number of them.
    4205             : 
    4206             :                 const XMMReg4Double v_acc =
    4207       86045 :                     XMMReg4Double::Load4Val(pSrc) * fourXWeights;
    4208       86045 :                 const double dfRowAcc = v_acc.GetHorizSum();
    4209       86045 :                 const double dfRowAccEnd =
    4210       86045 :                     pSrc[4] * padfWeightsXShifted[iMin + 4] +
    4211       86045 :                     pSrc[5] * padfWeightsXShifted[iMin + 5];
    4212       86045 :                 dfAccumulatorReal +=
    4213       86045 :                     (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
    4214             :             }
    4215             :         }
    4216             :         else
    4217             : #endif
    4218             :         {
    4219     5463580 :             for (int j = jMin; j <= jMax; ++j)
    4220             :             {
    4221     5193900 :                 int i = iMin;
    4222     5193900 :                 double dfRowAcc1 = 0.0;
    4223     5193900 :                 double dfRowAcc2 = 0.0;
    4224             :                 // A bit of loop unrolling
    4225    62750600 :                 for (; i < iMax; i += 2)
    4226             :                 {
    4227    57556700 :                     dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
    4228    57556700 :                     dfRowAcc2 += pSrc[i + 1] * padfWeightsXShifted[i + 1];
    4229             :                 }
    4230     5193900 :                 if (i == iMax)
    4231             :                 {
    4232             :                     // Process last column if there's an odd number of them.
    4233      426183 :                     dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
    4234             :                 }
    4235             : 
    4236     5193900 :                 dfAccumulatorReal +=
    4237     5193900 :                     (dfRowAcc1 + dfRowAcc2) * padfWeightsYShifted[j];
    4238     5193900 :                 pSrc += nSrcXSize;
    4239             :             }
    4240             :         }
    4241             : 
    4242             :         // Calculate the output taking into account weighting.
    4243      616524 :         if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
    4244             :         {
    4245      569230 :             const double dfInvAcc = 1.0 / dfAccumulatorWeight;
    4246      569230 :             *pdfReal = dfAccumulatorReal * dfInvAcc;
    4247      569230 :             *pdfDensity = 1.0;
    4248             :         }
    4249             :         else
    4250             :         {
    4251       47294 :             *pdfReal = dfAccumulatorReal;
    4252       47294 :             *pdfDensity = 1.0;
    4253             :         }
    4254             : 
    4255      616524 :         return true;
    4256             :     }
    4257             : 
                     :     // Start one row before jMin: the loop below advances iRowOffset by one
                     :     // row before using it.
    4258         620 :     GPtrDiff_t iRowOffset =
    4259         620 :         iSrcOffset + static_cast<GPtrDiff_t>(jMin - 1) * nSrcXSize + iMin;
    4260             : 
    4261         620 :     int nCountValid = 0;
    4262         620 :     const bool bIsNonComplex = !GDALDataTypeIsComplex(poWK->eWorkingDataType);
    4263             : 
    4264        3560 :     for (int j = jMin; j <= jMax; ++j)
    4265             :     {
    4266        2940 :         iRowOffset += nSrcXSize;
    4267             : 
    4268             :         // Get pixel values.
    4269             :         // We can potentially read extra elements after the "normal" end of the
    4270             :         // source arrays, but the contract of papabySrcImage[iBand],
    4271             :         // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
    4272             :         // is to have WARP_EXTRA_ELTS reserved at their end.
    4273        2940 :         if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
    4274             :                             padfRowDensity, padfRowReal, padfRowImag))
    4275           0 :             continue;
    4276             : 
    4277        2940 :         const double dfWeight1 = padfWeightsYShifted[j];
    4278             : 
    4279             :         // Iterate over pixels in row.
    4280        2940 :         if (padfRowDensity != nullptr)
    4281             :         {
    4282           0 :             for (int i = iMin; i <= iMax; ++i)
    4283             :             {
    4284             :                 // Skip sampling if pixel has zero density.
    4285           0 :                 if (padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD)
    4286           0 :                     continue;
    4287             : 
    4288           0 :                 nCountValid++;
    4289             : 
    4290             :                 //  Use a cached set of weights for this row.
    4291           0 :                 const double dfWeight2 = dfWeight1 * padfWeightsXShifted[i];
    4292             : 
    4293             :                 // Accumulate!
    4294           0 :                 dfAccumulatorReal += padfRowReal[i - iMin] * dfWeight2;
    4295           0 :                 dfAccumulatorImag += padfRowImag[i - iMin] * dfWeight2;
    4296           0 :                 dfAccumulatorDensity += padfRowDensity[i - iMin] * dfWeight2;
    4297           0 :                 dfAccumulatorWeight += dfWeight2;
    4298             :             }
    4299             :         }
    4300        2940 :         else if (bIsNonComplex)
    4301             :         {
    4302        1764 :             double dfRowAccReal = 0.0;
    4303       10560 :             for (int i = iMin; i <= iMax; ++i)
    4304             :             {
    4305        8796 :                 const double dfWeight2 = padfWeightsXShifted[i];
    4306             : 
    4307             :                 // Accumulate!
    4308        8796 :                 dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
    4309             :             }
    4310             : 
    4311        1764 :             dfAccumulatorReal += dfRowAccReal * dfWeight1;
    4312             :         }
    4313             :         else
    4314             :         {
    4315        1176 :             double dfRowAccReal = 0.0;
    4316        1176 :             double dfRowAccImag = 0.0;
    4317        7040 :             for (int i = iMin; i <= iMax; ++i)
    4318             :             {
    4319        5864 :                 const double dfWeight2 = padfWeightsXShifted[i];
    4320             : 
    4321             :                 // Accumulate!
    4322        5864 :                 dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
    4323        5864 :                 dfRowAccImag += padfRowImag[i - iMin] * dfWeight2;
    4324             :             }
    4325             : 
    4326        1176 :             dfAccumulatorReal += dfRowAccReal * dfWeight1;
    4327        1176 :             dfAccumulatorImag += dfRowAccImag * dfWeight1;
    4328             :         }
    4329             :     }
    4330             : 
                     :     // Reject the sample if there is (almost) no weight or, when density is
                     :     // used, no density or fewer than half of the kernel pixels are valid.
    4331         620 :     if (dfAccumulatorWeight < 0.000001 ||
    4332           0 :         (padfRowDensity != nullptr &&
    4333           0 :          (dfAccumulatorDensity < 0.000001 ||
    4334           0 :           nCountValid < (jMax - jMin + 1) * (iMax - iMin + 1) / 2)))
    4335             :     {
    4336           0 :         *pdfDensity = 0.0;
    4337           0 :         return false;
    4338             :     }
    4339             : 
    4340             :     // Calculate the output taking into account weighting.
    4341         620 :     if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
    4342             :     {
    4343           0 :         const double dfInvAcc = 1.0 / dfAccumulatorWeight;
    4344           0 :         *pdfReal = dfAccumulatorReal * dfInvAcc;
    4345           0 :         *pdfImag = dfAccumulatorImag * dfInvAcc;
    4346           0 :         if (padfRowDensity != nullptr)
    4347           0 :             *pdfDensity = dfAccumulatorDensity * dfInvAcc;
    4348             :         else
    4349           0 :             *pdfDensity = 1.0;
    4350             :     }
    4351             :     else
    4352             :     {
    4353         620 :         *pdfReal = dfAccumulatorReal;
    4354         620 :         *pdfImag = dfAccumulatorImag;
    4355         620 :         if (padfRowDensity != nullptr)
    4356           0 :             *pdfDensity = dfAccumulatorDensity;
    4357             :         else
    4358         620 :             *pdfDensity = 1.0;
    4359             :     }
    4360             : 
    4361         620 :     return true;
    4362             : }
    4363             : 
    4364             : /************************************************************************/
    4365             : /*                        GWKComputeWeights()                           */
    4366             : /************************************************************************/
    4367             : 
                       // Precomputes the separable filter weights for one kernel window.
                       //
                       // The filter functions are selected by eResample from apfGWKFilter and
                       // apfGWKFilter4Values (both are asserted to be non-null).
                       //
                       // Columns iMin..iMax map to padfWeightsHorizontal[0..iMax - iMin] and
                       // rows jMin..jMax map to padfWeightsVertical[0..jMax - jMin], so the
                       // caller must provide at least that many doubles in each array.
                       // The filter argument for column i is (i - dfDeltaX) * dfXScale, and
                       // for row j it is (j - dfDeltaY) * dfYScale.
                       //
                       // On return dfInvWeights holds
                       //   1 / (sum of horizontal weights * sum of vertical weights).
                       // NOTE(review): nothing guards against that product being zero.
    4368     3747920 : static void GWKComputeWeights(GDALResampleAlg eResample, int iMin, int iMax,
    4369             :                               double dfDeltaX, double dfXScale, int jMin,
    4370             :                               int jMax, double dfDeltaY, double dfYScale,
    4371             :                               double *padfWeightsHorizontal,
    4372             :                               double *padfWeightsVertical, double &dfInvWeights)
    4373             : {
    4374             : 
    4375     3747920 :     const FilterFuncType pfnGetWeight = apfGWKFilter[eResample];
    4376     3747920 :     CPLAssert(pfnGetWeight);
    4377     3747920 :     const FilterFunc4ValuesType pfnGetWeight4Values =
    4378     3747920 :         apfGWKFilter4Values[eResample];
    4379     3747920 :     CPLAssert(pfnGetWeight4Values);
    4380             : 
                           // Horizontal pass: groups of four columns first, then the leftovers.
    4381     3747920 :     int i = iMin;  // Used after for.
    4382     3747920 :     int iC = 0;    // Used after for.
    4383     3747920 :     double dfAccumulatorWeightHorizontal = 0.0;
    4384     8316580 :     for (; i + 2 < iMax; i += 4, iC += 4)
    4385             :     {
                               // Stage the four filter arguments in the output array (each one
                               // is the previous plus dfXScale), then pass that slice to the
                               // 4-value filter and add its return value to the running sum.
                               // Presumably the filter overwrites the four arguments in place
                               // with their weights and returns the sum of those weights -
                               // TODO confirm against the FilterFunc4ValuesType implementations.
    4386     4568320 :         padfWeightsHorizontal[iC] = (i - dfDeltaX) * dfXScale;
    4387     4568320 :         padfWeightsHorizontal[iC + 1] = padfWeightsHorizontal[iC] + dfXScale;
    4388     4568320 :         padfWeightsHorizontal[iC + 2] =
    4389     4568320 :             padfWeightsHorizontal[iC + 1] + dfXScale;
    4390     4568320 :         padfWeightsHorizontal[iC + 3] =
    4391     4568320 :             padfWeightsHorizontal[iC + 2] + dfXScale;
    4392     4568660 :         dfAccumulatorWeightHorizontal +=
    4393     4568320 :             pfnGetWeight4Values(padfWeightsHorizontal + iC);
    4394             :     }
                           // Remaining columns (fewer than four) use the scalar filter.
    4395     3962520 :     for (; i <= iMax; ++i, ++iC)
    4396             :     {
    4397      220112 :         const double dfWeight = pfnGetWeight((i - dfDeltaX) * dfXScale);
    4398      214256 :         padfWeightsHorizontal[iC] = dfWeight;
    4399      214256 :         dfAccumulatorWeightHorizontal += dfWeight;
    4400             :     }
    4401             : 
                           // Vertical pass: same scheme as above, using dfDeltaY / dfYScale.
    4402     3742410 :     int j = jMin;  // Used after for.
    4403     3742410 :     int jC = 0;    // Used after for.
    4404     3742410 :     double dfAccumulatorWeightVertical = 0.0;
    4405     7890240 :     for (; j + 2 < jMax; j += 4, jC += 4)
    4406             :     {
    4407     4146000 :         padfWeightsVertical[jC] = (j - dfDeltaY) * dfYScale;
    4408     4146000 :         padfWeightsVertical[jC + 1] = padfWeightsVertical[jC] + dfYScale;
    4409     4146000 :         padfWeightsVertical[jC + 2] = padfWeightsVertical[jC + 1] + dfYScale;
    4410     4146000 :         padfWeightsVertical[jC + 3] = padfWeightsVertical[jC + 2] + dfYScale;
    4411     4147840 :         dfAccumulatorWeightVertical +=
    4412     4146000 :             pfnGetWeight4Values(padfWeightsVertical + jC);
    4413             :     }
    4414     8253170 :     for (; j <= jMax; ++j, ++jC)
    4415             :     {
    4416     4510710 :         const double dfWeight = pfnGetWeight((j - dfDeltaY) * dfYScale);
    4417     4508930 :         padfWeightsVertical[jC] = dfWeight;
    4418     4508930 :         dfAccumulatorWeightVertical += dfWeight;
    4419             :     }
    4420             : 
                           // Normalisation factor for the separable 2D weighted sum.
    4421     3742460 :     dfInvWeights =
    4422     3742460 :         1. / (dfAccumulatorWeightHorizontal * dfAccumulatorWeightVertical);
    4423     3742460 : }
    4424             : 
    4425             : /************************************************************************/
    4426             : /*                        GWKResampleNoMasksT()                         */
    4427             : /************************************************************************/
    4428             : 
                       // Scalar resampler: computes one output sample of band iBand at source
                       // position (dfSrcX, dfSrcY) as a separable weighted sum over the kernel
                       // window, writes it to *pValue through GWKClampValueT<T> and returns
                       // true. Only poWK->papabySrcImage[iBand] is read; no mask array is
                       // consulted in this function.
                       //
                       // padfWeightsHorizontal, padfWeightsVertical and dfInvWeights are
                       // caller-owned scratch state. They are (re)computed only when
                       // iBand == 0; for any other band the values already stored there are
                       // used unchanged.
                       // NOTE(review): this presumably relies on the caller handling band 0
                       // first for each pixel - confirm at the call sites.
                       //
                       // When the position or the image size fails the sanity check below,
                       // the result of GWKBilinearResampleNoMasks4SampleT() is returned
                       // instead.
    4429             : template <class T>
    4430             : static bool
    4431             : GWKResampleNoMasksT(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
    4432             :                     double dfSrcY, T *pValue, double *padfWeightsHorizontal,
    4433             :                     double *padfWeightsVertical, double &dfInvWeights)
    4434             : 
    4435             : {
    4436             :     // Commonly used; save locally.
    4437             :     const int nSrcXSize = poWK->nSrcXSize;
    4438             :     const int nSrcYSize = poWK->nSrcYSize;
    4439             : 
                           // Integer part of the position shifted by half a pixel; the matching
                           // fractional parts are dfDeltaX / dfDeltaY further down.
    4440             :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    4441             :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
                           // Linear index of (iSrcX, iSrcY), widened before the multiplication.
    4442             :     const GPtrDiff_t iSrcOffset =
    4443             :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    4444             : 
    4445             :     const int nXRadius = poWK->nXRadius;
    4446             :     const int nYRadius = poWK->nYRadius;
    4447             : 
    4448             :     // Politely refuse to process invalid coordinates or obscenely small image.
    4449             :     if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
    4450             :         nYRadius > nSrcYSize)
    4451             :         return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    4452             :                                                   pValue);
    4453             : 
    4454             :     T *pSrcBand = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
    4455             :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    4456             :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    4457             : 
                           // The scale handed to the filter is capped at 1.0.
    4458             :     const double dfXScale = std::min(poWK->dfXScale, 1.0);
    4459             :     const double dfYScale = std::min(poWK->dfYScale, 1.0);
    4460             : 
                           // Kernel window is [1 - radius, radius] relative to iSrcX / iSrcY,
                           // clipped so that iSrcX + i stays within [0, nSrcXSize - 1] and
                           // iSrcY + j within [0, nSrcYSize - 1].
    4461             :     int iMin = 1 - nXRadius;
    4462             :     if (iSrcX + iMin < 0)
    4463             :         iMin = -iSrcX;
    4464             :     int iMax = nXRadius;
    4465             :     if (iSrcX + iMax >= nSrcXSize - 1)
    4466             :         iMax = nSrcXSize - 1 - iSrcX;
    4467             : 
    4468             :     int jMin = 1 - nYRadius;
    4469             :     if (iSrcY + jMin < 0)
    4470             :         jMin = -iSrcY;
    4471             :     int jMax = nYRadius;
    4472             :     if (iSrcY + jMax >= nSrcYSize - 1)
    4473             :         jMax = nSrcYSize - 1 - iSrcY;
    4474             : 
                           // Weights are refreshed for band 0 only (see the header comment).
    4475             :     if (iBand == 0)
    4476             :     {
    4477             :         GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
    4478             :                           jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
    4479             :                           padfWeightsVertical, dfInvWeights);
    4480             :     }
    4481             : 
    4482             :     // Loop over all rows in the kernel.
    4483             :     double dfAccumulator = 0.0;
    4484             :     for (int jC = 0, j = jMin; j <= jMax; ++j, ++jC)
    4485             :     {
                               // Linear index of column iSrcX in row iSrcY + j.
    4486             :         const GPtrDiff_t iSampJ =
    4487             :             iSrcOffset + static_cast<GPtrDiff_t>(j) * nSrcXSize;
    4488             : 
    4489             :         // Loop over all pixels in the row.
                               // Two partial sums are kept for the chunked part and merged
                               // right after the loop.
    4490             :         double dfAccumulatorLocal = 0.0;
    4491             :         double dfAccumulatorLocal2 = 0.0;
    4492             :         int iC = 0;
    4493             :         int i = iMin;
    4494             :         // Process by chunk of 4 cols.
    4495             :         for (; i + 2 < iMax; i += 4, iC += 4)
    4496             :         {
    4497             :             // Retrieve the pixel & accumulate.
    4498             :             dfAccumulatorLocal +=
    4499             :                 pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
    4500             :             dfAccumulatorLocal +=
    4501             :                 pSrcBand[i + 1 + iSampJ] * padfWeightsHorizontal[iC + 1];
    4502             :             dfAccumulatorLocal2 +=
    4503             :                 pSrcBand[i + 2 + iSampJ] * padfWeightsHorizontal[iC + 2];
    4504             :             dfAccumulatorLocal2 +=
    4505             :                 pSrcBand[i + 3 + iSampJ] * padfWeightsHorizontal[iC + 3];
    4506             :         }
    4507             :         dfAccumulatorLocal += dfAccumulatorLocal2;
                               // At most three columns are left: a pair first, then possibly
                               // one last single column.
    4508             :         if (i < iMax)
    4509             :         {
    4510             :             dfAccumulatorLocal +=
    4511             :                 pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
    4512             :             dfAccumulatorLocal +=
    4513             :                 pSrcBand[i + 1 + iSampJ] * padfWeightsHorizontal[iC + 1];
    4514             :             i += 2;
    4515             :             iC += 2;
    4516             :         }
    4517             :         if (i == iMax)
    4518             :         {
    4519             :             dfAccumulatorLocal +=
    4520             :                 pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
    4521             :         }
    4522             : 
                               // Apply the vertical weight of this row to the row sum.
    4523             :         dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
    4524             :     }
    4525             : 
                           // Normalise by the precomputed inverse weight sum and clamp to T.
    4526             :     *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
    4527             : 
    4528             :     return true;
    4529             : }
    4530             : 
    4531             : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
    4532             : /* Could possibly be used too on 32bit, but we would need to check at runtime */
    4533             : #if defined(__x86_64) || defined(_M_X64)
    4534             : 
    4535             : /************************************************************************/
    4536             : /*                    GWKResampleNoMasks_SSE2_T()                       */
    4537             : /************************************************************************/
    4538             : 
                       // Vectorised counterpart of the generic GWKResampleNoMasksT(): same
                       // parameters, same window clipping, same fallback to
                       // GWKBilinearResampleNoMasks4SampleT() and the same "weights are only
                       // recomputed when iBand == 0" rule. The result is written to *pValue
                       // through GWKClampValueT<T> and true is returned.
                       //
                       // The kernel is walked four rows at a time (all four rows share each
                       // load of the horizontal weights); rows that do not fill a group of
                       // four are handled one by one afterwards. Within a row, columns are
                       // consumed four at a time, then a pair, then a single column.
                       //
                       // NOTE(review): XMMReg4Double / XMMReg2Double are declared outside this
                       // excerpt. The comments below assume Load4Val / Load2Val read 4 / 2
                       // consecutive elements as doubles, AddToLow adds a 2-lane value into
                       // the low lanes of a 4-lane register, and GetHorizSum returns the sum
                       // of all lanes - confirm against their definitions.
    4539             : template <class T>
    4540     9164113 : static bool GWKResampleNoMasks_SSE2_T(const GDALWarpKernel *poWK, int iBand,
    4541             :                                       double dfSrcX, double dfSrcY, T *pValue,
    4542             :                                       double *padfWeightsHorizontal,
    4543             :                                       double *padfWeightsVertical,
    4544             :                                       double &dfInvWeights)
    4545             : {
    4546             :     // Commonly used; save locally.
    4547     9164113 :     const int nSrcXSize = poWK->nSrcXSize;
    4548     9164113 :     const int nSrcYSize = poWK->nSrcYSize;
    4549             : 
                           // Integer part of the position shifted by half a pixel, and the
                           // linear index of that pixel (widened before the multiplication).
    4550     9164113 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    4551     9164113 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    4552     9164113 :     const GPtrDiff_t iSrcOffset =
    4553     9164113 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    4554     9164113 :     const int nXRadius = poWK->nXRadius;
    4555     9164113 :     const int nYRadius = poWK->nYRadius;
    4556             : 
    4557             :     // Politely refuse to process invalid coordinates or obscenely small image.
    4558     9164113 :     if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
    4559             :         nYRadius > nSrcYSize)
    4560           2 :         return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    4561           3 :                                                   pValue);
    4562             : 
    4563     9173431 :     const T *pSrcBand =
    4564     9173431 :         reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
    4565             : 
                           // Fractional parts of the shifted position; scales are capped at 1.0.
    4566     9173431 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    4567     9173431 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    4568     9173431 :     const double dfXScale = std::min(poWK->dfXScale, 1.0);
    4569     9170971 :     const double dfYScale = std::min(poWK->dfYScale, 1.0);
    4570             : 
                           // Kernel window is [1 - radius, radius] relative to iSrcX / iSrcY,
                           // clipped so that iSrcX + i stays within [0, nSrcXSize - 1] and
                           // iSrcY + j within [0, nSrcYSize - 1].
    4571     9152431 :     int iMin = 1 - nXRadius;
    4572     9152431 :     if (iSrcX + iMin < 0)
    4573       43143 :         iMin = -iSrcX;
    4574     9152431 :     int iMax = nXRadius;
    4575     9152431 :     if (iSrcX + iMax >= nSrcXSize - 1)
    4576       38106 :         iMax = nSrcXSize - 1 - iSrcX;
    4577             : 
    4578     9152431 :     int jMin = 1 - nYRadius;
    4579     9152431 :     if (iSrcY + jMin < 0)
    4580       49554 :         jMin = -iSrcY;
    4581     9152431 :     int jMax = nYRadius;
    4582     9152431 :     if (iSrcY + jMax >= nSrcYSize - 1)
    4583       36028 :         jMax = nSrcYSize - 1 - iSrcY;
    4584             : 
                           // Weights are refreshed for band 0 only; other bands reuse whatever
                           // the caller's arrays and dfInvWeights currently hold.
    4585     9152431 :     if (iBand == 0)
    4586             :     {
    4587     3744991 :         GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
    4588             :                           jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
    4589             :                           padfWeightsVertical, dfInvWeights);
    4590             :     }
    4591             : 
                           // iSampJ is the linear index of column iSrcX in the current first
                           // row; it is advanced by the loop increments below.
    4592     9155921 :     GPtrDiff_t iSampJ = iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
    4593             :     // Process by chunk of 4 rows.
    4594     9155921 :     int jC = 0;
    4595     9155921 :     int j = jMin;
    4596     9155921 :     double dfAccumulator = 0.0;
    4597    19407493 :     for (; j + 2 < jMax; j += 4, iSampJ += 4 * nSrcXSize, jC += 4)
    4598             :     {
    4599             :         // Loop over all pixels in the row.
    4600    10257992 :         int iC = 0;
    4601    10257992 :         int i = iMin;
    4602             :         // Process by chunk of 4 cols.
                               // One accumulator per row of the current 4-row group.
    4603    10257992 :         XMMReg4Double v_acc_1 = XMMReg4Double::Zero();
    4604    10233532 :         XMMReg4Double v_acc_2 = XMMReg4Double::Zero();
    4605    10239762 :         XMMReg4Double v_acc_3 = XMMReg4Double::Zero();
    4606    10254642 :         XMMReg4Double v_acc_4 = XMMReg4Double::Zero();
    4607    26741980 :         for (; i + 2 < iMax; i += 4, iC += 4)
    4608             :         {
    4609             :             // Retrieve the pixel & accumulate.
    4610    16480288 :             XMMReg4Double v_pixels_1 =
    4611    16480288 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
    4612    16505088 :             XMMReg4Double v_pixels_2 =
    4613    16505088 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + nSrcXSize);
    4614    16515588 :             XMMReg4Double v_pixels_3 =
    4615    16515588 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
    4616    16508288 :             XMMReg4Double v_pixels_4 =
    4617    16508288 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
    4618             : 
                                   // The same four horizontal weights apply to all four rows.
    4619    16516088 :             XMMReg4Double v_padfWeight =
    4620    16516088 :                 XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
    4621             : 
    4622    16475288 :             v_acc_1 += v_pixels_1 * v_padfWeight;
    4623    16487688 :             v_acc_2 += v_pixels_2 * v_padfWeight;
    4624    16511288 :             v_acc_3 += v_pixels_3 * v_padfWeight;
    4625    16510888 :             v_acc_4 += v_pixels_4 * v_padfWeight;
    4626             :         }
    4627             : 
                               // Leftover pair of columns, still done with vector registers.
    4628    10261702 :         if (i < iMax)
    4629             :         {
    4630      142910 :             XMMReg2Double v_pixels_1 =
    4631      142910 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ);
    4632      142910 :             XMMReg2Double v_pixels_2 =
    4633      142910 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + nSrcXSize);
    4634      142910 :             XMMReg2Double v_pixels_3 =
    4635      142910 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
    4636      142910 :             XMMReg2Double v_pixels_4 =
    4637      142910 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
    4638             : 
    4639      142910 :             XMMReg2Double v_padfWeight =
    4640      142910 :                 XMMReg2Double::Load2Val(padfWeightsHorizontal + iC);
    4641             : 
    4642      142910 :             v_acc_1.AddToLow(v_pixels_1 * v_padfWeight);
    4643      142910 :             v_acc_2.AddToLow(v_pixels_2 * v_padfWeight);
    4644      142910 :             v_acc_3.AddToLow(v_pixels_3 * v_padfWeight);
    4645      142910 :             v_acc_4.AddToLow(v_pixels_4 * v_padfWeight);
    4646             : 
    4647      142910 :             i += 2;
    4648      142910 :             iC += 2;
    4649             :         }
    4650             : 
                               // Collapse each row accumulator to a scalar row sum.
    4651    10261702 :         double dfAccumulatorLocal_1 = v_acc_1.GetHorizSum();
    4652    10242552 :         double dfAccumulatorLocal_2 = v_acc_2.GetHorizSum();
    4653    10249302 :         double dfAccumulatorLocal_3 = v_acc_3.GetHorizSum();
    4654    10261302 :         double dfAccumulatorLocal_4 = v_acc_4.GetHorizSum();
    4655             : 
                               // Last single column, added in scalar code for each of the rows.
    4656    10251552 :         if (i == iMax)
    4657             :         {
    4658       49195 :             dfAccumulatorLocal_1 += static_cast<double>(pSrcBand[i + iSampJ]) *
    4659       49195 :                                     padfWeightsHorizontal[iC];
    4660       49195 :             dfAccumulatorLocal_2 +=
    4661       49195 :                 static_cast<double>(pSrcBand[i + iSampJ + nSrcXSize]) *
    4662       49195 :                 padfWeightsHorizontal[iC];
    4663       49195 :             dfAccumulatorLocal_3 +=
    4664       49195 :                 static_cast<double>(pSrcBand[i + iSampJ + 2 * nSrcXSize]) *
    4665       49195 :                 padfWeightsHorizontal[iC];
    4666       49195 :             dfAccumulatorLocal_4 +=
    4667       49195 :                 static_cast<double>(pSrcBand[i + iSampJ + 3 * nSrcXSize]) *
    4668       49195 :                 padfWeightsHorizontal[iC];
    4669             :         }
    4670             : 
                               // Apply each row's vertical weight to its row sum.
    4671    10251552 :         dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal_1;
    4672    10251552 :         dfAccumulator += padfWeightsVertical[jC + 1] * dfAccumulatorLocal_2;
    4673    10251552 :         dfAccumulator += padfWeightsVertical[jC + 2] * dfAccumulatorLocal_3;
    4674    10251552 :         dfAccumulator += padfWeightsVertical[jC + 3] * dfAccumulatorLocal_4;
    4675             :     }
                           // Rows that did not fill a group of four are processed one by one.
    4676    22263141 :     for (; j <= jMax; ++j, iSampJ += nSrcXSize, ++jC)
    4677             :     {
    4678             :         // Loop over all pixels in the row.
    4679    13092440 :         int iC = 0;
    4680    13092440 :         int i = iMin;
    4681             :         // Process by chunk of 4 cols.
    4682    13092440 :         XMMReg4Double v_acc = XMMReg4Double::Zero();
    4683    26163863 :         for (; i + 2 < iMax; i += 4, iC += 4)
    4684             :         {
    4685             :             // Retrieve the pixel & accumulate.
    4686    13074523 :             XMMReg4Double v_pixels =
    4687    13074523 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
    4688    13100623 :             XMMReg4Double v_padfWeight =
    4689    13100623 :                 XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
    4690             : 
    4691    13116523 :             v_acc += v_pixels * v_padfWeight;
    4692             :         }
    4693             : 
    4694    13089440 :         double dfAccumulatorLocal = v_acc.GetHorizSum();
    4695             : 
                               // Leftover columns in scalar code: a pair, then possibly a
                               // single one.
    4696    13113640 :         if (i < iMax)
    4697             :         {
    4698      173964 :             dfAccumulatorLocal +=
    4699      173964 :                 pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
    4700      173964 :             dfAccumulatorLocal +=
    4701      173964 :                 pSrcBand[i + 1 + iSampJ] * padfWeightsHorizontal[iC + 1];
    4702      173964 :             i += 2;
    4703      173964 :             iC += 2;
    4704             :         }
    4705    13113640 :         if (i == iMax)
    4706             :         {
    4707       33020 :             dfAccumulatorLocal += static_cast<double>(pSrcBand[i + iSampJ]) *
    4708       33020 :                                   padfWeightsHorizontal[iC];
    4709             :         }
    4710             : 
    4711    13113640 :         dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
    4712             :     }
    4713             : 
                           // Normalise by the precomputed inverse weight sum and clamp to T.
    4714     9170701 :     *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
    4715             : 
    4716     9179581 :     return true;
    4717             : }
    4718             : 
    4719             : /************************************************************************/
    4720             : /*                     GWKResampleNoMasksT<GByte>()                     */
    4721             : /************************************************************************/
    4722             : 
                       // Explicit specialization for GByte: forwards every argument unchanged
                       // to GWKResampleNoMasks_SSE2_T() and returns its result.
    4723             : template <>
    4724     8586280 : bool GWKResampleNoMasksT<GByte>(const GDALWarpKernel *poWK, int iBand,
    4725             :                                 double dfSrcX, double dfSrcY, GByte *pValue,
    4726             :                                 double *padfWeightsHorizontal,
    4727             :                                 double *padfWeightsVertical,
    4728             :                                 double &dfInvWeights)
    4729             : {
    4730     8586280 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    4731             :                                      padfWeightsHorizontal, padfWeightsVertical,
    4732     8564370 :                                      dfInvWeights);
    4733             : }
    4734             : 
    4735             : /************************************************************************/
    4736             : /*                     GWKResampleNoMasksT<GInt16>()                    */
    4737             : /************************************************************************/
    4738             : 
                       // Explicit specialization for GInt16: forwards every argument unchanged
                       // to GWKResampleNoMasks_SSE2_T() and returns its result.
    4739             : template <>
    4740      252563 : bool GWKResampleNoMasksT<GInt16>(const GDALWarpKernel *poWK, int iBand,
    4741             :                                  double dfSrcX, double dfSrcY, GInt16 *pValue,
    4742             :                                  double *padfWeightsHorizontal,
    4743             :                                  double *padfWeightsVertical,
    4744             :                                  double &dfInvWeights)
    4745             : {
    4746      252563 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    4747             :                                      padfWeightsHorizontal, padfWeightsVertical,
    4748      252563 :                                      dfInvWeights);
    4749             : }
    4750             : 
    4751             : /************************************************************************/
    4752             : /*                     GWKResampleNoMasksT<GUInt16>()                   */
    4753             : /************************************************************************/
    4754             : 
                       // Explicit specialization for GUInt16: forwards every argument unchanged
                       // to GWKResampleNoMasks_SSE2_T() and returns its result.
    4755             : template <>
    4756      343440 : bool GWKResampleNoMasksT<GUInt16>(const GDALWarpKernel *poWK, int iBand,
    4757             :                                   double dfSrcX, double dfSrcY, GUInt16 *pValue,
    4758             :                                   double *padfWeightsHorizontal,
    4759             :                                   double *padfWeightsVertical,
    4760             :                                   double &dfInvWeights)
    4761             : {
    4762      343440 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    4763             :                                      padfWeightsHorizontal, padfWeightsVertical,
    4764      343440 :                                      dfInvWeights);
    4765             : }
    4766             : 
    4767             : /************************************************************************/
    4768             : /*                     GWKResampleNoMasksT<float>()                     */
    4769             : /************************************************************************/
    4770             : 
                       // Explicit specialization for float: forwards every argument unchanged
                       // to GWKResampleNoMasks_SSE2_T() and returns its result.
    4771             : template <>
    4772        2500 : bool GWKResampleNoMasksT<float>(const GDALWarpKernel *poWK, int iBand,
    4773             :                                 double dfSrcX, double dfSrcY, float *pValue,
    4774             :                                 double *padfWeightsHorizontal,
    4775             :                                 double *padfWeightsVertical,
    4776             :                                 double &dfInvWeights)
    4777             : {
    4778        2500 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    4779             :                                      padfWeightsHorizontal, padfWeightsVertical,
    4780        2500 :                                      dfInvWeights);
    4781             : }
    4782             : 
    4783             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    4784             : 
    4785             : /************************************************************************/
    4786             : /*                     GWKResampleNoMasksT<double>()                    */
    4787             : /************************************************************************/
    4788             : 
                       // Explicit specialization for double, compiled only when
                       // INSTANTIATE_FLOAT64_SSE2_IMPL is defined: forwards every argument
                       // unchanged to GWKResampleNoMasks_SSE2_T() and returns its result.
    4789             : template <>
    4790             : bool GWKResampleNoMasksT<double>(const GDALWarpKernel *poWK, int iBand,
    4791             :                                  double dfSrcX, double dfSrcY, double *pValue,
    4792             :                                  double *padfWeightsHorizontal,
    4793             :                                  double *padfWeightsVertical,
    4794             :                                  double &dfInvWeights)
    4795             : {
    4796             :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    4797             :                                      padfWeightsHorizontal, padfWeightsVertical,
    4798             :                                      dfInvWeights);
    4799             : }
    4800             : 
    4801             : #endif /* INSTANTIATE_FLOAT64_SSE2_IMPL */
    4802             : 
    4803             : #endif /* defined(__x86_64) || defined(_M_X64) */
    4804             : 
    4805             : /************************************************************************/
    4806             : /*                     GWKRoundSourceCoordinates()                      */
    4807             : /************************************************************************/
    4808             : 
                       // Snaps the first nDstXSize entries of padfX / padfY to the nearest
                       // multiple of dfSrcCoordPrecision, in place.
                       //
                       // If snapping moves either coordinate by more than
                       // 0.5 * dfPct * dfSrcCoordPrecision, the entry is recomputed: it is
                       // reset to (iDstX + dfDstXOff, dfDstY, 0), passed alone through
                       // pfnTransformer (which also rewrites pabSuccess[iDstX]) and snapped
                       // again. dfPct is 0.8 unless dfErrorThreshold > 0 and
                       // dfSrcCoordPrecision / dfErrorThreshold >= 10, in which case it is
                       // 1 - 2 / (dfSrcCoordPrecision / dfErrorThreshold).
                       //
                       // padfZ is written only for recomputed entries. The return value of
                       // pfnTransformer is ignored and the second snap is applied whatever
                       // pabSuccess[iDstX] ends up being.
                       // NOTE(review): the incoming padfX / padfY are presumably the output
                       // of an approximate transformer and pfnTransformer the exact one -
                       // confirm at the call sites.
    4809        1000 : static void GWKRoundSourceCoordinates(
    4810             :     int nDstXSize, double *padfX, double *padfY, double *padfZ, int *pabSuccess,
    4811             :     double dfSrcCoordPrecision, double dfErrorThreshold,
    4812             :     GDALTransformerFunc pfnTransformer, void *pTransformerArg, double dfDstXOff,
    4813             :     double dfDstY)
    4814             : {
                           // Fraction of the half-precision step tolerated before an entry is
                           // recomputed.
    4815        1000 :     double dfPct = 0.8;
    4816        1000 :     if (dfErrorThreshold > 0 && dfSrcCoordPrecision / dfErrorThreshold >= 10.0)
    4817             :     {
    4818        1000 :         dfPct = 1.0 - 2 * 1.0 / (dfSrcCoordPrecision / dfErrorThreshold);
    4819             :     }
    4820        1000 :     const double dfExactTransformThreshold = 0.5 * dfPct * dfSrcCoordPrecision;
    4821             : 
    4822      501000 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    4823             :     {
                               // Remember the unsnapped values, then round to the nearest
                               // multiple of dfSrcCoordPrecision.
    4824      500000 :         const double dfXBefore = padfX[iDstX];
    4825      500000 :         const double dfYBefore = padfY[iDstX];
    4826      500000 :         padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
    4827             :                        dfSrcCoordPrecision;
    4828      500000 :         padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
    4829             :                        dfSrcCoordPrecision;
    4830             : 
    4831             :         // If we are in an uncertainty zone, go to non-approximated
    4832             :         // transformation.
    4833             :         // Due to the 80% of half-precision threshold, dfSrcCoordPrecision must
    4834             :         // be at least 10 times greater than the approximation error.
    4835      500000 :         if (fabs(dfXBefore - padfX[iDstX]) > dfExactTransformThreshold ||
    4836      399914 :             fabs(dfYBefore - padfY[iDstX]) > dfExactTransformThreshold)
    4837             :         {
                                   // Re-seed this single entry and transform it on its own
                                   // (second argument TRUE, point count 1).
    4838      180090 :             padfX[iDstX] = iDstX + dfDstXOff;
    4839      180090 :             padfY[iDstX] = dfDstY;
    4840      180090 :             padfZ[iDstX] = 0.0;
    4841      180090 :             pfnTransformer(pTransformerArg, TRUE, 1, padfX + iDstX,
    4842      180090 :                            padfY + iDstX, padfZ + iDstX, pabSuccess + iDstX);
                                   // Snap the freshly transformed coordinates as well.
    4843      180090 :             padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
    4844             :                            dfSrcCoordPrecision;
    4845      180090 :             padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
    4846             :                            dfSrcCoordPrecision;
    4847             :         }
    4848             :     }
    4849        1000 : }
    4850             : 
    4851             : /************************************************************************/
    4852             : /*                           GWKOpenCLCase()                            */
    4853             : /*                                                                      */
    4854             : /*      This is identical to GWKGeneralCase(), but functions via        */
    4855             : /*      OpenCL. This means we have vector optimization (SSE) and/or     */
    4856             : /*      GPU optimization depending on our prefs. The code itself is     */
    4857             : /*      general and not optimized, but by defining constants we can     */
    4858             : /*      make some pretty darn good code on the fly.                     */
    4859             : /************************************************************************/
    4860             : 
    4861             : #if defined(HAVE_OPENCL)
    4862           0 : static CPLErr GWKOpenCLCase(GDALWarpKernel *poWK)
    4863             : {
    4864           0 :     const int nDstXSize = poWK->nDstXSize;
    4865           0 :     const int nDstYSize = poWK->nDstYSize;
    4866           0 :     const int nSrcXSize = poWK->nSrcXSize;
    4867           0 :     const int nSrcYSize = poWK->nSrcYSize;
    4868           0 :     const int nDstXOff = poWK->nDstXOff;
    4869           0 :     const int nDstYOff = poWK->nDstYOff;
    4870           0 :     const int nSrcXOff = poWK->nSrcXOff;
    4871           0 :     const int nSrcYOff = poWK->nSrcYOff;
    4872           0 :     bool bUseImag = false;
    4873             : 
    4874             :     cl_channel_type imageFormat;
    4875           0 :     switch (poWK->eWorkingDataType)
    4876             :     {
    4877           0 :         case GDT_Byte:
    4878           0 :             imageFormat = CL_UNORM_INT8;
    4879           0 :             break;
    4880           0 :         case GDT_UInt16:
    4881           0 :             imageFormat = CL_UNORM_INT16;
    4882           0 :             break;
    4883           0 :         case GDT_CInt16:
    4884           0 :             bUseImag = true;
    4885             :             [[fallthrough]];
    4886           0 :         case GDT_Int16:
    4887           0 :             imageFormat = CL_SNORM_INT16;
    4888           0 :             break;
    4889           0 :         case GDT_CFloat32:
    4890           0 :             bUseImag = true;
    4891             :             [[fallthrough]];
    4892           0 :         case GDT_Float32:
    4893           0 :             imageFormat = CL_FLOAT;
    4894           0 :             break;
    4895           0 :         default:
    4896             :             // No support for higher precision formats.
    4897           0 :             CPLDebug("OpenCL", "Unsupported resampling OpenCL data type %d.",
    4898           0 :                      static_cast<int>(poWK->eWorkingDataType));
    4899           0 :             return CE_Warning;
    4900             :     }
    4901             : 
    4902             :     OCLResampAlg resampAlg;
    4903           0 :     switch (poWK->eResample)
    4904             :     {
    4905           0 :         case GRA_Bilinear:
    4906           0 :             resampAlg = OCL_Bilinear;
    4907           0 :             break;
    4908           0 :         case GRA_Cubic:
    4909           0 :             resampAlg = OCL_Cubic;
    4910           0 :             break;
    4911           0 :         case GRA_CubicSpline:
    4912           0 :             resampAlg = OCL_CubicSpline;
    4913           0 :             break;
    4914           0 :         case GRA_Lanczos:
    4915           0 :             resampAlg = OCL_Lanczos;
    4916           0 :             break;
    4917           0 :         default:
    4918             :             // No support for higher precision formats.
    4919           0 :             CPLDebug("OpenCL",
    4920             :                      "Unsupported resampling OpenCL resampling alg %d.",
    4921           0 :                      static_cast<int>(poWK->eResample));
    4922           0 :             return CE_Warning;
    4923             :     }
    4924             : 
    4925           0 :     struct oclWarper *warper = nullptr;
    4926             :     cl_int err;
    4927           0 :     CPLErr eErr = CE_None;
    4928             : 
    4929             :     // TODO(schwehr): Fix indenting.
    4930             :     try
    4931             :     {
    4932             : 
    4933             :         // Using a factor of 2 or 4 seems to have much less rounding error
    4934             :         // than 3 on the GPU.
    4935             :         // Then the rounding error can cause strange artifacts under the
    4936             :         // right conditions.
    4937           0 :         warper = GDALWarpKernelOpenCL_createEnv(
    4938             :             nSrcXSize, nSrcYSize, nDstXSize, nDstYSize, imageFormat,
    4939           0 :             poWK->nBands, 4, bUseImag, poWK->papanBandSrcValid != nullptr,
    4940             :             poWK->pafDstDensity, poWK->padfDstNoDataReal, resampAlg, &err);
    4941             : 
    4942           0 :         if (err != CL_SUCCESS || warper == nullptr)
    4943             :         {
    4944           0 :             eErr = CE_Warning;
    4945           0 :             if (warper != nullptr)
    4946           0 :                 throw eErr;
    4947           0 :             return eErr;
    4948             :         }
    4949             : 
    4950           0 :         CPLDebug("GDAL",
    4951             :                  "GDALWarpKernel()::GWKOpenCLCase() "
    4952             :                  "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d",
    4953             :                  nSrcXOff, nSrcYOff, nSrcXSize, nSrcYSize, nDstXOff, nDstYOff,
    4954             :                  nDstXSize, nDstYSize);
    4955             : 
    4956           0 :         if (!poWK->pfnProgress(poWK->dfProgressBase, "", poWK->pProgress))
    4957             :         {
    4958           0 :             CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
    4959           0 :             eErr = CE_Failure;
    4960           0 :             throw eErr;
    4961             :         }
    4962             : 
    4963             :         /* ====================================================================
    4964             :          */
    4965             :         /*      Loop over bands. */
    4966             :         /* ====================================================================
    4967             :          */
    4968           0 :         for (int iBand = 0; iBand < poWK->nBands; iBand++)
    4969             :         {
    4970           0 :             if (poWK->papanBandSrcValid != nullptr &&
    4971           0 :                 poWK->papanBandSrcValid[iBand] != nullptr)
    4972             :             {
    4973           0 :                 GDALWarpKernelOpenCL_setSrcValid(
    4974             :                     warper,
    4975           0 :                     reinterpret_cast<int *>(poWK->papanBandSrcValid[iBand]),
    4976             :                     iBand);
    4977           0 :                 if (err != CL_SUCCESS)
    4978             :                 {
    4979           0 :                     CPLError(
    4980             :                         CE_Failure, CPLE_AppDefined,
    4981             :                         "OpenCL routines reported failure (%d) on line %d.",
    4982             :                         static_cast<int>(err), __LINE__);
    4983           0 :                     eErr = CE_Failure;
    4984           0 :                     throw eErr;
    4985             :                 }
    4986             :             }
    4987             : 
    4988           0 :             err = GDALWarpKernelOpenCL_setSrcImg(
    4989           0 :                 warper, poWK->papabySrcImage[iBand], iBand);
    4990           0 :             if (err != CL_SUCCESS)
    4991             :             {
    4992           0 :                 CPLError(CE_Failure, CPLE_AppDefined,
    4993             :                          "OpenCL routines reported failure (%d) on line %d.",
    4994             :                          static_cast<int>(err), __LINE__);
    4995           0 :                 eErr = CE_Failure;
    4996           0 :                 throw eErr;
    4997             :             }
    4998             : 
    4999           0 :             err = GDALWarpKernelOpenCL_setDstImg(
    5000           0 :                 warper, poWK->papabyDstImage[iBand], iBand);
    5001           0 :             if (err != CL_SUCCESS)
    5002             :             {
    5003           0 :                 CPLError(CE_Failure, CPLE_AppDefined,
    5004             :                          "OpenCL routines reported failure (%d) on line %d.",
    5005             :                          static_cast<int>(err), __LINE__);
    5006           0 :                 eErr = CE_Failure;
    5007           0 :                 throw eErr;
    5008             :             }
    5009             :         }
    5010             : 
    5011             :         /* --------------------------------------------------------------------
    5012             :          */
    5013             :         /*      Allocate x,y,z coordinate arrays for transformation ... one */
    5014             :         /*      scanlines worth of positions. */
    5015             :         /* --------------------------------------------------------------------
    5016             :          */
    5017             : 
    5018             :         // For x, 2 *, because we cache the precomputed values at the end.
    5019             :         double *padfX =
    5020           0 :             static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    5021             :         double *padfY =
    5022           0 :             static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5023             :         double *padfZ =
    5024           0 :             static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5025             :         int *pabSuccess =
    5026           0 :             static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    5027           0 :         const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    5028           0 :             poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    5029           0 :         const double dfErrorThreshold = CPLAtof(CSLFetchNameValueDef(
    5030           0 :             poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    5031             : 
    5032             :         // Precompute values.
    5033           0 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5034           0 :             padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    5035             : 
    5036             :         /* ====================================================================
    5037             :          */
    5038             :         /*      Loop over output lines. */
    5039             :         /* ====================================================================
    5040             :          */
    5041           0 :         for (int iDstY = 0; iDstY < nDstYSize && eErr == CE_None; ++iDstY)
    5042             :         {
    5043             :             /* ----------------------------------------------------------------
    5044             :              */
    5045             :             /*      Setup points to transform to source image space. */
    5046             :             /* ----------------------------------------------------------------
    5047             :              */
    5048           0 :             memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    5049           0 :             const double dfYConst = iDstY + 0.5 + poWK->nDstYOff;
    5050           0 :             for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5051           0 :                 padfY[iDstX] = dfYConst;
    5052           0 :             memset(padfZ, 0, sizeof(double) * nDstXSize);
    5053             : 
    5054             :             /* ----------------------------------------------------------------
    5055             :              */
    5056             :             /*      Transform the points from destination pixel/line
    5057             :              * coordinates*/
    5058             :             /*      to source pixel/line coordinates. */
    5059             :             /* ----------------------------------------------------------------
    5060             :              */
    5061           0 :             poWK->pfnTransformer(poWK->pTransformerArg, TRUE, nDstXSize, padfX,
    5062             :                                  padfY, padfZ, pabSuccess);
    5063           0 :             if (dfSrcCoordPrecision > 0.0)
    5064             :             {
    5065           0 :                 GWKRoundSourceCoordinates(
    5066             :                     nDstXSize, padfX, padfY, padfZ, pabSuccess,
    5067             :                     dfSrcCoordPrecision, dfErrorThreshold, poWK->pfnTransformer,
    5068             :                     poWK->pTransformerArg, 0.5 + nDstXOff,
    5069           0 :                     iDstY + 0.5 + nDstYOff);
    5070             :             }
    5071             : 
    5072           0 :             err = GDALWarpKernelOpenCL_setCoordRow(
    5073             :                 warper, padfX, padfY, nSrcXOff, nSrcYOff, pabSuccess, iDstY);
    5074           0 :             if (err != CL_SUCCESS)
    5075             :             {
    5076           0 :                 CPLError(CE_Failure, CPLE_AppDefined,
    5077             :                          "OpenCL routines reported failure (%d) on line %d.",
    5078             :                          static_cast<int>(err), __LINE__);
    5079           0 :                 eErr = CE_Failure;
    5080           0 :                 break;
    5081             :             }
    5082             : 
    5083             :             // Update the valid & density masks because we don't do so in the
    5084             :             // kernel.
    5085           0 :             for (int iDstX = 0; iDstX < nDstXSize && eErr == CE_None; iDstX++)
    5086             :             {
    5087           0 :                 const double dfX = padfX[iDstX];
    5088           0 :                 const double dfY = padfY[iDstX];
    5089           0 :                 const GPtrDiff_t iDstOffset =
    5090           0 :                     iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    5091             : 
    5092             :                 // See GWKGeneralCase() for appropriate commenting.
    5093           0 :                 if (!pabSuccess[iDstX] || dfX < nSrcXOff || dfY < nSrcYOff)
    5094           0 :                     continue;
    5095             : 
    5096           0 :                 int iSrcX = static_cast<int>(dfX) - nSrcXOff;
    5097           0 :                 int iSrcY = static_cast<int>(dfY) - nSrcYOff;
    5098             : 
    5099           0 :                 if (iSrcX < 0 || iSrcX >= nSrcXSize || iSrcY < 0 ||
    5100             :                     iSrcY >= nSrcYSize)
    5101           0 :                     continue;
    5102             : 
    5103           0 :                 GPtrDiff_t iSrcOffset =
    5104           0 :                     iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    5105           0 :                 double dfDensity = 1.0;
    5106             : 
    5107           0 :                 if (poWK->pafUnifiedSrcDensity != nullptr && iSrcX >= 0 &&
    5108           0 :                     iSrcY >= 0 && iSrcX < nSrcXSize && iSrcY < nSrcYSize)
    5109           0 :                     dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    5110             : 
    5111           0 :                 GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    5112             : 
    5113             :                 // Because this is on the bit-wise level, it can't be done well
    5114             :                 // in OpenCL.
    5115           0 :                 if (poWK->panDstValid != nullptr)
    5116           0 :                     poWK->panDstValid[iDstOffset >> 5] |=
    5117           0 :                         0x01 << (iDstOffset & 0x1f);
    5118             :             }
    5119             :         }
    5120             : 
    5121           0 :         CPLFree(padfX);
    5122           0 :         CPLFree(padfY);
    5123           0 :         CPLFree(padfZ);
    5124           0 :         CPLFree(pabSuccess);
    5125             : 
    5126           0 :         if (eErr != CE_None)
    5127           0 :             throw eErr;
    5128             : 
    5129           0 :         err = GDALWarpKernelOpenCL_runResamp(
    5130             :             warper, poWK->pafUnifiedSrcDensity, poWK->panUnifiedSrcValid,
    5131             :             poWK->pafDstDensity, poWK->panDstValid, poWK->dfXScale,
    5132             :             poWK->dfYScale, poWK->dfXFilter, poWK->dfYFilter, poWK->nXRadius,
    5133             :             poWK->nYRadius, poWK->nFiltInitX, poWK->nFiltInitY);
    5134             : 
    5135           0 :         if (err != CL_SUCCESS)
    5136             :         {
    5137           0 :             CPLError(CE_Failure, CPLE_AppDefined,
    5138             :                      "OpenCL routines reported failure (%d) on line %d.",
    5139             :                      static_cast<int>(err), __LINE__);
    5140           0 :             eErr = CE_Failure;
    5141           0 :             throw eErr;
    5142             :         }
    5143             : 
    5144             :         /* ====================================================================
    5145             :          */
    5146             :         /*      Loop over output lines. */
    5147             :         /* ====================================================================
    5148             :          */
    5149           0 :         for (int iDstY = 0; iDstY < nDstYSize && eErr == CE_None; iDstY++)
    5150             :         {
    5151           0 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    5152             :             {
    5153           0 :                 void *rowReal = nullptr;
    5154           0 :                 void *rowImag = nullptr;
    5155           0 :                 GByte *pabyDst = poWK->papabyDstImage[iBand];
    5156             : 
    5157           0 :                 err = GDALWarpKernelOpenCL_getRow(warper, &rowReal, &rowImag,
    5158             :                                                   iDstY, iBand);
    5159           0 :                 if (err != CL_SUCCESS)
    5160             :                 {
    5161           0 :                     CPLError(
    5162             :                         CE_Failure, CPLE_AppDefined,
    5163             :                         "OpenCL routines reported failure (%d) on line %d.",
    5164             :                         static_cast<int>(err), __LINE__);
    5165           0 :                     eErr = CE_Failure;
    5166           0 :                     throw eErr;
    5167             :                 }
    5168             : 
    5169             :                 // Copy the data from the warper to GDAL's memory.
    5170           0 :                 switch (poWK->eWorkingDataType)
    5171             :                 {
    5172           0 :                     case GDT_Byte:
    5173           0 :                         memcpy(&(pabyDst[iDstY * nDstXSize]), rowReal,
    5174             :                                sizeof(GByte) * nDstXSize);
    5175           0 :                         break;
    5176           0 :                     case GDT_Int16:
    5177           0 :                         memcpy(&(reinterpret_cast<GInt16 *>(
    5178           0 :                                    pabyDst)[iDstY * nDstXSize]),
    5179           0 :                                rowReal, sizeof(GInt16) * nDstXSize);
    5180           0 :                         break;
    5181           0 :                     case GDT_UInt16:
    5182           0 :                         memcpy(&(reinterpret_cast<GUInt16 *>(
    5183           0 :                                    pabyDst)[iDstY * nDstXSize]),
    5184           0 :                                rowReal, sizeof(GUInt16) * nDstXSize);
    5185           0 :                         break;
    5186           0 :                     case GDT_Float32:
    5187           0 :                         memcpy(&(reinterpret_cast<float *>(
    5188           0 :                                    pabyDst)[iDstY * nDstXSize]),
    5189           0 :                                rowReal, sizeof(float) * nDstXSize);
    5190           0 :                         break;
    5191           0 :                     case GDT_CInt16:
    5192             :                     {
    5193           0 :                         GInt16 *pabyDstI16 = &(reinterpret_cast<GInt16 *>(
    5194           0 :                             pabyDst)[iDstY * nDstXSize]);
    5195           0 :                         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5196             :                         {
    5197           0 :                             pabyDstI16[iDstX * 2] =
    5198           0 :                                 static_cast<GInt16 *>(rowReal)[iDstX];
    5199           0 :                             pabyDstI16[iDstX * 2 + 1] =
    5200           0 :                                 static_cast<GInt16 *>(rowImag)[iDstX];
    5201             :                         }
    5202             :                     }
    5203           0 :                     break;
    5204           0 :                     case GDT_CFloat32:
    5205             :                     {
    5206           0 :                         float *pabyDstF32 = &(reinterpret_cast<float *>(
    5207           0 :                             pabyDst)[iDstY * nDstXSize]);
    5208           0 :                         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5209             :                         {
    5210           0 :                             pabyDstF32[iDstX * 2] =
    5211           0 :                                 static_cast<float *>(rowReal)[iDstX];
    5212           0 :                             pabyDstF32[iDstX * 2 + 1] =
    5213           0 :                                 static_cast<float *>(rowImag)[iDstX];
    5214             :                         }
    5215             :                     }
    5216           0 :                     break;
    5217           0 :                     default:
    5218             :                         // No support for higher precision formats.
    5219           0 :                         CPLError(CE_Failure, CPLE_AppDefined,
    5220             :                                  "Unsupported resampling OpenCL data type %d.",
    5221           0 :                                  static_cast<int>(poWK->eWorkingDataType));
    5222           0 :                         eErr = CE_Failure;
    5223           0 :                         throw eErr;
    5224             :                 }
    5225             :             }
    5226             :         }
    5227             :     }
    5228           0 :     catch (const CPLErr &)
    5229             :     {
    5230             :     }
    5231             : 
    5232           0 :     if ((err = GDALWarpKernelOpenCL_deleteEnv(warper)) != CL_SUCCESS)
    5233             :     {
    5234           0 :         CPLError(CE_Failure, CPLE_AppDefined,
    5235             :                  "OpenCL routines reported failure (%d) on line %d.",
    5236             :                  static_cast<int>(err), __LINE__);
    5237           0 :         return CE_Failure;
    5238             :     }
    5239             : 
    5240           0 :     return eErr;
    5241             : }
    5242             : #endif /* defined(HAVE_OPENCL) */
    5243             : 
    5244             : /************************************************************************/
    5245             : /*                     GWKCheckAndComputeSrcOffsets()                   */
    5246             : /************************************************************************/
    5247             : static CPL_INLINE bool
    5248   109623000 : GWKCheckAndComputeSrcOffsets(GWKJobStruct *psJob, int *_pabSuccess, int _iDstX,
    5249             :                              int _iDstY, double *_padfX, double *_padfY,
    5250             :                              int _nSrcXSize, int _nSrcYSize,
    5251             :                              GPtrDiff_t &iSrcOffset)
    5252             : {
    5253   109623000 :     const GDALWarpKernel *_poWK = psJob->poWK;
    5254   109749000 :     for (int iTry = 0; iTry < 2; ++iTry)
    5255             :     {
    5256   109757000 :         if (iTry == 1)
    5257             :         {
    5258             :             // If the source coordinate is slightly outside of the source raster
    5259             :             // retry to transform it alone, so that the exact coordinate
    5260             :             // transformer is used.
    5261             : 
    5262      125624 :             _padfX[_iDstX] = _iDstX + 0.5 + _poWK->nDstXOff;
    5263      125624 :             _padfY[_iDstX] = _iDstY + 0.5 + _poWK->nDstYOff;
    5264      125624 :             double dfZ = 0;
    5265      125624 :             _poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1,
    5266      125624 :                                   _padfX + _iDstX, _padfY + _iDstX, &dfZ,
    5267      125624 :                                   _pabSuccess + _iDstX);
    5268             :         }
    5269   109757000 :         if (!_pabSuccess[_iDstX])
    5270     3593220 :             return false;
    5271             : 
    5272             :         // If this happens this is likely the symptom of a bug somewhere.
    5273   106164000 :         if (std::isnan(_padfX[_iDstX]) || std::isnan(_padfY[_iDstX]))
    5274             :         {
    5275             :             static bool bNanCoordFound = false;
    5276           0 :             if (!bNanCoordFound)
    5277             :             {
    5278           0 :                 CPLDebug("WARP",
    5279             :                          "GWKCheckAndComputeSrcOffsets(): "
    5280             :                          "NaN coordinate found on point %d.",
    5281             :                          _iDstX);
    5282           0 :                 bNanCoordFound = true;
    5283             :             }
    5284           0 :             return false;
    5285             :         }
    5286             : 
    5287             :         /* --------------------------------------------------------------------
    5288             :          */
    5289             :         /*      Figure out what pixel we want in our source raster, and skip */
    5290             :         /*      further processing if it is well off the source image. */
    5291             :         /* --------------------------------------------------------------------
    5292             :          */
    5293             :         /* We test against the value before casting to avoid the */
    5294             :         /* problem of asymmetric truncation effects around zero.  That is */
    5295             :         /* -0.5 will be 0 when cast to an int. */
    5296   106158000 :         if (_padfX[_iDstX] < _poWK->nSrcXOff)
    5297             :         {
    5298             :             // If the source coordinate is slightly outside of the source raster
    5299             :             // retry to transform it alone, so that the exact coordinate
    5300             :             // transformer is used.
    5301     4137250 :             if (iTry == 0 && _padfX[_iDstX] > _poWK->nSrcXOff - 1)
    5302       21433 :                 continue;
    5303     4115820 :             return false;
    5304             :         }
    5305             : 
    5306   102021000 :         if (_padfY[_iDstX] < _poWK->nSrcYOff)
    5307             :         {
    5308             :             // If the source coordinate is slightly outside of the source raster
    5309             :             // retry to transform it alone, so that the exact coordinate
    5310             :             // transformer is used.
    5311     4792200 :             if (iTry == 0 && _padfY[_iDstX] > _poWK->nSrcYOff - 1)
    5312       38435 :                 continue;
    5313     4753770 :             return false;
    5314             :         }
    5315             : 
    5316             :         // Check for potential overflow when casting from float to int, (if
    5317             :         // operating outside natural projection area, padfX/Y can be a very huge
    5318             :         // positive number before doing the actual conversion), as such cast is
    5319             :         // undefined behavior that can trigger exception with some compilers
    5320             :         // (see #6753)
    5321    97229000 :         if (_padfX[_iDstX] + 1e-10 > _nSrcXSize + _poWK->nSrcXOff)
    5322             :         {
    5323             :             // If the source coordinate is slightly outside of the source raster
    5324             :             // retry to transform it alone, so that the exact coordinate
    5325             :             // transformer is used.
    5326     3496360 :             if (iTry == 0 && _padfX[_iDstX] < _nSrcXSize + _poWK->nSrcXOff + 1)
    5327       33239 :                 continue;
    5328     3463130 :             return false;
    5329             :         }
    5330    93732600 :         if (_padfY[_iDstX] + 1e-10 > _nSrcYSize + _poWK->nSrcYOff)
    5331             :         {
    5332             :             // If the source coordinate is slightly outside of the source raster
    5333             :             // retry to transform it alone, so that the exact coordinate
    5334             :             // transformer is used.
    5335     3731400 :             if (iTry == 0 && _padfY[_iDstX] < _nSrcYSize + _poWK->nSrcYOff + 1)
    5336       32517 :                 continue;
    5337     3698880 :             return false;
    5338             :         }
    5339             : 
    5340    90001200 :         break;
    5341             :     }
    5342             : 
    5343    89993100 :     int iSrcX = static_cast<int>(_padfX[_iDstX] + 1.0e-10) - _poWK->nSrcXOff;
    5344    89993100 :     int iSrcY = static_cast<int>(_padfY[_iDstX] + 1.0e-10) - _poWK->nSrcYOff;
    5345    89993100 :     if (iSrcX == _nSrcXSize)
    5346           0 :         iSrcX--;
    5347    89993100 :     if (iSrcY == _nSrcYSize)
    5348           0 :         iSrcY--;
    5349             : 
    5350             :     // Those checks should normally be OK given the previous ones.
    5351    89993100 :     CPLAssert(iSrcX >= 0);
    5352    89993100 :     CPLAssert(iSrcY >= 0);
    5353    89993100 :     CPLAssert(iSrcX < _nSrcXSize);
    5354    89993100 :     CPLAssert(iSrcY < _nSrcYSize);
    5355             : 
    5356    89993100 :     iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * _nSrcXSize;
    5357             : 
    5358    89993100 :     return true;
    5359             : }
    5360             : 
    5361             : /************************************************************************/
    5362             : /*                   GWKOneSourceCornerFailsToReproject()               */
    5363             : /************************************************************************/
    5364             : /* Tells whether at least one corner of the source window fails to
                     :  * reproject.  The four corners (nSrcXOff + {0, nSrcXSize},
                     :  * nSrcYOff + {0, nSrcYSize}) of psJob->poWK are handed one at a time
                     :  * to poWK->pfnTransformer, together with psJob->pTransformerArg.
                     :  * Returns true as soon as one call leaves nSuccess at zero, and false
                     :  * when all four corners transform successfully.
                     :  */
    5365         719 : static bool GWKOneSourceCornerFailsToReproject(GWKJobStruct *psJob)
    5366             : {
    5367         719 :     GDALWarpKernel *poWK = psJob->poWK;
    5368        2147 :     for (int iY = 0; iY <= 1; ++iY)
    5369             :     {
    5370        4290 :         for (int iX = 0; iX <= 1; ++iX)
    5371             :         {  // iX / iY pick the near (0) or far (1) edge of the window.
    5372        2862 :             double dfXTmp = poWK->nSrcXOff + iX * poWK->nSrcXSize;
    5373        2862 :             double dfYTmp = poWK->nSrcYOff + iY * poWK->nSrcYSize;
    5374        2862 :             double dfZTmp = 0;
    5375        2862 :             int nSuccess = FALSE;
    5376        2862 :             poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp,
    5377             :                                  &dfYTmp, &dfZTmp, &nSuccess);  // FALSE: presumably source -> destination (the warp loops pass TRUE for destination -> source) - confirm.
    5378        2862 :             if (!nSuccess)
    5379           6 :                 return true;
    5380             :         }
    5381             :     }
    5382         713 :     return false;
    5383             : }
    5384             : 
    5385             : /************************************************************************/
    5386             : /*                       GWKAdjustSrcOffsetOnEdge()                     */
    5387             : /************************************************************************/
    5388             : /* Neighbour search used by GWKGeneralCaseThread() for a source pixel
                     :  * that is not flagged in poWK->panUnifiedSrcValid.
                     :  *
                     :  * iSrcOffset is split into a column (iSrcOffset % nSrcXSize) and a
                     :  * row (iSrcOffset / nSrcXSize), and up to three positions are sent
                     :  * through poWK->pfnTransformer: (column, row), then (column, row + 1),
                     :  * then (column + 1, row).  A probe is only attempted if the previous
                     :  * one succeeded.
                     :  *
                     :  * When a probe failed, the neighbours at +1 column, +1 row, -1 column
                     :  * and -1 row are examined in that order, restricted to the source
                     :  * window, and iSrcOffset is moved to the first one flagged valid.
                     :  *
                     :  * Returns true if iSrcOffset was moved to a valid neighbour.  Returns
                     :  * false, leaving iSrcOffset unchanged, when all attempted probes
                     :  * succeeded or when no neighbour qualifies.
                     :  */
    5389        9714 : static bool GWKAdjustSrcOffsetOnEdge(GWKJobStruct *psJob,
    5390             :                                      GPtrDiff_t &iSrcOffset)
    5391             : {
    5392        9714 :     GDALWarpKernel *poWK = psJob->poWK;
    5393        9714 :     const int nSrcXSize = poWK->nSrcXSize;
    5394        9714 :     const int nSrcYSize = poWK->nSrcYSize;
    5395             : 
    5396             :     // Check if the computed source position slightly altered
    5397             :     // fails to reproject. If so, then we are at the edge of
    5398             :     // the validity area, and it is worth checking neighbour
    5399             :     // source pixels for validity.
    5400        9714 :     int nSuccess = FALSE;
    5401             :     {  // Probe 1: the pixel itself.
    5402        9714 :         double dfXTmp =
    5403        9714 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5404        9714 :         double dfYTmp =
    5405        9714 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5406        9714 :         double dfZTmp = 0;
    5407        9714 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5408             :                              &dfZTmp, &nSuccess);
    5409             :     }
    5410        9714 :     if (nSuccess)
    5411             :     {  // Probe 2: same column, next row.
    5412        6996 :         double dfXTmp =
    5413        6996 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5414        6996 :         double dfYTmp =
    5415        6996 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
    5416        6996 :         double dfZTmp = 0;
    5417        6996 :         nSuccess = FALSE;
    5418        6996 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5419             :                              &dfZTmp, &nSuccess);
    5420             :     }
    5421        9714 :     if (nSuccess)
    5422             :     {  // Probe 3: next column, same row.
    5423        5624 :         double dfXTmp =
    5424        5624 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
    5425        5624 :         double dfYTmp =
    5426        5624 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5427        5624 :         double dfZTmp = 0;
    5428        5624 :         nSuccess = FALSE;
    5429        5624 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5430             :                              &dfZTmp, &nSuccess);
    5431             :     }
    5432             : // Every branch below requires !nSuccess, i.e. that one of the probes failed.
    5433       14166 :     if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
    5434        4452 :         CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + 1))
    5435             :     {  // Neighbour one column further, if still inside the row.
    5436        1860 :         iSrcOffset++;
    5437        1860 :         return true;
    5438             :     }
    5439       10290 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
    5440        2436 :              CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + nSrcXSize))
    5441             :     {  // Neighbour one row further, if still inside the window.
    5442        1334 :         iSrcOffset += nSrcXSize;
    5443        1334 :         return true;
    5444             :     }
    5445        7838 :     else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
    5446        1318 :              CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - 1))
    5447             :     {  // Neighbour one column back, unless already in the first column.
    5448         956 :         iSrcOffset--;
    5449         956 :         return true;
    5450             :     }
    5451        5924 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
    5452         360 :              CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - nSrcXSize))
    5453             :     {  // Neighbour one row back, unless already in the first row.
    5454         340 :         iSrcOffset -= nSrcXSize;
    5455         340 :         return true;
    5456             :     }
    5457             : // Nothing to adjust: iSrcOffset keeps its incoming value.
    5458        5224 :     return false;
    5459             : }
    5460             : 
    5461             : /************************************************************************/
    5462             : /*                 GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity()          */
    5463             : /************************************************************************/
    5464             : /* Counterpart of GWKAdjustSrcOffsetOnEdge() driven by the unified
                     :  * source density array instead of the unified validity mask.  It is
                     :  * called by GWKGeneralCaseThread() when
                     :  * poWK->pafUnifiedSrcDensity[iSrcOffset] is below
                     :  * SRC_DENSITY_THRESHOLD.
                     :  *
                     :  * The same three positions are probed with poWK->pfnTransformer:
                     :  * (column, row), then (column, row + 1), then (column + 1, row), each
                     :  * one only if the previous probe succeeded.  When a probe failed, the
                     :  * neighbours at +1 column, +1 row, -1 column and -1 row are examined
                     :  * in that order, restricted to the source window, and iSrcOffset is
                     :  * moved to the first one whose density is at least
                     :  * SRC_DENSITY_THRESHOLD.
                     :  *
                     :  * Returns true if iSrcOffset was moved, false otherwise (iSrcOffset
                     :  * is then unchanged).
                     :  */
    5465           0 : static bool GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(GWKJobStruct *psJob,
    5466             :                                                       GPtrDiff_t &iSrcOffset)
    5467             : {
    5468           0 :     GDALWarpKernel *poWK = psJob->poWK;
    5469           0 :     const int nSrcXSize = poWK->nSrcXSize;
    5470           0 :     const int nSrcYSize = poWK->nSrcYSize;
    5471             : 
    5472             :     // Check if the computed source position slightly altered
    5473             :     // fails to reproject. If so, then we are at the edge of
    5474             :     // the validity area, and it is worth checking neighbour
    5475             :     // source pixels for validity.
    5476           0 :     int nSuccess = FALSE;
    5477             :     {  // Probe 1: the pixel itself.
    5478           0 :         double dfXTmp =
    5479           0 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5480           0 :         double dfYTmp =
    5481           0 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5482           0 :         double dfZTmp = 0;
    5483           0 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5484             :                              &dfZTmp, &nSuccess);
    5485             :     }
    5486           0 :     if (nSuccess)
    5487             :     {  // Probe 2: same column, next row.
    5488           0 :         double dfXTmp =
    5489           0 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5490           0 :         double dfYTmp =
    5491           0 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
    5492           0 :         double dfZTmp = 0;
    5493           0 :         nSuccess = FALSE;
    5494           0 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5495             :                              &dfZTmp, &nSuccess);
    5496             :     }
    5497           0 :     if (nSuccess)
    5498             :     {  // Probe 3: next column, same row.
    5499           0 :         double dfXTmp =
    5500           0 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
    5501           0 :         double dfYTmp =
    5502           0 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5503           0 :         double dfZTmp = 0;
    5504           0 :         nSuccess = FALSE;
    5505           0 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5506             :                              &dfZTmp, &nSuccess);
    5507             :     }
    5508             : // Every branch below requires !nSuccess, i.e. that one of the probes failed.
    5509           0 :     if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
    5510           0 :         poWK->pafUnifiedSrcDensity[iSrcOffset + 1] >= SRC_DENSITY_THRESHOLD)
    5511             :     {  // Neighbour one column further, if still inside the row.
    5512           0 :         iSrcOffset++;
    5513           0 :         return true;
    5514             :     }
    5515           0 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
    5516           0 :              poWK->pafUnifiedSrcDensity[iSrcOffset + nSrcXSize] >=
    5517             :                  SRC_DENSITY_THRESHOLD)
    5518             :     {  // Neighbour one row further, if still inside the window.
    5519           0 :         iSrcOffset += nSrcXSize;
    5520           0 :         return true;
    5521             :     }
    5522           0 :     else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
    5523           0 :              poWK->pafUnifiedSrcDensity[iSrcOffset - 1] >=
    5524             :                  SRC_DENSITY_THRESHOLD)
    5525             :     {  // Neighbour one column back, unless already in the first column.
    5526           0 :         iSrcOffset--;
    5527           0 :         return true;
    5528             :     }
    5529           0 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
    5530           0 :              poWK->pafUnifiedSrcDensity[iSrcOffset - nSrcXSize] >=
    5531             :                  SRC_DENSITY_THRESHOLD)
    5532             :     {  // Neighbour one row back, unless already in the first row.
    5533           0 :         iSrcOffset -= nSrcXSize;
    5534           0 :         return true;
    5535             :     }
    5536             : // Nothing to adjust: iSrcOffset keeps its incoming value.
    5537           0 :     return false;
    5538             : }
    5539             : 
    5540             : /************************************************************************/
    5541             : /*                           GWKGeneralCase()                           */
    5542             : /*                                                                      */
    5543             : /*      This is the most general case.  It attempts to handle all       */
    5544             : /*      possible features with relatively little concern for            */
    5545             : /*      efficiency.                                                     */
    5546             : /************************************************************************/
    5547             : /* Worker passed to GWKRun() by GWKGeneralCase(); pData is the
                     :  * GWKJobStruct of the job.  Destination lines iYMin (included) to
                     :  * iYMax (excluded) are handled one scanline at a time:
                     :  *  - the centre (+ 0.5) of each destination pixel is transformed to
                     :  *    source pixel/line coordinates with poWK->pfnTransformer, and
                     :  *    passed to GWKRoundSourceCoordinates() when the
                     :  *    SRC_COORD_PRECISION warp option is > 0;
                     :  *  - pixels for which GWKCheckAndComputeSrcOffsets() returns false,
                     :  *    or whose source pixel is rejected by the unified density or
                     :  *    validity mask, are skipped;
                     :  *  - for every band a value is fetched (single-pixel read,
                     :  *    4-sample bilinear or cubic, or the generic pfnGWKResample
                     :  *    kernel), optionally vertically shifted, and written with
                     :  *    GWKSetPixelValue();
                     :  *  - the destination density and validity masks are updated.
                     :  * After each line psJob->pfnProgress, if set, is called and a
                     :  * non-zero return stops the loop.  The scratch buffers are released
                     :  * before returning.
                     :  */
    5548         243 : static void GWKGeneralCaseThread(void *pData)
    5549             : {
    5550         243 :     GWKJobStruct *psJob = reinterpret_cast<GWKJobStruct *>(pData);
    5551         243 :     GDALWarpKernel *poWK = psJob->poWK;
    5552         243 :     const int iYMin = psJob->iYMin;
    5553         243 :     const int iYMax = psJob->iYMax;
    5554             :     const double dfMultFactorVerticalShiftPipeline =
    5555         243 :         poWK->bApplyVerticalShift
    5556         243 :             ? CPLAtof(CSLFetchNameValueDef(
    5557           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    5558             :                   "1.0"))
    5559         243 :             : 0.0;
    5560             : // The factor above is only read in the band loop when bApplyVerticalShift is set.
    5561         243 :     int nDstXSize = poWK->nDstXSize;
    5562         243 :     int nSrcXSize = poWK->nSrcXSize;
    5563         243 :     int nSrcYSize = poWK->nSrcYSize;
    5564             : 
    5565             :     /* -------------------------------------------------------------------- */
    5566             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    5567             :     /*      scanlines worth of positions.                                   */
    5568             :     /* -------------------------------------------------------------------- */
    5569             :     // For x, 2 *, because we cache the precomputed values at the end.
    5570             :     double *padfX =
    5571         243 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    5572             :     double *padfY =
    5573         243 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5574             :     double *padfZ =
    5575         243 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5576         243 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    5577             : // Both scale factors >= 0.95 enables the dedicated 4-sample bilinear/cubic routines.
    5578         243 :     const bool bUse4SamplesFormula =
    5579         243 :         poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
    5580             : // Work structure for the generic pfnGWKResample() branch; not created for nearest neighbour.
    5581         243 :     GWKResampleWrkStruct *psWrkStruct = nullptr;
    5582         243 :     if (poWK->eResample != GRA_NearestNeighbour)
    5583             :     {
    5584         224 :         psWrkStruct = GWKResampleCreateWrkStruct(poWK);
    5585             :     }
    5586         243 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    5587         243 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    5588         243 :     const double dfErrorThreshold = CPLAtof(
    5589         243 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    5590             : // Computed once per job; gates the neighbour search for rejected source pixels.
    5591             :     const bool bOneSourceCornerFailsToReproject =
    5592         243 :         GWKOneSourceCornerFailsToReproject(psJob);
    5593             : 
    5594             :     // Precompute values.
    5595        6513 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5596        6270 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    5597             : 
    5598             :     /* ==================================================================== */
    5599             :     /*      Loop over output lines.                                         */
    5600             :     /* ==================================================================== */
    5601        6513 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    5602             :     {
    5603             :         /* --------------------------------------------------------------------
    5604             :          */
    5605             :         /*      Setup points to transform to source image space. */
    5606             :         /* --------------------------------------------------------------------
    5607             :          */
    5608        6270 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    5609        6270 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    5610      242830 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5611      236560 :             padfY[iDstX] = dfY;
    5612        6270 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    5613             : 
    5614             :         /* --------------------------------------------------------------------
    5615             :          */
    5616             :         /*      Transform the points from destination pixel/line coordinates */
    5617             :         /*      to source pixel/line coordinates. */
    5618             :         /* --------------------------------------------------------------------
    5619             :          */
    5620        6270 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    5621             :                              padfY, padfZ, pabSuccess);
    5622        6270 :         if (dfSrcCoordPrecision > 0.0)
    5623             :         {
    5624           0 :             GWKRoundSourceCoordinates(
    5625             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    5626             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    5627           0 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    5628             :         }
    5629             : 
    5630             :         /* ====================================================================
    5631             :          */
    5632             :         /*      Loop over pixels in output scanline. */
    5633             :         /* ====================================================================
    5634             :          */
    5635      242830 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5636             :         {
    5637      236560 :             GPtrDiff_t iSrcOffset = 0;
    5638      236560 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    5639             :                                               padfX, padfY, nSrcXSize,
    5640             :                                               nSrcYSize, iSrcOffset))
    5641           0 :                 continue;
    5642             : 
    5643             :             /* --------------------------------------------------------------------
    5644             :              */
    5645             :             /*      Do not try to apply transparent/invalid source pixels to the
    5646             :              */
    5647             :             /*      destination.  This currently ignores the multi-pixel input
    5648             :              */
    5649             :             /*      of bilinear and cubic resamples. */
    5650             :             /* --------------------------------------------------------------------
    5651             :              */
    5652      236560 :             double dfDensity = 1.0;
    5653             : // Density below the threshold: skip the pixel, or try a neighbour when a source corner fails to reproject.
    5654      236560 :             if (poWK->pafUnifiedSrcDensity != nullptr)
    5655             :             {
    5656        1200 :                 dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    5657        1200 :                 if (dfDensity < SRC_DENSITY_THRESHOLD)
    5658             :                 {
    5659           0 :                     if (!bOneSourceCornerFailsToReproject)
    5660             :                     {
    5661           0 :                         continue;
    5662             :                     }
    5663           0 :                     else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
    5664             :                                  psJob, iSrcOffset))
    5665             :                     {
    5666           0 :                         dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    5667             :                     }
    5668             :                     else
    5669             :                     {
    5670           0 :                         continue;
    5671             :                     }
    5672             :                 }
    5673             :             }
    5674             : // Same policy for the unified source validity mask.
    5675      236560 :             if (poWK->panUnifiedSrcValid != nullptr &&
    5676           0 :                 !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    5677             :             {
    5678           0 :                 if (!bOneSourceCornerFailsToReproject)
    5679             :                 {
    5680           0 :                     continue;
    5681             :                 }
    5682           0 :                 else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
    5683             :                 {
    5684           0 :                     continue;
    5685             :                 }
    5686             :             }
    5687             : 
    5688             :             /* ====================================================================
    5689             :              */
    5690             :             /*      Loop processing each band. */
    5691             :             /* ====================================================================
    5692             :              */
    5693      236560 :             bool bHasFoundDensity = false;
    5694             : // Offset of this pixel in the destination chunk: iDstY rows of nDstXSize pixels, plus iDstX.
    5695      236560 :             const GPtrDiff_t iDstOffset =
    5696      236560 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    5697      473120 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    5698             :             {
    5699      236560 :                 double dfBandDensity = 0.0;
    5700      236560 :                 double dfValueReal = 0.0;
    5701      236560 :                 double dfValueImag = 0.0;
    5702             : 
    5703             :                 /* --------------------------------------------------------------------
    5704             :                  */
    5705             :                 /*      Collect the source value. */
    5706             :                 /* --------------------------------------------------------------------
    5707             :                  */
    5708      236560 :                 if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
    5709             :                     nSrcYSize == 1)
    5710             :                 {
    5711             :                     // FALSE is returned if dfBandDensity == 0, which is
    5712             :                     // checked below.
    5713         568 :                     CPL_IGNORE_RET_VAL(GWKGetPixelValue(
    5714             :                         poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal,
    5715             :                         &dfValueImag));
    5716             :                 }
    5717      235992 :                 else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
    5718             :                 {
    5719         648 :                     GWKBilinearResample4Sample(
    5720         648 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5721         648 :                         padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5722             :                         &dfValueReal, &dfValueImag);
    5723             :                 }
    5724      235344 :                 else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
    5725             :                 {
    5726         248 :                     GWKCubicResample4Sample(
    5727         248 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5728         248 :                         padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5729             :                         &dfValueReal, &dfValueImag);
    5730             :                 }
    5731             :                 else
    5732             : #ifdef DEBUG
    5733             :                     // Only useful for clang static analyzer.
    5734      235096 :                     if (psWrkStruct != nullptr)
    5735             : #endif
    5736             :                     {
    5737      235096 :                         psWrkStruct->pfnGWKResample(
    5738      235096 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5739      235096 :                             padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5740             :                             &dfValueReal, &dfValueImag, psWrkStruct);
    5741             :                     }
    5742             : 
    5743             :                 // If we didn't find any valid inputs skip to next band.
    5744      236560 :                 if (dfBandDensity < BAND_DENSITY_THRESHOLD)
    5745           0 :                     continue;
    5746             : 
    5747      236560 :                 if (poWK->bApplyVerticalShift)
    5748             :                 {
    5749           0 :                     if (!std::isfinite(padfZ[iDstX]))
    5750           0 :                         continue;
    5751             :                     // Subtract padfZ[] since the coordinate transformation is
    5752             :                     // from target to source
    5753           0 :                     dfValueReal =
    5754           0 :                         dfValueReal * poWK->dfMultFactorVerticalShift -
    5755           0 :                         padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
    5756             :                 }
    5757             : 
    5758      236560 :                 bHasFoundDensity = true;
    5759             : 
    5760             :                 /* --------------------------------------------------------------------
    5761             :                  */
    5762             :                 /*      We have a computed value from the source.  Now apply it
    5763             :                  * to      */
    5764             :                 /*      the destination pixel. */
    5765             :                 /* --------------------------------------------------------------------
    5766             :                  */
    5767      236560 :                 GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    5768             :                                  dfValueReal, dfValueImag);
    5769             :             }
    5770             : // No band contributed a value: leave the destination density/validity untouched.
    5771      236560 :             if (!bHasFoundDensity)
    5772           0 :                 continue;
    5773             : 
    5774             :             /* --------------------------------------------------------------------
    5775             :              */
    5776             :             /*      Update destination density/validity masks. */
    5777             :             /* --------------------------------------------------------------------
    5778             :              */
    5779      236560 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    5780             : 
    5781      236560 :             if (poWK->panDstValid != nullptr)
    5782             :             {
    5783           0 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    5784             :             }
    5785             :         } /* Next iDstX */
    5786             : 
    5787             :         /* --------------------------------------------------------------------
    5788             :          */
    5789             :         /*      Report progress to the user, and optionally cancel out. */
    5790             :         /* --------------------------------------------------------------------
    5791             :          */
    5792        6270 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    5793           0 :             break;
    5794             :     }
    5795             : 
    5796             :     /* -------------------------------------------------------------------- */
    5797             :     /*      Cleanup and return.                                             */
    5798             :     /* -------------------------------------------------------------------- */
    5799         243 :     CPLFree(padfX);
    5800         243 :     CPLFree(padfY);
    5801         243 :     CPLFree(padfZ);
    5802         243 :     CPLFree(pabSuccess);
    5803         243 :     if (psWrkStruct)
    5804         224 :         GWKResampleDeleteWrkStruct(psWrkStruct);
    5805         243 : }
    5806             : /* Entry point of the general-case warper: hands GWKGeneralCaseThread()
                     :  * to GWKRun() under the label "GWKGeneralCase" and returns the CPLErr
                     :  * reported by GWKRun().
                     :  */
    5807         243 : static CPLErr GWKGeneralCase(GDALWarpKernel *poWK)
    5808             : {
    5809         243 :     return GWKRun(poWK, "GWKGeneralCase", GWKGeneralCaseThread);
    5810             : }
    5811             : 
    5812             : /************************************************************************/
    5813             : /*                            GWKRealCase()                             */
    5814             : /*                                                                      */
    5815             : /*      General case for non-complex data types.                        */
    5816             : /************************************************************************/
    5817             : 
    5818         133 : static void GWKRealCaseThread(void *pData)
    5819             : 
    5820             : {
    5821         133 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    5822         133 :     GDALWarpKernel *poWK = psJob->poWK;
    5823         133 :     const int iYMin = psJob->iYMin;
    5824         133 :     const int iYMax = psJob->iYMax;
    5825             : 
    5826         133 :     const int nDstXSize = poWK->nDstXSize;
    5827         133 :     const int nSrcXSize = poWK->nSrcXSize;
    5828         133 :     const int nSrcYSize = poWK->nSrcYSize;
    5829             :     const double dfMultFactorVerticalShiftPipeline =
    5830         133 :         poWK->bApplyVerticalShift
    5831         133 :             ? CPLAtof(CSLFetchNameValueDef(
    5832           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    5833             :                   "1.0"))
    5834         133 :             : 0.0;
    5835             : 
    5836             :     /* -------------------------------------------------------------------- */
    5837             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    5838             :     /*      scanlines worth of positions.                                   */
    5839             :     /* -------------------------------------------------------------------- */
    5840             : 
    5841             :     // For x, 2 *, because we cache the precomputed values at the end.
    5842             :     double *padfX =
    5843         133 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    5844             :     double *padfY =
    5845         133 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5846             :     double *padfZ =
    5847         133 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5848         133 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    5849             : 
    5850         133 :     const bool bUse4SamplesFormula =
    5851         133 :         poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
    5852             : 
    5853         133 :     GWKResampleWrkStruct *psWrkStruct = nullptr;
    5854         133 :     if (poWK->eResample != GRA_NearestNeighbour)
    5855             :     {
    5856         117 :         psWrkStruct = GWKResampleCreateWrkStruct(poWK);
    5857             :     }
    5858         133 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    5859         133 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    5860         133 :     const double dfErrorThreshold = CPLAtof(
    5861         133 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    5862             : 
    5863         384 :     const bool bSrcMaskIsDensity = poWK->panUnifiedSrcValid == nullptr &&
    5864         251 :                                    poWK->papanBandSrcValid == nullptr &&
    5865         118 :                                    poWK->pafUnifiedSrcDensity != nullptr;
    5866             : 
    5867             :     const bool bOneSourceCornerFailsToReproject =
    5868         133 :         GWKOneSourceCornerFailsToReproject(psJob);
    5869             : 
    5870             :     // Precompute values.
    5871       18764 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5872       18631 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    5873             : 
    5874             :     /* ==================================================================== */
    5875             :     /*      Loop over output lines.                                         */
    5876             :     /* ==================================================================== */
    5877       21515 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    5878             :     {
    5879             :         /* --------------------------------------------------------------------
    5880             :          */
    5881             :         /*      Setup points to transform to source image space. */
    5882             :         /* --------------------------------------------------------------------
    5883             :          */
    5884       21382 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    5885       21382 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    5886    43456400 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5887    43435000 :             padfY[iDstX] = dfY;
    5888       21382 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    5889             : 
    5890             :         /* --------------------------------------------------------------------
    5891             :          */
    5892             :         /*      Transform the points from destination pixel/line coordinates */
    5893             :         /*      to source pixel/line coordinates. */
    5894             :         /* --------------------------------------------------------------------
    5895             :          */
    5896       21382 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    5897             :                              padfY, padfZ, pabSuccess);
    5898       21382 :         if (dfSrcCoordPrecision > 0.0)
    5899             :         {
    5900           0 :             GWKRoundSourceCoordinates(
    5901             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    5902             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    5903           0 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    5904             :         }
    5905             : 
    5906             :         /* ====================================================================
    5907             :          */
    5908             :         /*      Loop over pixels in output scanline. */
    5909             :         /* ====================================================================
    5910             :          */
    5911    43456400 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5912             :         {
    5913    43435000 :             GPtrDiff_t iSrcOffset = 0;
    5914    43435000 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    5915             :                                               padfX, padfY, nSrcXSize,
    5916             :                                               nSrcYSize, iSrcOffset))
    5917    42842700 :                 continue;
    5918             : 
    5919             :             /* --------------------------------------------------------------------
    5920             :              */
    5921             :             /*      Do not try to apply transparent/invalid source pixels to the
    5922             :              */
    5923             :             /*      destination.  This currently ignores the multi-pixel input
    5924             :              */
    5925             :             /*      of bilinear and cubic resamples. */
    5926             :             /* --------------------------------------------------------------------
    5927             :              */
    5928    31382600 :             double dfDensity = 1.0;
    5929             : 
    5930    31382600 :             if (poWK->pafUnifiedSrcDensity != nullptr)
    5931             :             {
    5932     1262880 :                 dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    5933     1262880 :                 if (dfDensity < SRC_DENSITY_THRESHOLD)
    5934             :                 {
    5935     1261590 :                     if (!bOneSourceCornerFailsToReproject)
    5936             :                     {
    5937     1261590 :                         continue;
    5938             :                     }
    5939           0 :                     else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
    5940             :                                  psJob, iSrcOffset))
    5941             :                     {
    5942           0 :                         dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    5943             :                     }
    5944             :                     else
    5945             :                     {
    5946           0 :                         continue;
    5947             :                     }
    5948             :                 }
    5949             :             }
    5950             : 
    5951    59749600 :             if (poWK->panUnifiedSrcValid != nullptr &&
    5952    29628600 :                 !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    5953             :             {
    5954    29531000 :                 if (!bOneSourceCornerFailsToReproject)
    5955             :                 {
    5956    29528700 :                     continue;
    5957             :                 }
    5958        2229 :                 else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
    5959             :                 {
    5960           0 :                     continue;
    5961             :                 }
    5962             :             }
    5963             : 
    5964             :             /* ====================================================================
    5965             :              */
    5966             :             /*      Loop processing each band. */
    5967             :             /* ====================================================================
    5968             :              */
    5969      592300 :             bool bHasFoundDensity = false;
    5970             : 
    5971      592300 :             const GPtrDiff_t iDstOffset =
    5972      592300 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    5973     1516060 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    5974             :             {
    5975      923761 :                 double dfBandDensity = 0.0;
    5976      923761 :                 double dfValueReal = 0.0;
    5977             : 
    5978             :                 /* --------------------------------------------------------------------
    5979             :                  */
    5980             :                 /*      Collect the source value. */
    5981             :                 /* --------------------------------------------------------------------
    5982             :                  */
    5983      923761 :                 if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
    5984             :                     nSrcYSize == 1)
    5985             :                 {
    5986             :                     // FALSE is returned if dfBandDensity == 0, which is
    5987             :                     // checked below.
    5988        1012 :                     CPL_IGNORE_RET_VAL(GWKGetPixelValueReal(
    5989             :                         poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal));
    5990             :                 }
    5991      922749 :                 else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
    5992             :                 {
    5993        1326 :                     double dfValueImagIgnored = 0.0;
    5994        1326 :                     GWKBilinearResample4Sample(
    5995        1326 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5996        1326 :                         padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5997        1326 :                         &dfValueReal, &dfValueImagIgnored);
    5998             :                 }
    5999      921423 :                 else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
    6000             :                 {
    6001      299992 :                     if (bSrcMaskIsDensity)
    6002             :                     {
    6003         361 :                         if (poWK->eWorkingDataType == GDT_Byte)
    6004             :                         {
    6005         361 :                             GWKCubicResampleSrcMaskIsDensity4SampleRealT<GByte>(
    6006         361 :                                 poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6007         361 :                                 padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    6008             :                                 &dfValueReal);
    6009             :                         }
    6010           0 :                         else if (poWK->eWorkingDataType == GDT_UInt16)
    6011             :                         {
    6012             :                             GWKCubicResampleSrcMaskIsDensity4SampleRealT<
    6013           0 :                                 GUInt16>(poWK, iBand,
    6014           0 :                                          padfX[iDstX] - poWK->nSrcXOff,
    6015           0 :                                          padfY[iDstX] - poWK->nSrcYOff,
    6016             :                                          &dfBandDensity, &dfValueReal);
    6017             :                         }
    6018             :                         else
    6019             :                         {
    6020           0 :                             GWKCubicResampleSrcMaskIsDensity4SampleReal(
    6021           0 :                                 poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6022           0 :                                 padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    6023             :                                 &dfValueReal);
    6024             :                         }
    6025             :                     }
    6026             :                     else
    6027             :                     {
    6028      299631 :                         double dfValueImagIgnored = 0.0;
    6029      299631 :                         GWKCubicResample4Sample(
    6030      299631 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6031      299631 :                             padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    6032             :                             &dfValueReal, &dfValueImagIgnored);
    6033      299992 :                     }
    6034             :                 }
    6035             :                 else
    6036             : #ifdef DEBUG
    6037             :                     // Only useful for clang static analyzer.
    6038      621431 :                     if (psWrkStruct != nullptr)
    6039             : #endif
    6040             :                     {
    6041      621431 :                         double dfValueImagIgnored = 0.0;
    6042      621431 :                         psWrkStruct->pfnGWKResample(
    6043      621431 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6044      621431 :                             padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    6045             :                             &dfValueReal, &dfValueImagIgnored, psWrkStruct);
    6046             :                     }
    6047             : 
    6048             :                 // If we didn't find any valid inputs skip to next band.
    6049      923761 :                 if (dfBandDensity < BAND_DENSITY_THRESHOLD)
    6050           0 :                     continue;
    6051             : 
    6052      923761 :                 if (poWK->bApplyVerticalShift)
    6053             :                 {
    6054           0 :                     if (!std::isfinite(padfZ[iDstX]))
    6055           0 :                         continue;
    6056             :                     // Subtract padfZ[] since the coordinate transformation is
    6057             :                     // from target to source
    6058           0 :                     dfValueReal =
    6059           0 :                         dfValueReal * poWK->dfMultFactorVerticalShift -
    6060           0 :                         padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
    6061             :                 }
    6062             : 
    6063      923761 :                 bHasFoundDensity = true;
    6064             : 
    6065             :                 /* --------------------------------------------------------------------
    6066             :                  */
    6067             :                 /*      We have a computed value from the source.  Now apply it
    6068             :                  * to      */
    6069             :                 /*      the destination pixel. */
    6070             :                 /* --------------------------------------------------------------------
    6071             :                  */
    6072      923761 :                 GWKSetPixelValueReal(poWK, iBand, iDstOffset, dfBandDensity,
    6073             :                                      dfValueReal);
    6074             :             }
    6075             : 
    6076      592300 :             if (!bHasFoundDensity)
    6077           0 :                 continue;
    6078             : 
    6079             :             /* --------------------------------------------------------------------
    6080             :              */
    6081             :             /*      Update destination density/validity masks. */
    6082             :             /* --------------------------------------------------------------------
    6083             :              */
    6084      592300 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    6085             : 
    6086      592300 :             if (poWK->panDstValid != nullptr)
    6087             :             {
    6088      101460 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    6089             :             }
    6090             :         }  // Next iDstX.
    6091             : 
    6092             :         /* --------------------------------------------------------------------
    6093             :          */
    6094             :         /*      Report progress to the user, and optionally cancel out. */
    6095             :         /* --------------------------------------------------------------------
    6096             :          */
    6097       21382 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    6098           0 :             break;
    6099             :     }
    6100             : 
    6101             :     /* -------------------------------------------------------------------- */
    6102             :     /*      Cleanup and return.                                             */
    6103             :     /* -------------------------------------------------------------------- */
    6104         133 :     CPLFree(padfX);
    6105         133 :     CPLFree(padfY);
    6106         133 :     CPLFree(padfZ);
    6107         133 :     CPLFree(pabSuccess);
    6108         133 :     if (psWrkStruct)
    6109         117 :         GWKResampleDeleteWrkStruct(psWrkStruct);
    6110         133 : }
    6111             : 
    6112         133 : static CPLErr GWKRealCase(GDALWarpKernel *poWK)  // Runs GWKRealCaseThread for poWK through GWKRun.
    6113             : {
    6114         133 :     return GWKRun(poWK, "GWKRealCase", GWKRealCaseThread);  // "GWKRealCase" is the name string handed to GWKRun; its CPLErr is returned unchanged.
    6115             : }
    6116             : 
    6117             : /************************************************************************/
    6118             : /*                 GWKCubicResampleNoMasks4MultiBandT()                 */
    6119             : /************************************************************************/
    6120             : 
    6121             : /* We restrict to 64-bit processors because they are guaranteed to have SSE2 */
    6122             : /* and enough SSE registers */
    6123             : #if defined(__x86_64) || defined(_M_X64)
    6124             : 
    6125      238596 : static inline float Convolute4x4(const __m128 row0, const __m128 row1,  // Weighted combination of four 4-lane rows.
    6126             :                                  const __m128 row2, const __m128 row3,  // rowN: four float samples of window row N.
    6127             :                                  const __m128 weightsXY0,  // weightsXYn: the four weights applied lane-wise to rowN.
    6128             :                                  const __m128 weightsXY1,
    6129             :                                  const __m128 weightsXY2,
    6130             :                                  const __m128 weightsXY3)
    6131             : {
    6132     1670170 :     return XMMHorizontalAdd(_mm_add_ps(  // XMMHorizontalAdd is defined elsewhere; presumably sums the 4 lanes into one float -- confirm.
    6133             :         _mm_add_ps(_mm_mul_ps(row0, weightsXY0), _mm_mul_ps(row1, weightsXY1)),  // Lane-wise row0*w0 + row1*w1.
    6134             :         _mm_add_ps(_mm_mul_ps(row2, weightsXY2),  // Lane-wise row2*w2 + row3*w3.
    6135      238596 :                    _mm_mul_ps(row3, weightsXY3))));
    6136             : }
    6137             : 
    6138             : template <class T>  // T: element type of the band buffers (papabySrcImage/papabyDstImage are reinterpret_cast to T*).
    6139       81323 : static void GWKCubicResampleNoMasks4MultiBandT(const GDALWarpKernel *poWK,  // Writes destination pixel iDstOffset in every band,
    6140             :                                                double dfSrcX, double dfSrcY,  // sampled from the source at (dfSrcX, dfSrcY).
    6141             :                                                const GPtrDiff_t iDstOffset)  // This function itself reads no source validity/density mask.
    6142             : {
    6143       81323 :     const double dfSrcXShifted = dfSrcX - 0.5;  // Shift by half a pixel before truncating to a column index.
    6144       81323 :     const int iSrcX = static_cast<int>(dfSrcXShifted);  // static_cast truncates toward zero.
    6145       81323 :     const double dfSrcYShifted = dfSrcY - 0.5;
    6146       81323 :     const int iSrcY = static_cast<int>(dfSrcYShifted);
    6147       81323 :     const GPtrDiff_t iSrcOffset =  // Linear index of (iSrcX, iSrcY) with nSrcXSize values per row.
    6148       81323 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    6149             : 
    6150             :     // Get the bilinear interpolation at the image borders.
    6151       81323 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||  // True when the 4x4 window (iSrcX-1..iSrcX+2, iSrcY-1..iSrcY+2)
    6152       80326 :         iSrcY + 2 >= poWK->nSrcYSize)  // does not lie fully inside the source.
    6153             :     {
    6154        7164 :         for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6155             :         {
    6156             :             T value;  // Output argument of the bilinear helper below.
    6157        5373 :             GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,  // Note: receives the unshifted dfSrcX/dfSrcY.
    6158             :                                                &value);
    6159        5373 :             reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
    6160             :                 value;
    6161        1791 :         }
    6162             :     }
    6163             :     else
    6164             :     {
    6165       79532 :         const float fDeltaX = static_cast<float>(dfSrcXShifted) - iSrcX;  // Fractional part of the shifted X position, in float.
    6166       79532 :         const float fDeltaY = static_cast<float>(dfSrcYShifted) - iSrcY;  // Fractional part of the shifted Y position, in float.
    6167             : 
    6168             :         float afCoeffsX[4];
    6169             :         float afCoeffsY[4];
    6170       79532 :         GWKCubicComputeWeights(fDeltaX, afCoeffsX);  // Helper defined elsewhere; the 4 floats of afCoeffsX are read back below.
    6171       79532 :         GWKCubicComputeWeights(fDeltaY, afCoeffsY);
    6172       79532 :         const auto weightsX = _mm_loadu_ps(afCoeffsX);  // Unaligned load of the 4 X weights.
    6173             :         const auto weightsXY0 =  // weightsXYn = Y weight n broadcast to all lanes, times the 4 X weights.
    6174      159064 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[0]), weightsX);
    6175             :         const auto weightsXY1 =
    6176      159064 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[1]), weightsX);
    6177             :         const auto weightsXY2 =
    6178      159064 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[2]), weightsX);
    6179             :         const auto weightsXY3 =
    6180       79532 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[3]), weightsX);
    6181             : 
    6182       79532 :         const GPtrDiff_t iOffset = iSrcOffset - poWK->nSrcXSize - 1;  // Top-left sample of the window: one row up, one column left.
    6183             : 
    6184       79532 :         int iBand = 0;
    6185             :         // Process 2 bands at a time
    6186      159064 :         for (; iBand + 1 < poWK->nBands; iBand += 2)
    6187             :         {
    6188       79532 :             const T *CPL_RESTRICT pBand0 =
    6189       79532 :                 reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
    6190       79532 :             const auto row0_0 = XMMLoad4Values(pBand0 + iOffset);  // XMMLoad4Values is defined elsewhere; presumably loads 4 consecutive T values as floats -- confirm.
    6191             :             const auto row1_0 =
    6192       79532 :                 XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);  // Each following window row starts nSrcXSize elements further.
    6193             :             const auto row2_0 =
    6194       79532 :                 XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
    6195             :             const auto row3_0 =
    6196       79532 :                 XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
    6197             : 
    6198       79532 :             const T *CPL_RESTRICT pBand1 =
    6199       79532 :                 reinterpret_cast<const T *>(poWK->papabySrcImage[iBand + 1]);
    6200       79532 :             const auto row0_1 = XMMLoad4Values(pBand1 + iOffset);
    6201             :             const auto row1_1 =
    6202       79532 :                 XMMLoad4Values(pBand1 + iOffset + poWK->nSrcXSize);
    6203             :             const auto row2_1 =
    6204       79532 :                 XMMLoad4Values(pBand1 + iOffset + 2 * poWK->nSrcXSize);
    6205             :             const auto row3_1 =
    6206       79532 :                 XMMLoad4Values(pBand1 + iOffset + 3 * poWK->nSrcXSize);
    6207             : 
    6208             :             const float fValue_0 =
    6209       79532 :                 Convolute4x4(row0_0, row1_0, row2_0, row3_0, weightsXY0,  // Both bands reuse the same weights.
    6210             :                              weightsXY1, weightsXY2, weightsXY3);
    6211             : 
    6212             :             const float fValue_1 =
    6213       79532 :                 Convolute4x4(row0_1, row1_1, row2_1, row3_1, weightsXY0,
    6214             :                              weightsXY1, weightsXY2, weightsXY3);
    6215             : 
    6216       79532 :             T *CPL_RESTRICT pDstBand0 =
    6217       79532 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
    6218       79532 :             pDstBand0[iDstOffset] = GWKClampValueT<T>(fValue_0);  // GWKClampValueT is defined elsewhere; converts the float result to T (presumably with clamping -- confirm).
    6219             : 
    6220       79532 :             T *CPL_RESTRICT pDstBand1 =
    6221       79532 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand + 1]);
    6222       79532 :             pDstBand1[iDstOffset] = GWKClampValueT<T>(fValue_1);
    6223             :         }
    6224       79532 :         if (iBand < poWK->nBands)  // Odd band count: one band remains after the pairwise loop.
    6225             :         {
    6226       79532 :             const T *CPL_RESTRICT pBand0 =
    6227       79532 :                 reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
    6228       79532 :             const auto row0 = XMMLoad4Values(pBand0 + iOffset);
    6229             :             const auto row1 =
    6230       79532 :                 XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
    6231             :             const auto row2 =
    6232       79532 :                 XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
    6233             :             const auto row3 =
    6234       79532 :                 XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
    6235             : 
    6236             :             const float fValue =
    6237       79532 :                 Convolute4x4(row0, row1, row2, row3, weightsXY0, weightsXY1,
    6238             :                              weightsXY2, weightsXY3);
    6239             : 
    6240       79532 :             T *CPL_RESTRICT pDstBand =
    6241       79532 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
    6242       79532 :             pDstBand[iDstOffset] = GWKClampValueT<T>(fValue);
    6243             :         }
    6244             :     }
    6245             : 
    6246       81323 :     if (poWK->pafDstDensity)  // Only when a destination density buffer is present:
    6247         441 :         poWK->pafDstDensity[iDstOffset] = 1.0f;  // mark the written pixel with density 1.0.
    6248       81323 : }
    6249             : 
    6250             : #endif  // defined(__x86_64) || defined(_M_X64)
    6251             : 
    6252             : /************************************************************************/
    6253             : /*                GWKResampleNoMasksOrDstDensityOnlyThreadInternal()    */
    6254             : /************************************************************************/
    6255             : 
    6256             : template <class T, GDALResampleAlg eResample, int bUse4SamplesFormula>
    6257        1170 : static void GWKResampleNoMasksOrDstDensityOnlyThreadInternal(void *pData)
    6258             : 
    6259             : {
    6260        1170 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    6261        1170 :     GDALWarpKernel *poWK = psJob->poWK;
    6262        1170 :     const int iYMin = psJob->iYMin;
    6263        1170 :     const int iYMax = psJob->iYMax;
    6264        1152 :     const double dfMultFactorVerticalShiftPipeline =
    6265        1170 :         poWK->bApplyVerticalShift
    6266          18 :             ? CPLAtof(CSLFetchNameValueDef(
    6267          18 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    6268             :                   "1.0"))
    6269             :             : 0.0;
    6270             : 
    6271        1170 :     const int nDstXSize = poWK->nDstXSize;
    6272        1170 :     const int nSrcXSize = poWK->nSrcXSize;
    6273        1170 :     const int nSrcYSize = poWK->nSrcYSize;
    6274             : 
    6275             :     /* -------------------------------------------------------------------- */
    6276             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    6277             :     /*      scanlines worth of positions.                                   */
    6278             :     /* -------------------------------------------------------------------- */
    6279             : 
    6280             :     // For x, 2 *, because we cache the precomputed values at the end.
    6281             :     double *padfX =
    6282        1170 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    6283             :     double *padfY =
    6284        1170 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6285             :     double *padfZ =
    6286        1170 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6287        1170 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    6288             : 
    6289        1170 :     const int nXRadius = poWK->nXRadius;
    6290             :     double *padfWeightsX =
    6291        1170 :         static_cast<double *>(CPLCalloc(1 + nXRadius * 2, sizeof(double)));
    6292             :     double *padfWeightsY = static_cast<double *>(
    6293        1170 :         CPLCalloc(1 + poWK->nYRadius * 2, sizeof(double)));
    6294        1170 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    6295        1170 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    6296        1170 :     const double dfErrorThreshold = CPLAtof(
    6297        1170 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    6298             : 
    6299             :     // Precompute values.
    6300      254594 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6301      253424 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    6302             : 
    6303             :     /* ==================================================================== */
    6304             :     /*      Loop over output lines.                                         */
    6305             :     /* ==================================================================== */
    6306      129808 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    6307             :     {
    6308             :         /* --------------------------------------------------------------------
    6309             :          */
    6310             :         /*      Setup points to transform to source image space. */
    6311             :         /* --------------------------------------------------------------------
    6312             :          */
    6313      128639 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    6314      128639 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    6315    58383044 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6316    58254436 :             padfY[iDstX] = dfY;
    6317      128639 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    6318             : 
    6319             :         /* --------------------------------------------------------------------
    6320             :          */
    6321             :         /*      Transform the points from destination pixel/line coordinates */
    6322             :         /*      to source pixel/line coordinates. */
    6323             :         /* --------------------------------------------------------------------
    6324             :          */
    6325      128639 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    6326             :                              padfY, padfZ, pabSuccess);
    6327      128639 :         if (dfSrcCoordPrecision > 0.0)
    6328             :         {
    6329        1000 :             GWKRoundSourceCoordinates(
    6330             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    6331             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    6332        1000 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    6333             :         }
    6334             : 
    6335             :         /* ====================================================================
    6336             :          */
    6337             :         /*      Loop over pixels in output scanline. */
    6338             :         /* ====================================================================
    6339             :          */
    6340    58292984 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6341             :         {
    6342    58164366 :             GPtrDiff_t iSrcOffset = 0;
    6343    58164366 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    6344             :                                               padfX, padfY, nSrcXSize,
    6345             :                                               nSrcYSize, iSrcOffset))
    6346     6540862 :                 continue;
    6347             : 
    6348             :             /* ====================================================================
    6349             :              */
    6350             :             /*      Loop processing each band. */
    6351             :             /* ====================================================================
    6352             :              */
    6353    51491452 :             const GPtrDiff_t iDstOffset =
    6354    51491452 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    6355             : 
    6356             : #if defined(__x86_64) || defined(_M_X64)
    6357             :             if constexpr (bUse4SamplesFormula && eResample == GRA_Cubic &&
    6358             :                           (std::is_same<T, GByte>::value ||
    6359             :                            std::is_same<T, GUInt16>::value))
    6360             :             {
    6361      752574 :                 if (poWK->nBands > 1 && !poWK->bApplyVerticalShift)
    6362             :                 {
    6363       81323 :                     GWKCubicResampleNoMasks4MultiBandT<T>(
    6364       81323 :                         poWK, padfX[iDstX] - poWK->nSrcXOff,
    6365       81323 :                         padfY[iDstX] - poWK->nSrcYOff, iDstOffset);
    6366             : 
    6367       81323 :                     continue;
    6368             :                 }
    6369             :             }
    6370             : #endif  // defined(__x86_64) || defined(_M_X64)
    6371             : 
    6372    51410129 :             [[maybe_unused]] double dfInvWeights = 0;
    6373   144108168 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6374             :             {
    6375    92484757 :                 T value = 0;
    6376             :                 if constexpr (eResample == GRA_NearestNeighbour)
    6377             :                 {
    6378    76588049 :                     value = reinterpret_cast<T *>(
    6379    76588049 :                         poWK->papabySrcImage[iBand])[iSrcOffset];
    6380             :                 }
    6381             :                 else if constexpr (bUse4SamplesFormula)
    6382             :                 {
    6383             :                     if constexpr (eResample == GRA_Bilinear)
    6384     4806176 :                         GWKBilinearResampleNoMasks4SampleT(
    6385     4806176 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6386     4806176 :                             padfY[iDstX] - poWK->nSrcYOff, &value);
    6387             :                     else
    6388     1906603 :                         GWKCubicResampleNoMasks4SampleT(
    6389     1906603 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6390     1906603 :                             padfY[iDstX] - poWK->nSrcYOff, &value);
    6391             :                 }
    6392             :                 else
    6393             :                 {
    6394     9183929 :                     GWKResampleNoMasksT(
    6395     9183929 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6396     9183929 :                         padfY[iDstX] - poWK->nSrcYOff, &value, padfWeightsX,
    6397             :                         padfWeightsY, dfInvWeights);
    6398             :                 }
    6399             : 
    6400    92481547 :                 if (poWK->bApplyVerticalShift)
    6401             :                 {
    6402         818 :                     if (!std::isfinite(padfZ[iDstX]))
    6403           0 :                         continue;
    6404             :                     // Subtract padfZ[] since the coordinate transformation is
    6405             :                     // from target to source
    6406      219426 :                     value = GWKClampValueT<T>(
    6407         818 :                         value * poWK->dfMultFactorVerticalShift -
    6408         818 :                         padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
    6409             :                 }
    6410             : 
    6411    92698117 :                 if (poWK->pafDstDensity)
    6412    11712299 :                     poWK->pafDstDensity[iDstOffset] = 1.0f;
    6413             : 
    6414    92698117 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
    6415             :                     value;
    6416             :             }
    6417             :         }
    6418             : 
    6419             :         /* --------------------------------------------------------------------
    6420             :          */
    6421             :         /*      Report progress to the user, and optionally cancel out. */
    6422             :         /* --------------------------------------------------------------------
    6423             :          */
    6424      128639 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    6425           1 :             break;
    6426             :     }
    6427             : 
    6428             :     /* -------------------------------------------------------------------- */
    6429             :     /*      Cleanup and return.                                             */
    6430             :     /* -------------------------------------------------------------------- */
    6431        1170 :     CPLFree(padfX);
    6432        1170 :     CPLFree(padfY);
    6433        1170 :     CPLFree(padfZ);
    6434        1170 :     CPLFree(pabSuccess);
    6435        1170 :     CPLFree(padfWeightsX);
    6436        1170 :     CPLFree(padfWeightsY);
    6437        1170 : }
    6438             : 
    6439             : template <class T, GDALResampleAlg eResample>  // T and eResample are passed through unchanged to the Internal worker.
    6440         915 : static void GWKResampleNoMasksOrDstDensityOnlyThread(void *pData)  // Job entry point: forwards the opaque pData argument as-is.
    6441             : {
    6442         915 :     GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(  // Third template argument is always FALSE here (presumably the worker's bUse4SamplesFormula switch - confirm against its declaration).
    6443             :         pData);
    6444         915 : }
    6445             : 
    6446             : template <class T, GDALResampleAlg eResample>
    6447         255 : static void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread(void *pData)  // Job entry point: chooses at run time between the TRUE and FALSE instantiations of the Internal worker.
    6448             : 
    6449             : {
    6450         255 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);  // pData is the GWKJobStruct of this job.
    6451         255 :     GDALWarpKernel *poWK = psJob->poWK;
    6452             :     static_assert(eResample == GRA_Bilinear || eResample == GRA_Cubic);  // Only bilinear and cubic may instantiate this template.
    6453         255 :     const bool bUse4SamplesFormula =  // True only when both scale factors of the kernel are >= 0.95.
    6454         255 :         poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
    6455         255 :     if (bUse4SamplesFormula)
    6456         155 :         GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, TRUE>(  // 4-sample variant.
    6457             :             pData);
    6458             :     else
    6459         100 :         GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(  // Same instantiation the plain ...OnlyThread wrapper uses.
    6460             :             pData);
    6461         255 : }
    6462             : 
    6463         860 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)  // Calls GWKRun with poWK, a name string and the job function; returns GWKRun's result unchanged.
    6464             : {
    6465         860 :     return GWKRun(
    6466             :         poWK, "GWKNearestNoMasksOrDstDensityOnlyByte",
    6467         860 :         GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_NearestNeighbour>);  // Job function: GByte pixels, nearest neighbour.
    6468             : }
    6469             : 
    6470         125 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)  // Calls GWKRun with poWK, a name string and the job function; returns GWKRun's result unchanged.
    6471             : {
    6472         125 :     return GWKRun(
    6473             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyByte",
    6474             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte,  // Job function: GByte pixels, bilinear; the Has4Sample entry point selects the worker variant from the kernel's scale factors.
    6475         125 :                                                            GRA_Bilinear>);
    6476             : }
    6477             : 
    6478          72 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)  // Calls GWKRun with poWK, a name string and the job function; returns GWKRun's result unchanged.
    6479             : {
    6480          72 :     return GWKRun(
    6481             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyByte",
    6482          72 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte, GRA_Cubic>);  // Job function: GByte pixels, cubic.
    6483             : }
    6484             : 
    6485           9 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)  // Calls GWKRun with poWK, a name string and the job function; returns GWKRun's result unchanged.
    6486             : {
    6487           9 :     return GWKRun(
    6488             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyFloat",
    6489           9 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, GRA_Cubic>);  // Job function: float pixels, cubic.
    6490             : }
    6491             : 
    6492             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    6493             : 
    6494             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)  // Compiled only when INSTANTIATE_FLOAT64_SSE2_IMPL is defined; calls GWKRun and returns its result unchanged.
    6495             : {
    6496             :     return GWKRun(
    6497             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyDouble",
    6498             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double, GRA_Cubic>);  // Job function: double pixels, cubic.
    6499             : }
    6500             : #endif
    6501             : 
    6502          12 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)  // Calls GWKRun with poWK, a name string and the job function; returns GWKRun's result unchanged.
    6503             : {
    6504          12 :     return GWKRun(
    6505             :         poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyByte",
    6506          12 :         GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_CubicSpline>);  // Job function: GByte pixels, cubic spline; uses the plain entry point, not the Has4Sample one.
    6507             : }
    6508             : 
    6509             : /************************************************************************/
    6510             : /*                         GWKNearestThread()                           */
    6511             : /*                                                                      */
    6512             : /*      Case for data of type T with nearest neighbour resampling       */
    6513             : /*      using valid flags. Should be as fast as possible for this       */
    6514             : /*      particular transformation type.                                 */
    6515             : /************************************************************************/
    6516             : 
    6517         343 : template <class T> static void GWKNearestThread(void *pData)  // Job worker: nearest-neighbour warp of destination lines [iYMin, iYMax) for pixel type T, honouring source validity and density masks.
    6518             : 
    6519             : {
    6520         343 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);  // pData is the GWKJobStruct of this job.
    6521         343 :     GDALWarpKernel *poWK = psJob->poWK;
    6522         343 :     const int iYMin = psJob->iYMin;  // First destination line handled by this job.
    6523         343 :     const int iYMax = psJob->iYMax;  // One past the last destination line handled by this job.
    6524         343 :     const double dfMultFactorVerticalShiftPipeline =  // Warp option MULT_FACTOR_VERTICAL_SHIFT_PIPELINE (default "1.0"); 0.0 when bApplyVerticalShift is off.
    6525         343 :         poWK->bApplyVerticalShift
    6526           0 :             ? CPLAtof(CSLFetchNameValueDef(
    6527           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    6528             :                   "1.0"))
    6529             :             : 0.0;
    6530             : 
    6531         343 :     const int nDstXSize = poWK->nDstXSize;
    6532         343 :     const int nSrcXSize = poWK->nSrcXSize;
    6533         343 :     const int nSrcYSize = poWK->nSrcYSize;
    6534             : 
    6535             :     /* -------------------------------------------------------------------- */
    6536             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    6537             :     /*      scanlines worth of positions.                                   */
    6538             :     /* -------------------------------------------------------------------- */
    6539             : 
    6540             :     // For x, 2 *, because we cache the precomputed values at the end.
    6541             :     double *padfX =
    6542         343 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    6543             :     double *padfY =
    6544         343 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6545             :     double *padfZ =
    6546         343 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6547         343 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));  // One flag per destination pixel; handed to the transformer and then to GWKCheckAndComputeSrcOffsets.
    6548             : 
    6549         343 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(  // Warp option SRC_COORD_PRECISION (default "0"); rounding below runs only when > 0.
    6550         343 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    6551         343 :     const double dfErrorThreshold = CPLAtof(  // Warp option ERROR_THRESHOLD (default "0"); only forwarded to GWKRoundSourceCoordinates.
    6552         343 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    6553             : 
    6554             :     const bool bOneSourceCornerFailsToReproject =  // Evaluated once per job; enables the edge-adjustment fallback for masked-out source pixels below.
    6555         343 :         GWKOneSourceCornerFailsToReproject(psJob);
    6556             : 
    6557             :     // Precompute values.
    6558       49707 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6559       49364 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;  // Second half of padfX caches the destination X centres, reused for every line.
    6560             : 
    6561             :     /* ==================================================================== */
    6562             :     /*      Loop over output lines.                                         */
    6563             :     /* ==================================================================== */
    6564       37157 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    6565             :     {
    6566             : 
    6567             :         /* --------------------------------------------------------------------
    6568             :          */
    6569             :         /*      Setup points to transform to source image space. */
    6570             :         /* --------------------------------------------------------------------
    6571             :          */
    6572       36814 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);  // Reload the X inputs from the cached second half (the first half holds source coordinates after the previous transform).
    6573       36814 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;  // Destination Y centre of this line.
    6574     7743095 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6575     7706282 :             padfY[iDstX] = dfY;
    6576       36814 :         memset(padfZ, 0, sizeof(double) * nDstXSize);  // Z input is zero for every pixel.
    6577             : 
    6578             :         /* --------------------------------------------------------------------
    6579             :          */
    6580             :         /*      Transform the points from destination pixel/line coordinates */
    6581             :         /*      to source pixel/line coordinates. */
    6582             :         /* --------------------------------------------------------------------
    6583             :          */
    6584       36814 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,  // Transforms the whole scanline in one call; padfX/padfY/padfZ are read back below as source-space values.
    6585             :                              padfY, padfZ, pabSuccess);
    6586       36814 :         if (dfSrcCoordPrecision > 0.0)
    6587             :         {
    6588           0 :             GWKRoundSourceCoordinates(  // Optional post-processing of the transformed coordinates (helper defined outside this view).
    6589             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    6590             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    6591           0 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    6592             :         }
    6593             :         /* ====================================================================
    6594             :          */
    6595             :         /*      Loop over pixels in output scanline. */
    6596             :         /* ====================================================================
    6597             :          */
    6598     7743095 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6599             :         {
    6600     7706282 :             GPtrDiff_t iSrcOffset = 0;
    6601     7706282 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,  // Pixel is skipped when the helper returns false; otherwise iSrcOffset is used below as the source pixel index.
    6602             :                                               padfX, padfY, nSrcXSize,
    6603             :                                               nSrcYSize, iSrcOffset))
    6604     2164638 :                 continue;
    6605             : 
    6606             :             /* --------------------------------------------------------------------
    6607             :              */
    6608             :             /*      Do not try to apply invalid source pixels to the dest. */
    6609             :             /* --------------------------------------------------------------------
    6610             :              */
    6611     7524668 :             if (poWK->panUnifiedSrcValid != nullptr &&  // The validity mask is optional; no check when it is null.
    6612      931241 :                 !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    6613             :             {
    6614       49670 :                 if (!bOneSourceCornerFailsToReproject)
    6615             :                 {
    6616       42185 :                     continue;
    6617             :                 }
    6618        7485 :                 else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))  // Fallback for the failed-corner case: the pixel is kept only if this helper returns true (it presumably updates iSrcOffset - confirm in its definition).
    6619             :                 {
    6620        5224 :                     continue;
    6621             :                 }
    6622             :             }
    6623             : 
    6624             :             /* --------------------------------------------------------------------
    6625             :              */
    6626             :             /*      Do not try to apply transparent source pixels to the
    6627             :              * destination.*/
    6628             :             /* --------------------------------------------------------------------
    6629             :              */
    6630     6546016 :             double dfDensity = 1.0;  // Stays 1.0 when there is no source density mask.
    6631             : 
    6632     6546016 :             if (poWK->pafUnifiedSrcDensity != nullptr)
    6633             :             {
    6634     1162245 :                 dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    6635     1162245 :                 if (dfDensity < SRC_DENSITY_THRESHOLD)  // Below the threshold the destination pixel is left untouched.
    6636     1004371 :                     continue;
    6637             :             }
    6638             : 
    6639             :             /* ====================================================================
    6640             :              */
    6641             :             /*      Loop processing each band. */
    6642             :             /* ====================================================================
    6643             :              */
    6644             : 
    6645     5541654 :             const GPtrDiff_t iDstOffset =  // Linear index into the destination buffers; the cast widens before the multiply.
    6646     5541654 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    6647             : 
    6648    12873738 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6649             :             {
    6650     7332114 :                 T value = 0;
    6651     7332114 :                 double dfBandDensity = 0.0;
    6652             : 
    6653             :                 /* --------------------------------------------------------------------
    6654             :                  */
    6655             :                 /*      Collect the source value. */
    6656             :                 /* --------------------------------------------------------------------
    6657             :                  */
    6658     7332114 :                 if (GWKGetPixelT(poWK, iBand, iSrcOffset, &dfBandDensity,  // Nothing is written for this band when the helper returns false.
    6659             :                                  &value))
    6660             :                 {
    6661             : 
    6662     7332104 :                     if (poWK->bApplyVerticalShift)
    6663             :                     {
    6664           0 :                         if (!std::isfinite(padfZ[iDstX]))  // Non-finite Z: move on to the next band without writing this one.
    6665           0 :                             continue;
    6666             :                         // Subtract padfZ[] since the coordinate transformation
    6667             :                         // is from target to source
    6668           0 :                         value = GWKClampValueT<T>(
    6669           0 :                             value * poWK->dfMultFactorVerticalShift -
    6670           0 :                             padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
    6671             :                     }
    6672             : 
    6673     7332104 :                     if (dfBandDensity < 1.0)
    6674             :                     {
    6675      159076 :                         if (dfBandDensity == 0.0)
    6676             :                         {
    6677             :                             // Do nothing.
    6678             :                         }
    6679             :                         else
    6680             :                         {
    6681             :                             // Let the general code take care of mixing.
    6682      159076 :                             GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
    6683             :                                                   dfBandDensity, value);
    6684             :                         }
    6685             :                     }
    6686             :                     else
    6687             :                     {
    6688     7173023 :                         reinterpret_cast<T *>(  // Band density >= 1.0: store the value directly into the destination band buffer.
    6689     7173023 :                             poWK->papabyDstImage[iBand])[iDstOffset] = value;
    6690             :                     }
    6691             :                 }
    6692             :             }
    6693             : 
    6694             :             /* --------------------------------------------------------------------
    6695             :              */
    6696             :             /*      Mark this pixel valid/opaque in the output. */
    6697             :             /* --------------------------------------------------------------------
    6698             :              */
    6699     5541654 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);  // Runs once per pixel, after all bands, with the unified source density.
    6700             : 
    6701     5541654 :             if (poWK->panDstValid != nullptr)  // The destination validity mask is optional.
    6702             :             {
    6703     4862206 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    6704             :             }
    6705             :         } /* Next iDstX */
    6706             : 
    6707             :         /* --------------------------------------------------------------------
    6708             :          */
    6709             :         /*      Report progress to the user, and optionally cancel out. */
    6710             :         /* --------------------------------------------------------------------
    6711             :          */
    6712       36814 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))  // A non-zero return from the progress callback stops the line loop; cleanup below still runs.
    6713           0 :             break;
    6714             :     }
    6715             : 
    6716             :     /* -------------------------------------------------------------------- */
    6717             :     /*      Cleanup and return.                                             */
    6718             :     /* -------------------------------------------------------------------- */
    6719         343 :     CPLFree(padfX);
    6720         343 :     CPLFree(padfY);
    6721         343 :     CPLFree(padfZ);
    6722         343 :     CPLFree(pabSuccess);
    6723         343 : }
    6724             : 
    6725         276 : static CPLErr GWKNearestByte(GDALWarpKernel *poWK)  // Calls GWKRun with the GByte instantiation of GWKNearestThread; returns GWKRun's result unchanged.
    6726             : {
    6727         276 :     return GWKRun(poWK, "GWKNearestByte", GWKNearestThread<GByte>);
    6728             : }
    6729             : 
    6730          18 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)  // Calls GWKRun with poWK, a name string and the job function; returns GWKRun's result unchanged.
    6731             : {
    6732          18 :     return GWKRun(
    6733             :         poWK, "GWKNearestNoMasksOrDstDensityOnlyShort",
    6734          18 :         GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_NearestNeighbour>);  // Job function: GInt16 pixels, nearest neighbour.
    6735             : }
    6736             : 
    6737          18 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)  // Calls GWKRun with poWK, a name string and the job function; returns GWKRun's result unchanged.
    6738             : {
    6739          18 :     return GWKRun(
    6740             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyShort",
    6741             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16,  // Job function: GInt16 pixels, bilinear.
    6742          18 :                                                            GRA_Bilinear>);
    6743             : }
    6744             : 
    6745           6 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)  // Calls GWKRun with poWK, a name string and the job function; returns GWKRun's result unchanged.
    6746             : {
    6747           6 :     return GWKRun(
    6748             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyUShort",
    6749             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16,  // Job function: GUInt16 pixels, bilinear.
    6750           6 :                                                            GRA_Bilinear>);
    6751             : }
    6752             : 
    6753           5 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)  // Calls GWKRun with poWK, a name string and the job function; returns GWKRun's result unchanged.
    6754             : {
    6755           5 :     return GWKRun(
    6756             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyFloat",
    6757             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float,  // Job function: float pixels, bilinear.
    6758           5 :                                                            GRA_Bilinear>);
    6759             : }
    6760             : 
    6761             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    6762             : 
    6763             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)  // Compiled only when INSTANTIATE_FLOAT64_SSE2_IMPL is defined; calls GWKRun and returns its result unchanged.
    6764             : {
    6765             :     return GWKRun(
    6766             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyDouble",
    6767             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double,  // Job function: double pixels, bilinear.
    6768             :                                                            GRA_Bilinear>);
    6769             : }
    6770             : #endif
    6771             : 
    6772           5 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)  // Calls GWKRun with poWK, a name string and the job function; returns GWKRun's result unchanged.
    6773             : {
    6774           5 :     return GWKRun(
    6775             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyShort",
    6776           5 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16, GRA_Cubic>);  // Job function: GInt16 pixels, cubic.
    6777             : }
    6778             : 
    6779          12 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)  // Calls GWKRun with poWK, a name string and the job function; returns GWKRun's result unchanged.
    6780             : {
    6781          12 :     return GWKRun(
    6782             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyUShort",
    6783          12 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16, GRA_Cubic>);  // Job function: GUInt16 pixels, cubic.
    6784             : }
    6785             : 
    6786           6 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)  // Calls GWKRun with poWK, a name string and the job function; returns GWKRun's result unchanged.
    6787             : {
    6788           6 :     return GWKRun(
    6789             :         poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyShort",
    6790           6 :         GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_CubicSpline>);  // Job function: GInt16 pixels, cubic spline.
    6791             : }
    6792             : 
    6793           5 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)  // Calls GWKRun with poWK, a name string and the job function; returns GWKRun's result unchanged.
    6794             : {
    6795           5 :     return GWKRun(
    6796             :         poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyUShort",
    6797           5 :         GWKResampleNoMasksOrDstDensityOnlyThread<GUInt16, GRA_CubicSpline>);  // Job function: GUInt16 pixels, cubic spline.
    6798             : }
    6799             : 
    6800          27 : static CPLErr GWKNearestShort(GDALWarpKernel *poWK)  // Calls GWKRun with the GInt16 instantiation of GWKNearestThread; returns GWKRun's result unchanged.
    6801             : {
    6802          27 :     return GWKRun(poWK, "GWKNearestShort", GWKNearestThread<GInt16>);
    6803             : }
    6804             : 
    6805          11 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)  // Calls GWKRun with poWK, a name string and the job function; returns GWKRun's result unchanged.
    6806             : {
    6807          11 :     return GWKRun(
    6808             :         poWK, "GWKNearestNoMasksOrDstDensityOnlyFloat",
    6809          11 :         GWKResampleNoMasksOrDstDensityOnlyThread<float, GRA_NearestNeighbour>);  // Job function: float pixels, nearest neighbour.
    6810             : }
    6811             : 
    6812          36 : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK)  // Calls GWKRun with the float instantiation of GWKNearestThread; returns GWKRun's result unchanged.
    6813             : {
    6814          36 :     return GWKRun(poWK, "GWKNearestFloat", GWKNearestThread<float>);
    6815             : }
    6816             : 
    6817             : /************************************************************************/
    6818             : /*                           GWKAverageOrMode()                         */
    6819             : /*                                                                      */
    6820             : /************************************************************************/
    6821             : 
    6822             : static void GWKAverageOrModeThread(void *pData);  // Forward declaration: the job function is defined after the wrapper that references it.
    6823             : 
    6824         118 : static CPLErr GWKAverageOrMode(GDALWarpKernel *poWK)  // Calls GWKRun with GWKAverageOrModeThread as the job function; returns GWKRun's result unchanged.
    6825             : {
    6826         118 :     return GWKRun(poWK, "GWKAverageOrMode", GWKAverageOrModeThread);
    6827             : }
    6828             : 
    6829             : // Overall logic based on GWKGeneralCaseThread().
    6830         118 : static void GWKAverageOrModeThread(void *pData)
    6831             : {
    6832         118 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    6833         118 :     GDALWarpKernel *poWK = psJob->poWK;
    6834         118 :     const int iYMin = psJob->iYMin;
    6835         118 :     const int iYMax = psJob->iYMax;
    6836             :     const double dfMultFactorVerticalShiftPipeline =
    6837         118 :         poWK->bApplyVerticalShift
    6838         118 :             ? CPLAtof(CSLFetchNameValueDef(
    6839           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    6840             :                   "1.0"))
    6841         118 :             : 0.0;
    6842             : 
    6843         118 :     const int nDstXSize = poWK->nDstXSize;
    6844         118 :     const int nSrcXSize = poWK->nSrcXSize;
    6845         118 :     const int nSrcYSize = poWK->nSrcYSize;
    6846             : 
    6847             :     /* -------------------------------------------------------------------- */
    6848             :     /*      Find out which algorithm to use (small optim.)                  */
    6849             :     /* -------------------------------------------------------------------- */
    6850         118 :     int nAlgo = 0;
    6851             : 
    6852             :     // These vars only used with nAlgo == 3.
    6853         118 :     int *panVals = nullptr;
    6854         118 :     int nBins = 0;
    6855         118 :     int nBinsOffset = 0;
    6856             : 
    6857             :     // Only used with nAlgo == GWKAOM_Fmode.
    6858         118 :     float *pafRealVals = nullptr;
    6859         118 :     float *pafImagVals = nullptr;
    6860         118 :     int *panRealSums = nullptr;
    6861         118 :     int *panImagSums = nullptr;
    6862             : 
    6863             :     // Only used with nAlgo == GWKAOM_Quant.
    6864         118 :     float quant = 0.5;
    6865             : 
    6866             :     // To control array allocation only when data type is complex
    6867         118 :     const bool bIsComplex = GDALDataTypeIsComplex(poWK->eWorkingDataType) != 0;
    6868             : 
    6869         118 :     if (poWK->eResample == GRA_Average)
    6870             :     {
    6871          71 :         nAlgo = GWKAOM_Average;
    6872             :     }
    6873          47 :     else if (poWK->eResample == GRA_RMS)
    6874             :     {
    6875           9 :         nAlgo = GWKAOM_RMS;
    6876             :     }
    6877          38 :     else if (poWK->eResample == GRA_Mode)
    6878             :     {
    6879             :         // TODO check color table count > 256.
    6880          11 :         if (poWK->eWorkingDataType == GDT_Byte ||
    6881           5 :             poWK->eWorkingDataType == GDT_UInt16 ||
    6882           5 :             poWK->eWorkingDataType == GDT_Int16)
    6883             :         {
    6884           9 :             nAlgo = GWKAOM_Imode;
    6885             : 
    6886             :             // In the case of a paletted or non-paletted byte band,
    6887             :             // Input values are between 0 and 255.
    6888           9 :             if (poWK->eWorkingDataType == GDT_Byte)
    6889             :             {
    6890           6 :                 nBins = 256;
    6891             :             }
    6892             :             // In the case of Int8, input values are between -128 and 127.
    6893           3 :             else if (poWK->eWorkingDataType == GDT_Int8)
    6894             :             {
    6895           0 :                 nBins = 256;
    6896           0 :                 nBinsOffset = 128;
    6897             :             }
    6898             :             // In the case of Int16, input values are between -32768 and 32767.
    6899           3 :             else if (poWK->eWorkingDataType == GDT_Int16)
    6900             :             {
    6901           3 :                 nBins = 65536;
    6902           3 :                 nBinsOffset = 32768;
    6903             :             }
    6904             :             // In the case of UInt16, input values are between 0 and 65535.
    6905           0 :             else if (poWK->eWorkingDataType == GDT_UInt16)
    6906             :             {
    6907           0 :                 nBins = 65536;
    6908             :             }
    6909             :             panVals =
    6910           9 :                 static_cast<int *>(VSI_MALLOC_VERBOSE(nBins * sizeof(int)));
    6911           9 :             if (panVals == nullptr)
    6912           0 :                 return;
    6913             :         }
    6914             :         else
    6915             :         {
    6916           2 :             nAlgo = GWKAOM_Fmode;
    6917             : 
    6918           2 :             if (nSrcXSize > 0 && nSrcYSize > 0)
    6919             :             {
    6920             :                 pafRealVals = static_cast<float *>(
    6921           2 :                     VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
    6922             :                 panRealSums = static_cast<int *>(
    6923           2 :                     VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(int)));
    6924           2 :                 if (pafRealVals == nullptr || panRealSums == nullptr)
    6925             :                 {
    6926           0 :                     VSIFree(pafRealVals);
    6927           0 :                     VSIFree(panRealSums);
    6928           0 :                     return;
    6929             :                 }
    6930             :             }
    6931             :         }
    6932             :     }
    6933          27 :     else if (poWK->eResample == GRA_Max)
    6934             :     {
    6935           6 :         nAlgo = GWKAOM_Max;
    6936             :     }
    6937          21 :     else if (poWK->eResample == GRA_Min)
    6938             :     {
    6939           5 :         nAlgo = GWKAOM_Min;
    6940             :     }
    6941          16 :     else if (poWK->eResample == GRA_Med)
    6942             :     {
    6943           6 :         nAlgo = GWKAOM_Quant;
    6944           6 :         quant = 0.5;
    6945             :     }
    6946          10 :     else if (poWK->eResample == GRA_Q1)
    6947             :     {
    6948           5 :         nAlgo = GWKAOM_Quant;
    6949           5 :         quant = 0.25;
    6950             :     }
    6951           5 :     else if (poWK->eResample == GRA_Q3)
    6952             :     {
    6953           5 :         nAlgo = GWKAOM_Quant;
    6954           5 :         quant = 0.75;
    6955             :     }
    6956             : #ifdef disabled
    6957             :     else if (poWK->eResample == GRA_Sum)
    6958             :     {
    6959             :         nAlgo = GWKAOM_Sum;
    6960             :     }
    6961             : #endif
    6962             :     else
    6963             :     {
    6964             :         // Other resample algorithms not permitted here.
    6965           0 :         CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread() ERROR, "
    6966             :                          "illegal resample");
    6967           0 :         return;
    6968             :     }
    6969             : 
    6970         118 :     CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread() using algo %d",
    6971             :              nAlgo);
    6972             : 
    6973             :     /* -------------------------------------------------------------------- */
    6974             :     /*      Allocate x,y,z coordinate arrays for transformation ... two     */
    6975             :     /*      scanlines worth of positions.                                   */
    6976             :     /* -------------------------------------------------------------------- */
    6977             : 
    6978             :     double *padfX =
    6979         118 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6980             :     double *padfY =
    6981         118 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6982             :     double *padfZ =
    6983         118 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6984             :     double *padfX2 =
    6985         118 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6986             :     double *padfY2 =
    6987         118 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6988             :     double *padfZ2 =
    6989         118 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6990         118 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    6991         118 :     int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    6992             : 
    6993         118 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    6994         118 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    6995         118 :     const double dfErrorThreshold = CPLAtof(
    6996         118 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    6997             : 
    6998             :     const double dfExcludedValuesThreshold =
    6999         118 :         CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
    7000             :                                      "EXCLUDED_VALUES_PCT_THRESHOLD", "50")) /
    7001         118 :         100.0;
    7002             :     const double dfNodataValuesThreshold =
    7003         118 :         CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
    7004             :                                      "NODATA_VALUES_PCT_THRESHOLD", "100")) /
    7005         118 :         100.0;
    7006             : 
    7007             :     const int nXMargin =
    7008         118 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
    7009             :     const int nYMargin =
    7010         118 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
    7011             : 
    7012             :     /* ==================================================================== */
    7013             :     /*      Loop over output lines.                                         */
    7014             :     /* ==================================================================== */
    7015        6603 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    7016             :     {
    7017             : 
    7018             :         /* --------------------------------------------------------------------
    7019             :          */
    7020             :         /*      Setup points to transform to source image space. */
    7021             :         /* --------------------------------------------------------------------
    7022             :          */
    7023     1669810 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    7024             :         {
    7025     1663330 :             padfX[iDstX] = iDstX + poWK->nDstXOff;
    7026     1663330 :             padfY[iDstX] = iDstY + poWK->nDstYOff;
    7027     1663330 :             padfZ[iDstX] = 0.0;
    7028     1663330 :             padfX2[iDstX] = iDstX + 1.0 + poWK->nDstXOff;
    7029     1663330 :             padfY2[iDstX] = iDstY + 1.0 + poWK->nDstYOff;
    7030     1663330 :             padfZ2[iDstX] = 0.0;
    7031             :         }
    7032             : 
    7033             :         /* --------------------------------------------------------------------
    7034             :          */
    7035             :         /*      Transform the points from destination pixel/line coordinates */
    7036             :         /*      to source pixel/line coordinates. */
    7037             :         /* --------------------------------------------------------------------
    7038             :          */
    7039        6485 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    7040             :                              padfY, padfZ, pabSuccess);
    7041        6485 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX2,
    7042             :                              padfY2, padfZ2, pabSuccess2);
    7043             : 
    7044        6485 :         if (dfSrcCoordPrecision > 0.0)
    7045             :         {
    7046           0 :             GWKRoundSourceCoordinates(
    7047             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    7048             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    7049           0 :                 poWK->nDstXOff, iDstY + poWK->nDstYOff);
    7050           0 :             GWKRoundSourceCoordinates(
    7051             :                 nDstXSize, padfX2, padfY2, padfZ2, pabSuccess2,
    7052             :                 dfSrcCoordPrecision, dfErrorThreshold, poWK->pfnTransformer,
    7053           0 :                 psJob->pTransformerArg, 1.0 + poWK->nDstXOff,
    7054           0 :                 iDstY + 1.0 + poWK->nDstYOff);
    7055             :         }
    7056             : 
    7057             :         /* ====================================================================
    7058             :          */
    7059             :         /*      Loop over pixels in output scanline. */
    7060             :         /* ====================================================================
    7061             :          */
    7062     1669810 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    7063             :         {
    7064     1663330 :             GPtrDiff_t iSrcOffset = 0;
    7065     1663330 :             double dfDensity = 1.0;
    7066     1663330 :             bool bHasFoundDensity = false;
    7067             : 
    7068     1663330 :             if (!pabSuccess[iDstX] || !pabSuccess2[iDstX])
    7069      311460 :                 continue;
    7070             : 
    7071             :             // Add some checks so that padfX[iDstX] - poWK->nSrcXOff is in
    7072             :             // reasonable range (https://github.com/OSGeo/gdal/issues/2365)
    7073     1663330 :             if (!(padfX[iDstX] - poWK->nSrcXOff >= -nXMargin &&
    7074     1663310 :                   padfX2[iDstX] - poWK->nSrcXOff >= -nXMargin &&
    7075     1663310 :                   padfY[iDstX] - poWK->nSrcYOff >= -nYMargin &&
    7076     1663290 :                   padfY2[iDstX] - poWK->nSrcYOff >= -nYMargin &&
    7077     1663290 :                   padfX[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
    7078     1663290 :                   padfX2[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
    7079     1663280 :                   padfY[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin &&
    7080     1663280 :                   padfY2[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin))
    7081             :             {
    7082          62 :                 continue;
    7083             :             }
    7084             : 
    7085     1663260 :             const GPtrDiff_t iDstOffset =
    7086     1663260 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    7087             : 
    7088             :             // Compute corners in source crs.
    7089             : 
    7090             :             // The transformation might not have preserved ordering of
    7091             :             // coordinates so do the necessary swapping (#5433).
    7092             :             // NOTE: this is really an approximate fix. To do something
    7093             :             // more precise we would for example need to compute the
    7094             :             // transformation of coordinates in the
    7095             :             // [iDstX,iDstY]x[iDstX+1,iDstY+1] square back to source
    7096             :             // coordinates, and take the bounding box of the resulting source
    7097             :             // coordinates.
    7098             : 
    7099     1663260 :             if (padfX[iDstX] > padfX2[iDstX])
    7100      268744 :                 std::swap(padfX[iDstX], padfX2[iDstX]);
    7101             : 
    7102             :             // Detect situations where the target pixel is close to the
    7103             :             // antimeridian and when padfX[iDstX] and padfX2[iDstX] are very
    7104             :             // close to the left-most and right-most columns of the source
    7105             :             // raster. The 2 value below was experimentally determined to
    7106             :             // avoid false-positives and false-negatives.
    7107             :             // Addresses https://github.com/OSGeo/gdal/issues/6478
    7108     1663260 :             bool bWrapOverX = false;
    7109     1663260 :             const int nThresholdWrapOverX = std::min(2, nSrcXSize / 10);
    7110     1663260 :             if (poWK->nSrcXOff == 0 &&
    7111     1663260 :                 padfX[iDstX] * poWK->dfXScale < nThresholdWrapOverX &&
    7112       14495 :                 (nSrcXSize - padfX2[iDstX]) * poWK->dfXScale <
    7113             :                     nThresholdWrapOverX)
    7114             :             {
    7115             :                 // Check there is a discontinuity by checking at mid-pixel.
    7116             :                 // NOTE: all this remains fragile. To confidently
    7117             :                 // detect antimeridian warping we should probably try to access
    7118             :                 // georeferenced coordinates, and not rely only on tests on
    7119             :                 // image space coordinates. But accessing georeferenced
    7120             :                 // coordinates from here is not trivial, and we would for example
    7121             :                 // have to handle both geographic, Mercator, etc.
    7122             :                 // Let's hope this heuristic is good enough for now.
    7123        1041 :                 double x = iDstX + 0.5 + poWK->nDstXOff;
    7124        1041 :                 double y = iDstY + poWK->nDstYOff;
    7125        1041 :                 double z = 0;
    7126        1041 :                 int bSuccess = FALSE;
    7127        1041 :                 poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1, &x, &y,
    7128             :                                      &z, &bSuccess);
    7129        1041 :                 if (bSuccess && x < padfX[iDstX])
    7130             :                 {
    7131        1008 :                     bWrapOverX = true;
    7132        1008 :                     std::swap(padfX[iDstX], padfX2[iDstX]);
    7133        1008 :                     padfX2[iDstX] += nSrcXSize;
    7134             :                 }
    7135             :             }
    7136             : 
    7137     1663260 :             const double dfXMin = padfX[iDstX] - poWK->nSrcXOff;
    7138     1663260 :             const double dfXMax = padfX2[iDstX] - poWK->nSrcXOff;
    7139     1663260 :             constexpr double EPS = 1e-10;
    7140             :             // Check that [dfXMin, dfXMax] intersect with [0,nSrcXSize] with a tolerance
    7141     1663260 :             if (!(dfXMax > -EPS && dfXMin < nSrcXSize + EPS))
    7142          72 :                 continue;
    7143     1663190 :             int iSrcXMin = static_cast<int>(std::max(floor(dfXMin + EPS), 0.0));
    7144     1663190 :             int iSrcXMax = static_cast<int>(
    7145     1663190 :                 std::min(ceil(dfXMax - EPS), static_cast<double>(INT_MAX)));
    7146     1663190 :             if (!bWrapOverX)
    7147     1662180 :                 iSrcXMax = std::min(iSrcXMax, nSrcXSize);
    7148     1663190 :             if (iSrcXMin == iSrcXMax && iSrcXMax < nSrcXSize)
    7149         472 :                 iSrcXMax++;
    7150             : 
    7151     1663190 :             if (padfY[iDstX] > padfY2[iDstX])
    7152      270107 :                 std::swap(padfY[iDstX], padfY2[iDstX]);
    7153     1663190 :             const double dfYMin = padfY[iDstX] - poWK->nSrcYOff;
    7154     1663190 :             const double dfYMax = padfY2[iDstX] - poWK->nSrcYOff;
    7155             :             // Check that [dfYMin, dfYMax] intersect with [0,nSrcYSize] with a tolerance
    7156     1663190 :             if (!(dfYMax > -EPS && dfYMin < nSrcYSize + EPS))
    7157          36 :                 continue;
    7158     1663160 :             int iSrcYMin = static_cast<int>(std::max(floor(dfYMin + EPS), 0.0));
    7159             :             int iSrcYMax =
    7160     1663160 :                 std::min(static_cast<int>(ceil(dfYMax - EPS)), nSrcYSize);
    7161     1663160 :             if (iSrcYMin == iSrcYMax && iSrcYMax < nSrcYSize)
    7162           0 :                 iSrcYMax++;
    7163             : 
    7164             : #define COMPUTE_WEIGHT_Y(iSrcY)                                                \
    7165             :     ((iSrcY == iSrcYMin)                                                       \
    7166             :          ? ((iSrcYMin + 1 == iSrcYMax) ? 1.0 : 1 - (dfYMin - iSrcYMin))        \
    7167             :      : (iSrcY + 1 == iSrcYMax) ? 1 - (iSrcYMax - dfYMax)                       \
    7168             :                                : 1.0)
    7169             : 
    7170             : #define COMPUTE_WEIGHT(iSrcX, dfWeightY)                                       \
    7171             :     ((iSrcX == iSrcXMin)       ? ((iSrcXMin + 1 == iSrcXMax)                   \
    7172             :                                       ? dfWeightY                              \
    7173             :                                       : dfWeightY * (1 - (dfXMin - iSrcXMin))) \
    7174             :      : (iSrcX + 1 == iSrcXMax) ? dfWeightY * (1 - (iSrcXMax - dfXMax))         \
    7175             :                                : dfWeightY)
    7176             : 
    7177     1663160 :             bool bDone = false;
    7178             : 
    7179             :             // Special Average mode where we process all bands together,
    7180             :             // to avoid averaging tuples that match an entry of m_aadfExcludedValues
    7181     2267230 :             if (nAlgo == GWKAOM_Average &&
    7182      604073 :                 (!poWK->m_aadfExcludedValues.empty() ||
    7183      393224 :                  dfNodataValuesThreshold < 1 - EPS) &&
    7184     2267230 :                 !poWK->bApplyVerticalShift && !bIsComplex)
    7185             :             {
    7186      393224 :                 double dfTotalWeightInvalid = 0.0;
    7187      393224 :                 double dfTotalWeightExcluded = 0.0;
    7188      393224 :                 double dfTotalWeightRegular = 0.0;
    7189      786448 :                 std::vector<double> adfValueReal(poWK->nBands, 0);
    7190      786448 :                 std::vector<double> adfValueAveraged(poWK->nBands, 0);
    7191             :                 std::vector<int> anCountExcludedValues(
    7192      393224 :                     poWK->m_aadfExcludedValues.size(), 0);
    7193             : 
    7194     1572890 :                 for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7195             :                 {
    7196     1179660 :                     const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7197     1179660 :                     iSrcOffset =
    7198     1179660 :                         iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7199     5111860 :                     for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7200             :                          iSrcX++, iSrcOffset++)
    7201             :                     {
    7202     3932190 :                         if (bWrapOverX)
    7203           0 :                             iSrcOffset =
    7204           0 :                                 (iSrcX % nSrcXSize) +
    7205           0 :                                 static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7206             : 
    7207     3932190 :                         const double dfWeight =
    7208     3932190 :                             COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7209     3932190 :                         if (dfWeight <= 0)
    7210           0 :                             continue;
    7211             : 
    7212     3932200 :                         if (poWK->panUnifiedSrcValid != nullptr &&
    7213          12 :                             !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    7214             :                         {
    7215           3 :                             dfTotalWeightInvalid += dfWeight;
    7216           3 :                             continue;
    7217             :                         }
    7218             : 
    7219     3932190 :                         bool bAllValid = true;
    7220     7274900 :                         for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7221             :                         {
    7222     6160660 :                             double dfBandDensity = 0;
    7223     6160660 :                             double dfValueImagTmp = 0;
    7224     9503370 :                             if (!(GWKGetPixelValue(
    7225             :                                       poWK, iBand, iSrcOffset, &dfBandDensity,
    7226     6160660 :                                       &adfValueReal[iBand], &dfValueImagTmp) &&
    7227     3342710 :                                   dfBandDensity > BAND_DENSITY_THRESHOLD))
    7228             :                             {
    7229     2817950 :                                 bAllValid = false;
    7230     2817950 :                                 break;
    7231             :                             }
    7232             :                         }
    7233             : 
    7234     3932190 :                         if (!bAllValid)
    7235             :                         {
    7236     2817950 :                             dfTotalWeightInvalid += dfWeight;
    7237     2817950 :                             continue;
    7238             :                         }
    7239             : 
    7240     1114240 :                         bool bExcludedValueFound = false;
    7241     2228350 :                         for (size_t i = 0;
    7242     2228350 :                              i < poWK->m_aadfExcludedValues.size(); ++i)
    7243             :                         {
    7244     1114130 :                             if (poWK->m_aadfExcludedValues[i] == adfValueReal)
    7245             :                             {
    7246          21 :                                 bExcludedValueFound = true;
    7247          21 :                                 ++anCountExcludedValues[i];
    7248          21 :                                 dfTotalWeightExcluded += dfWeight;
    7249          21 :                                 break;
    7250             :                             }
    7251             :                         }
    7252     1114240 :                         if (!bExcludedValueFound)
    7253             :                         {
    7254             :                             // Weighted incremental algorithm mean
    7255             :                             // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
    7256     1114220 :                             dfTotalWeightRegular += dfWeight;
    7257     4456870 :                             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7258             :                             {
    7259     3342650 :                                 adfValueAveraged[iBand] +=
    7260     6685300 :                                     (dfWeight / dfTotalWeightRegular) *
    7261     6685300 :                                     (adfValueReal[iBand] -
    7262     3342650 :                                      adfValueAveraged[iBand]);
    7263             :                             }
    7264             :                         }
    7265             :                     }
    7266             :                 }
    7267             : 
    7268      393224 :                 const double dfTotalWeight = dfTotalWeightInvalid +
    7269             :                                              dfTotalWeightExcluded +
    7270             :                                              dfTotalWeightRegular;
    7271      393224 :                 if (dfTotalWeightInvalid > 0 &&
    7272             :                     dfTotalWeightInvalid >=
    7273      311293 :                         dfNodataValuesThreshold * dfTotalWeight)
    7274             :                 {
    7275             :                     // Do nothing. Leave bHasFoundDensity set to false.
    7276             :                 }
    7277       81934 :                 else if (dfTotalWeightExcluded > 0 &&
    7278             :                          dfTotalWeightExcluded >=
    7279           6 :                              dfExcludedValuesThreshold * dfTotalWeight)
    7280             :                 {
    7281             :                     // Find the most represented excluded value tuple
    7282           3 :                     size_t iExcludedValue = 0;
    7283           3 :                     int nExcludedValueCount = 0;
    7284           6 :                     for (size_t i = 0; i < poWK->m_aadfExcludedValues.size();
    7285             :                          ++i)
    7286             :                     {
    7287           3 :                         if (anCountExcludedValues[i] > nExcludedValueCount)
    7288             :                         {
    7289           3 :                             iExcludedValue = i;
    7290           3 :                             nExcludedValueCount = anCountExcludedValues[i];
    7291             :                         }
    7292             :                     }
    7293             : 
    7294           3 :                     bHasFoundDensity = true;
    7295             : 
    7296          12 :                     for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7297             :                     {
    7298           9 :                         GWKSetPixelValue(
    7299             :                             poWK, iBand, iDstOffset, /* dfBandDensity = */ 1.0,
    7300           9 :                             poWK->m_aadfExcludedValues[iExcludedValue][iBand],
    7301             :                             0);
    7302           3 :                     }
    7303             :                 }
    7304       81931 :                 else if (dfTotalWeightRegular > 0)
    7305             :                 {
    7306       81931 :                     bHasFoundDensity = true;
    7307             : 
    7308      327720 :                     for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7309             :                     {
    7310      245789 :                         GWKSetPixelValue(poWK, iBand, iDstOffset,
    7311             :                                          /* dfBandDensity = */ 1.0,
    7312      245789 :                                          adfValueAveraged[iBand], 0);
    7313             :                     }
    7314             :                 }
    7315             : 
    7316             :                 // Skip below loop on bands
    7317      393224 :                 bDone = true;
    7318             :             }
    7319             : 
    7320             :             /* ====================================================================
    7321             :              */
    7322             :             /*      Loop processing each band. */
    7323             :             /* ====================================================================
    7324             :              */
    7325             : 
    7326     4439520 :             for (int iBand = 0; !bDone && iBand < poWK->nBands; iBand++)
    7327             :             {
    7328     2776360 :                 double dfBandDensity = 0.0;
    7329     2776360 :                 double dfValueReal = 0.0;
    7330     2776360 :                 double dfValueImag = 0.0;
    7331     2776360 :                 double dfValueRealTmp = 0.0;
    7332     2776360 :                 double dfValueImagTmp = 0.0;
    7333             : 
    7334             :                 /* --------------------------------------------------------------------
    7335             :                  */
    7336             :                 /*      Collect the source value. */
    7337             :                 /* --------------------------------------------------------------------
    7338             :                  */
    7339             : 
    7340             :                 // Loop over source lines and pixels - 3 possible algorithms.
    7341             : 
    7342             :                 // poWK->eResample == GRA_Average.
    7343     2776360 :                 if (nAlgo == GWKAOM_Average)
    7344             :                 {
    7345      300849 :                     double dfTotalWeight = 0.0;
    7346             : 
    7347             :                     // This code adapted from GDALDownsampleChunk32R_AverageT()
    7348             :                     // in gcore/overview.cpp.
    7349      631308 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7350             :                     {
    7351      330459 :                         const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7352      330459 :                         iSrcOffset = iSrcXMin +
    7353      330459 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7354      803200 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7355             :                              iSrcX++, iSrcOffset++)
    7356             :                         {
    7357      472741 :                             if (bWrapOverX)
    7358         630 :                                 iSrcOffset =
    7359         630 :                                     (iSrcX % nSrcXSize) +
    7360         630 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7361             : 
    7362      472745 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7363           4 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7364             :                                             iSrcOffset))
    7365             :                             {
    7366           1 :                                 continue;
    7367             :                             }
    7368             : 
    7369      472740 :                             if (GWKGetPixelValue(
    7370             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7371      945480 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7372      472740 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7373             :                             {
    7374      472740 :                                 const double dfWeight =
    7375      472740 :                                     COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7376      472740 :                                 if (dfWeight > 0)
    7377             :                                 {
    7378             :                                     // Weighted incremental algorithm mean
    7379             :                                     // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
    7380      472740 :                                     dfTotalWeight += dfWeight;
    7381      472740 :                                     dfValueReal +=
    7382      472740 :                                         (dfWeight / dfTotalWeight) *
    7383      472740 :                                         (dfValueRealTmp - dfValueReal);
    7384      472740 :                                     if (bIsComplex)
    7385             :                                     {
    7386         252 :                                         dfValueImag +=
    7387         252 :                                             (dfWeight / dfTotalWeight) *
    7388         252 :                                             (dfValueImagTmp - dfValueImag);
    7389             :                                     }
    7390             :                                 }
    7391             :                             }
    7392             :                         }
    7393             :                     }
    7394             : 
    7395      300849 :                     if (dfTotalWeight > 0)
    7396             :                     {
    7397      300849 :                         if (poWK->bApplyVerticalShift)
    7398             :                         {
    7399           0 :                             if (!std::isfinite(padfZ[iDstX]))
    7400           0 :                                 continue;
    7401             :                             // Subtract padfZ[] since the coordinate
    7402             :                             // transformation is from target to source
    7403           0 :                             dfValueReal =
    7404           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7405           0 :                                 padfZ[iDstX] *
    7406             :                                     dfMultFactorVerticalShiftPipeline;
    7407             :                         }
    7408             : 
    7409      300849 :                         dfBandDensity = 1;
    7410      300849 :                         bHasFoundDensity = true;
    7411             :                     }
    7412             :                 }  // GRA_Average.
    7413             :                 // poWK->eResample == GRA_RMS.
    7414     2776360 :                 if (nAlgo == GWKAOM_RMS)
    7415             :                 {
    7416      300416 :                     double dfTotalReal = 0.0;
    7417      300416 :                     double dfTotalImag = 0.0;
    7418      300416 :                     double dfTotalWeight = 0.0;
    7419             :                     // This code adapted from GDALDownsampleChunk32R_AverageT()
    7420             :                     // in gcore/overview.cpp.
    7421      630578 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7422             :                     {
    7423      330162 :                         const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7424      330162 :                         iSrcOffset = iSrcXMin +
    7425      330162 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7426      802723 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7427             :                              iSrcX++, iSrcOffset++)
    7428             :                         {
    7429      472561 :                             if (bWrapOverX)
    7430         630 :                                 iSrcOffset =
    7431         630 :                                     (iSrcX % nSrcXSize) +
    7432         630 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7433             : 
    7434      472561 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7435           0 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7436             :                                             iSrcOffset))
    7437             :                             {
    7438           0 :                                 continue;
    7439             :                             }
    7440             : 
    7441      472561 :                             if (GWKGetPixelValue(
    7442             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7443      945122 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7444      472561 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7445             :                             {
    7446      472561 :                                 const double dfWeight =
    7447      472561 :                                     COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7448      472561 :                                 dfTotalWeight += dfWeight;
    7449      472561 :                                 dfTotalReal +=
    7450      472561 :                                     dfValueRealTmp * dfValueRealTmp * dfWeight;
    7451      472561 :                                 if (bIsComplex)
    7452          48 :                                     dfTotalImag += dfValueImagTmp *
    7453          48 :                                                    dfValueImagTmp * dfWeight;
    7454             :                             }
    7455             :                         }
    7456             :                     }
    7457             : 
    7458      300416 :                     if (dfTotalWeight > 0)
    7459             :                     {
    7460      300416 :                         dfValueReal = sqrt(dfTotalReal / dfTotalWeight);
    7461             : 
    7462      300416 :                         if (poWK->bApplyVerticalShift)
    7463             :                         {
    7464           0 :                             if (!std::isfinite(padfZ[iDstX]))
    7465           0 :                                 continue;
    7466             :                             // Subtract padfZ[] since the coordinate
    7467             :                             // transformation is from target to source
    7468           0 :                             dfValueReal =
    7469           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7470           0 :                                 padfZ[iDstX] *
    7471             :                                     dfMultFactorVerticalShiftPipeline;
    7472             :                         }
    7473             : 
    7474      300416 :                         if (bIsComplex)
    7475          12 :                             dfValueImag = sqrt(dfTotalImag / dfTotalWeight);
    7476             : 
    7477      300416 :                         dfBandDensity = 1;
    7478      300416 :                         bHasFoundDensity = true;
    7479             :                     }
    7480             :                 }  // GRA_RMS.
    7481             : #ifdef disabled
    7482             :                 else if (nAlgo == GWKAOM_Sum)
    7483             :                 // poWK->eResample == GRA_Sum
    7484             :                 {
    7485             :                     double dfTotalReal = 0.0;
    7486             :                     double dfTotalImag = 0.0;
    7487             :                     bool bFoundValid = false;
    7488             : 
    7489             :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7490             :                     {
    7491             :                         const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7492             :                         iSrcOffset = iSrcXMin +
    7493             :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7494             :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7495             :                              iSrcX++, iSrcOffset++)
    7496             :                         {
    7497             :                             if (bWrapOverX)
    7498             :                                 iSrcOffset =
    7499             :                                     (iSrcX % nSrcXSize) +
    7500             :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7501             : 
    7502             :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7503             :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7504             :                                             iSrcOffset))
    7505             :                             {
    7506             :                                 continue;
    7507             :                             }
    7508             : 
    7509             :                             if (GWKGetPixelValue(
    7510             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7511             :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7512             :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7513             :                             {
    7514             :                                 const double dfWeight =
    7515             :                                     COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7516             :                                 bFoundValid = true;
    7517             :                                 dfTotalReal += dfValueRealTmp * dfWeight;
    7518             :                                 if (bIsComplex)
    7519             :                                 {
    7520             :                                     dfTotalImag += dfValueImagTmp * dfWeight;
    7521             :                                 }
    7522             :                             }
    7523             :                         }
    7524             :                     }
    7525             : 
    7526             :                     if (bFoundValid)
    7527             :                     {
    7528             :                         dfValueReal = dfTotalReal;
    7529             : 
    7530             :                         if (poWK->bApplyVerticalShift)
    7531             :                         {
    7532             :                             if (!std::isfinite(padfZ[iDstX]))
    7533             :                                 continue;
    7534             :                             // Subtract padfZ[] since the coordinate
    7535             :                             // transformation is from target to source
    7536             :                             dfValueReal =
    7537             :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7538             :                                 padfZ[iDstX] *
    7539             :                                     dfMultFactorVerticalShiftPipeline;
    7540             :                         }
    7541             : 
    7542             :                         if (bIsComplex)
    7543             :                         {
    7544             :                             dfValueImag = dfTotalImag;
    7545             :                         }
    7546             :                         dfBandDensity = 1;
    7547             :                         bHasFoundDensity = true;
    7548             :                     }
    7549             :                 }  // GRA_Sum.
    7550             : #endif
    7551     2475950 :                 else if (nAlgo == GWKAOM_Imode || nAlgo == GWKAOM_Fmode)
    7552             :                 // poWK->eResample == GRA_Mode
    7553             :                 {
    7554             :                     // This code adapted from GDALDownsampleChunk32R_Mode() in
    7555             :                     // gcore/overview.cpp.
    7556      500014 :                     if (nAlgo == GWKAOM_Fmode)  // int32 or float.
    7557             :                     {
    7558             :                         // Does it make sense to run a
    7559             :                         // majority filter on floating point data? But, here it
    7560             :                         // is for the sake of compatibility. It won't look
    7561             :                         // right on RGB images by the nature of the filter.
    7562        3400 :                         int iMaxInd = 0;
    7563        3400 :                         int iMaxVal = -1;
    7564        3400 :                         int i = 0;
    7565             : 
    7566       10200 :                         for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7567             :                         {
    7568        6800 :                             iSrcOffset =
    7569        6800 :                                 iSrcXMin +
    7570        6800 :                                 static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7571       20400 :                             for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7572             :                                  iSrcX++, iSrcOffset++)
    7573             :                             {
    7574       13600 :                                 if (bWrapOverX)
    7575           0 :                                     iSrcOffset =
    7576           0 :                                         (iSrcX % nSrcXSize) +
    7577           0 :                                         static_cast<GPtrDiff_t>(iSrcY) *
    7578           0 :                                             nSrcXSize;
    7579             : 
    7580       13600 :                                 if (poWK->panUnifiedSrcValid != nullptr &&
    7581           0 :                                     !CPLMaskGet(poWK->panUnifiedSrcValid,
    7582             :                                                 iSrcOffset))
    7583           0 :                                     continue;
    7584             : 
    7585       13600 :                                 if (GWKGetPixelValue(
    7586             :                                         poWK, iBand, iSrcOffset, &dfBandDensity,
    7587       27200 :                                         &dfValueRealTmp, &dfValueImagTmp) &&
    7588       13600 :                                     dfBandDensity > BAND_DENSITY_THRESHOLD)
    7589             :                                 {
    7590       13600 :                                     const float fVal =
    7591       13600 :                                         static_cast<float>(dfValueRealTmp);
    7592             : 
    7593             :                                     // Check array for existing entry.
    7594       32685 :                                     for (i = 0; i < iMaxInd; ++i)
    7595       22512 :                                         if (pafRealVals[i] == fVal &&
    7596        2626 :                                             ++panRealSums[i] >
    7597        2626 :                                                 panRealSums[iMaxVal])
    7598             :                                         {
    7599         801 :                                             iMaxVal = i;
    7600         801 :                                             break;
    7601             :                                         }
    7602             : 
    7603             :                                     // Add to arr if entry not already there.
    7604       13600 :                                     if (i == iMaxInd)
    7605             :                                     {
    7606       12799 :                                         pafRealVals[iMaxInd] = fVal;
    7607       12799 :                                         panRealSums[iMaxInd] = 1;
    7608             : 
    7609       12799 :                                         if (iMaxVal < 0)
    7610        3400 :                                             iMaxVal = iMaxInd;
    7611             : 
    7612       12799 :                                         ++iMaxInd;
    7613             :                                     }
    7614             :                                 }
    7615             :                             }
    7616             :                         }
    7617             : 
    7618        3400 :                         if (iMaxVal != -1)
    7619             :                         {
    7620        3400 :                             dfValueReal = pafRealVals[iMaxVal];
    7621             : 
    7622        3400 :                             if (poWK->bApplyVerticalShift)
    7623             :                             {
    7624           0 :                                 if (!std::isfinite(padfZ[iDstX]))
    7625           0 :                                     continue;
    7626             :                                 // Subtract padfZ[] since the coordinate
    7627             :                                 // transformation is from target to source
    7628           0 :                                 dfValueReal =
    7629           0 :                                     dfValueReal *
    7630           0 :                                         poWK->dfMultFactorVerticalShift -
    7631           0 :                                     padfZ[iDstX] *
    7632             :                                         dfMultFactorVerticalShiftPipeline;
    7633             :                             }
    7634             : 
    7635        3400 :                             dfBandDensity = 1;
    7636        3400 :                             bHasFoundDensity = true;
    7637             :                         }
    7638             :                     }
    7639             :                     else  // byte or int16.
    7640             :                     {
    7641      496614 :                         int nMaxVal = 0;
    7642      496614 :                         int iMaxInd = -1;
    7643             : 
    7644      496614 :                         memset(panVals, 0, nBins * sizeof(int));
    7645             : 
    7646     1612530 :                         for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7647             :                         {
    7648     1115920 :                             iSrcOffset =
    7649     1115920 :                                 iSrcXMin +
    7650     1115920 :                                 static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7651     4733090 :                             for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7652             :                                  iSrcX++, iSrcOffset++)
    7653             :                             {
    7654     3617170 :                                 if (bWrapOverX)
    7655         630 :                                     iSrcOffset =
    7656         630 :                                         (iSrcX % nSrcXSize) +
    7657         630 :                                         static_cast<GPtrDiff_t>(iSrcY) *
    7658         630 :                                             nSrcXSize;
    7659             : 
    7660     3617170 :                                 if (poWK->panUnifiedSrcValid != nullptr &&
    7661           0 :                                     !CPLMaskGet(poWK->panUnifiedSrcValid,
    7662             :                                                 iSrcOffset))
    7663           0 :                                     continue;
    7664             : 
    7665     3617170 :                                 if (GWKGetPixelValue(
    7666             :                                         poWK, iBand, iSrcOffset, &dfBandDensity,
    7667     7234340 :                                         &dfValueRealTmp, &dfValueImagTmp) &&
    7668     3617170 :                                     dfBandDensity > BAND_DENSITY_THRESHOLD)
    7669             :                                 {
    7670     3617170 :                                     const int nVal =
    7671     3617170 :                                         static_cast<int>(dfValueRealTmp);
    7672     3617170 :                                     if (++panVals[nVal + nBinsOffset] > nMaxVal)
    7673             :                                     {
    7674             :                                         // Sum the density.
    7675             :                                         // Is it the most common value so far?
    7676     2812830 :                                         iMaxInd = nVal;
    7677     2812830 :                                         nMaxVal = panVals[nVal + nBinsOffset];
    7678             :                                     }
    7679             :                                 }
    7680             :                             }
    7681             :                         }
    7682             : 
    7683      496614 :                         if (iMaxInd != -1)
    7684             :                         {
    7685      496614 :                             dfValueReal = iMaxInd;
    7686             : 
    7687      496614 :                             if (poWK->bApplyVerticalShift)
    7688             :                             {
    7689           0 :                                 if (!std::isfinite(padfZ[iDstX]))
    7690           0 :                                     continue;
    7691             :                                 // Subtract padfZ[] since the coordinate
    7692             :                                 // transformation is from target to source
    7693           0 :                                 dfValueReal =
    7694           0 :                                     dfValueReal *
    7695           0 :                                         poWK->dfMultFactorVerticalShift -
    7696           0 :                                     padfZ[iDstX] *
    7697             :                                         dfMultFactorVerticalShiftPipeline;
    7698             :                             }
    7699             : 
    7700      496614 :                             dfBandDensity = 1;
    7701      496614 :                             bHasFoundDensity = true;
    7702             :                         }
    7703      500014 :                     }
    7704             :                 }  // GRA_Mode.
    7705     1975930 :                 else if (nAlgo == GWKAOM_Max)
    7706             :                 // poWK->eResample == GRA_Max.
    7707             :                 {
    7708      335037 :                     bool bFoundValid = false;
    7709      335037 :                     double dfTotalReal = std::numeric_limits<double>::lowest();
    7710             :                     // This code adapted from nAlgo 1 method, GRA_Average.
    7711     1288010 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7712             :                     {
    7713      952975 :                         iSrcOffset = iSrcXMin +
    7714      952975 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7715     4406540 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7716             :                              iSrcX++, iSrcOffset++)
    7717             :                         {
    7718     3453560 :                             if (bWrapOverX)
    7719         630 :                                 iSrcOffset =
    7720         630 :                                     (iSrcX % nSrcXSize) +
    7721         630 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7722             : 
    7723     3456370 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7724        2809 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7725             :                                             iSrcOffset))
    7726             :                             {
    7727        2446 :                                 continue;
    7728             :                             }
    7729             : 
    7730             :                             // Returns pixel value if it is not no data.
    7731     3451120 :                             if (GWKGetPixelValue(
    7732             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7733     6902230 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7734     3451120 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7735             :                             {
    7736     3451120 :                                 bFoundValid = true;
    7737     3451120 :                                 if (dfTotalReal < dfValueRealTmp)
    7738             :                                 {
    7739      442642 :                                     dfTotalReal = dfValueRealTmp;
    7740             :                                 }
    7741             :                             }
    7742             :                         }
    7743             :                     }
    7744             : 
    7745      335037 :                     if (bFoundValid)
    7746             :                     {
    7747      335037 :                         dfValueReal = dfTotalReal;
    7748             : 
    7749      335037 :                         if (poWK->bApplyVerticalShift)
    7750             :                         {
    7751           0 :                             if (!std::isfinite(padfZ[iDstX]))
    7752           0 :                                 continue;
    7753             :                             // Subtract padfZ[] since the coordinate
    7754             :                             // transformation is from target to source
    7755           0 :                             dfValueReal =
    7756           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7757           0 :                                 padfZ[iDstX] *
    7758             :                                     dfMultFactorVerticalShiftPipeline;
    7759             :                         }
    7760             : 
    7761      335037 :                         dfBandDensity = 1;
    7762      335037 :                         bHasFoundDensity = true;
    7763             :                     }
    7764             :                 }  // GRA_Max.
    7765     1640900 :                 else if (nAlgo == GWKAOM_Min)
    7766             :                 // poWK->eResample == GRA_Min.
    7767             :                 {
    7768      335012 :                     bool bFoundValid = false;
    7769      335012 :                     double dfTotalReal = std::numeric_limits<double>::max();
    7770             :                     // This code adapted from nAlgo 1 method, GRA_Average.
    7771     1287720 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7772             :                     {
    7773      952710 :                         iSrcOffset = iSrcXMin +
    7774      952710 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7775     4403460 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7776             :                              iSrcX++, iSrcOffset++)
    7777             :                         {
    7778     3450750 :                             if (bWrapOverX)
    7779         630 :                                 iSrcOffset =
    7780         630 :                                     (iSrcX % nSrcXSize) +
    7781         630 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7782             : 
    7783     3450750 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7784           0 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7785             :                                             iSrcOffset))
    7786             :                             {
    7787           0 :                                 continue;
    7788             :                             }
    7789             : 
    7790             :                             // Returns pixel value if it is not no data.
    7791     3450750 :                             if (GWKGetPixelValue(
    7792             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7793     6901500 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7794     3450750 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7795             :                             {
    7796     3450750 :                                 bFoundValid = true;
    7797     3450750 :                                 if (dfTotalReal > dfValueRealTmp)
    7798             :                                 {
    7799      443069 :                                     dfTotalReal = dfValueRealTmp;
    7800             :                                 }
    7801             :                             }
    7802             :                         }
    7803             :                     }
    7804             : 
    7805      335012 :                     if (bFoundValid)
    7806             :                     {
    7807      335012 :                         dfValueReal = dfTotalReal;
    7808             : 
    7809      335012 :                         if (poWK->bApplyVerticalShift)
    7810             :                         {
    7811           0 :                             if (!std::isfinite(padfZ[iDstX]))
    7812           0 :                                 continue;
    7813             :                             // Subtract padfZ[] since the coordinate
    7814             :                             // transformation is from target to source
    7815           0 :                             dfValueReal =
    7816           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7817           0 :                                 padfZ[iDstX] *
    7818             :                                     dfMultFactorVerticalShiftPipeline;
    7819             :                         }
    7820             : 
    7821      335012 :                         dfBandDensity = 1;
    7822      335012 :                         bHasFoundDensity = true;
    7823             :                     }
    7824             :                 }  // GRA_Min.
    7825     1305880 :                 else if (nAlgo == GWKAOM_Quant)
    7826             :                 // poWK->eResample == GRA_Med | GRA_Q1 | GRA_Q3.
    7827             :                 {
    7828     1005040 :                     bool bFoundValid = false;
    7829     1005040 :                     std::vector<double> dfRealValuesTmp;
    7830             : 
    7831             :                     // This code adapted from nAlgo 1 method, GRA_Average.
    7832     3863170 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7833             :                     {
    7834     2858130 :                         iSrcOffset = iSrcXMin +
    7835     2858130 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7836    13210400 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7837             :                              iSrcX++, iSrcOffset++)
    7838             :                         {
    7839    10352300 :                             if (bWrapOverX)
    7840        1890 :                                 iSrcOffset =
    7841        1890 :                                     (iSrcX % nSrcXSize) +
    7842        1890 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7843             : 
    7844    10352300 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7845           0 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7846             :                                             iSrcOffset))
    7847             :                             {
    7848           0 :                                 continue;
    7849             :                             }
    7850             : 
    7851             :                             // Returns pixel value if it is not no data.
    7852    10352300 :                             if (GWKGetPixelValue(
    7853             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7854    20704500 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7855    10352300 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7856             :                             {
    7857    10352300 :                                 bFoundValid = true;
    7858    10352300 :                                 dfRealValuesTmp.push_back(dfValueRealTmp);
    7859             :                             }
    7860             :                         }
    7861             :                     }
    7862             : 
    7863     1005040 :                     if (bFoundValid)
    7864             :                     {
    7865     1005040 :                         std::sort(dfRealValuesTmp.begin(),
    7866             :                                   dfRealValuesTmp.end());
    7867             :                         int quantIdx = static_cast<int>(
    7868     1005040 :                             std::ceil(quant * dfRealValuesTmp.size() - 1));
    7869     1005040 :                         dfValueReal = dfRealValuesTmp[quantIdx];
    7870             : 
    7871     1005040 :                         if (poWK->bApplyVerticalShift)
    7872             :                         {
    7873           0 :                             if (!std::isfinite(padfZ[iDstX]))
    7874           0 :                                 continue;
    7875             :                             // Subtract padfZ[] since the coordinate
    7876             :                             // transformation is from target to source
    7877           0 :                             dfValueReal =
    7878           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7879           0 :                                 padfZ[iDstX] *
    7880             :                                     dfMultFactorVerticalShiftPipeline;
    7881             :                         }
    7882             : 
    7883     1005040 :                         dfBandDensity = 1;
    7884     1005040 :                         bHasFoundDensity = true;
    7885     1005040 :                         dfRealValuesTmp.clear();
    7886             :                     }
    7887             :                 }  // Quantile.
    7888             : 
    7889             :                 /* --------------------------------------------------------------------
    7890             :                  */
    7891             :                 /*      We have a computed value from the source.  Now apply it
    7892             :                  * to      */
    7893             :                 /*      the destination pixel. */
    7894             :                 /* --------------------------------------------------------------------
    7895             :                  */
    7896     2776360 :                 if (bHasFoundDensity)
    7897             :                 {
    7898             :                     // TODO: Should we compute dfBandDensity in fct of
    7899             :                     // nCount/nCount2, or use as a threshold to set the dest
    7900             :                     // value?
    7901             :                     // dfBandDensity = (float) nCount / nCount2;
    7902             :                     // if( (float) nCount / nCount2 > 0.1 )
    7903             :                     // or fix gdalwarp crop_to_cutline to crop partially
    7904             :                     // overlapping pixels.
    7905     2776360 :                     GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    7906             :                                      dfValueReal, dfValueImag);
    7907             :                 }
    7908             :             }
    7909             : 
    7910     1663160 :             if (!bHasFoundDensity)
    7911      311290 :                 continue;
    7912             : 
    7913             :             /* --------------------------------------------------------------------
    7914             :              */
    7915             :             /*      Update destination density/validity masks. */
    7916             :             /* --------------------------------------------------------------------
    7917             :              */
    7918     1351870 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    7919             : 
    7920     1351870 :             if (poWK->panDstValid != nullptr)
    7921             :             {
    7922          74 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    7923             :             }
    7924             :         } /* Next iDstX */
    7925             : 
    7926             :         /* --------------------------------------------------------------------
    7927             :          */
    7928             :         /*      Report progress to the user, and optionally cancel out. */
    7929             :         /* --------------------------------------------------------------------
    7930             :          */
    7931        6485 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    7932           0 :             break;
    7933             :     }
    7934             : 
    7935             :     /* -------------------------------------------------------------------- */
    7936             :     /*      Cleanup and return.                                             */
    7937             :     /* -------------------------------------------------------------------- */
    7938         118 :     CPLFree(padfX);
    7939         118 :     CPLFree(padfY);
    7940         118 :     CPLFree(padfZ);
    7941         118 :     CPLFree(padfX2);
    7942         118 :     CPLFree(padfY2);
    7943         118 :     CPLFree(padfZ2);
    7944         118 :     CPLFree(pabSuccess);
    7945         118 :     CPLFree(pabSuccess2);
    7946         118 :     VSIFree(panVals);
    7947         118 :     VSIFree(pafRealVals);
    7948         118 :     VSIFree(panRealSums);
    7949         118 :     if (bIsComplex)
    7950             :     {
    7951          18 :         VSIFree(pafImagVals);
    7952          18 :         VSIFree(panImagSums);
    7953             :     }
    7954             : }
    7955             : 
    7956             : /************************************************************************/
    7957             : /*                         getOrientation()                             */
    7958             : /************************************************************************/
    7959             : 
    7960             : typedef std::pair<double, double> XYPair;
    7961             : 
    7962             : // Returns 1 whether (p1,p2,p3) is clockwise oriented,
    7963             : // -1 if it is counter-clockwise oriented,
    7964             : // or 0 if it is colinear.
    7965     2355910 : static int getOrientation(const XYPair &p1, const XYPair &p2, const XYPair &p3)
    7966             : {
    7967     2355910 :     const double p1x = p1.first;
    7968     2355910 :     const double p1y = p1.second;
    7969     2355910 :     const double p2x = p2.first;
    7970     2355910 :     const double p2y = p2.second;
    7971     2355910 :     const double p3x = p3.first;
    7972     2355910 :     const double p3y = p3.second;
    7973     2355910 :     const double val = (p2y - p1y) * (p3x - p2x) - (p2x - p1x) * (p3y - p2y);
    7974     2355910 :     if (std::abs(val) < 1e-20)
    7975        2690 :         return 0;
    7976     2353220 :     else if (val > 0)
    7977           0 :         return 1;
    7978             :     else
    7979     2353220 :         return -1;
    7980             : }
    7981             : 
    7982             : /************************************************************************/
    7983             : /*                          isConvex()                                  */
    7984             : /************************************************************************/
    7985             : 
    7986             : typedef std::vector<XYPair> XYPoly;
    7987             : 
    7988             : // poly must be closed
    7989      785302 : static bool isConvex(const XYPoly &poly)
    7990             : {
    7991      785302 :     const size_t n = poly.size();
    7992      785302 :     size_t i = 0;
    7993      785302 :     int last_orientation = getOrientation(poly[i], poly[i + 1], poly[i + 2]);
    7994      785302 :     ++i;
    7995     2355910 :     for (; i < n - 2; ++i)
    7996             :     {
    7997             :         const int orientation =
    7998     1570600 :             getOrientation(poly[i], poly[i + 1], poly[i + 2]);
    7999     1570600 :         if (orientation != 0)
    8000             :         {
    8001     1567910 :             if (last_orientation == 0)
    8002           0 :                 last_orientation = orientation;
    8003     1567910 :             else if (orientation != last_orientation)
    8004           0 :                 return false;
    8005             :         }
    8006             :     }
    8007      785302 :     return true;
    8008             : }
    8009             : 
    8010             : /************************************************************************/
    8011             : /*                     pointIntersectsConvexPoly()                      */
    8012             : /************************************************************************/
    8013             : 
    8014             : // Returns whether xy intersects poly, that must be closed and convex.
    8015     6049100 : static bool pointIntersectsConvexPoly(const XYPair &xy, const XYPoly &poly)
    8016             : {
    8017     6049100 :     const size_t n = poly.size();
    8018     6049100 :     double dx1 = xy.first - poly[0].first;
    8019     6049100 :     double dy1 = xy.second - poly[0].second;
    8020     6049100 :     double dx2 = poly[1].first - poly[0].first;
    8021     6049100 :     double dy2 = poly[1].second - poly[0].second;
    8022     6049100 :     double prevCrossProduct = dx1 * dy2 - dx2 * dy1;
    8023             : 
    8024             :     // Check if the point remains on the same side (left/right) of all edges
    8025    14556400 :     for (size_t i = 2; i < n; i++)
    8026             :     {
    8027    12793100 :         dx1 = xy.first - poly[i - 1].first;
    8028    12793100 :         dy1 = xy.second - poly[i - 1].second;
    8029             : 
    8030    12793100 :         dx2 = poly[i].first - poly[i - 1].first;
    8031    12793100 :         dy2 = poly[i].second - poly[i - 1].second;
    8032             : 
    8033    12793100 :         double crossProduct = dx1 * dy2 - dx2 * dy1;
    8034    12793100 :         if (std::abs(prevCrossProduct) < 1e-20)
    8035      725558 :             prevCrossProduct = crossProduct;
    8036    12067500 :         else if (prevCrossProduct * crossProduct < 0)
    8037     4285760 :             return false;
    8038             :     }
    8039             : 
    8040     1763340 :     return true;
    8041             : }
    8042             : 
    8043             : /************************************************************************/
    8044             : /*                     getIntersection()                                */
    8045             : /************************************************************************/
    8046             : 
    8047             : /* Returns intersection of [p1,p2] with [p3,p4], if
    8048             :  * it is a single point, and the 2 segments are not colinear.
    8049             :  */
    8050    11811000 : static bool getIntersection(const XYPair &p1, const XYPair &p2,
    8051             :                             const XYPair &p3, const XYPair &p4, XYPair &xy)
    8052             : {
    8053    11811000 :     const double x1 = p1.first;
    8054    11811000 :     const double y1 = p1.second;
    8055    11811000 :     const double x2 = p2.first;
    8056    11811000 :     const double y2 = p2.second;
    8057    11811000 :     const double x3 = p3.first;
    8058    11811000 :     const double y3 = p3.second;
    8059    11811000 :     const double x4 = p4.first;
    8060    11811000 :     const double y4 = p4.second;
    8061    11811000 :     const double t_num = (x1 - x3) * (y3 - y4) - (y1 - y3) * (x3 - x4);
    8062    11811000 :     const double denom = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4);
    8063    11811000 :     if (t_num * denom < 0 || std::abs(t_num) > std::abs(denom) || denom == 0)
    8064     9260780 :         return false;
    8065             : 
    8066     2550260 :     const double u_num = (x1 - x3) * (y1 - y2) - (y1 - y3) * (x1 - x2);
    8067     2550260 :     if (u_num * denom < 0 || std::abs(u_num) > std::abs(denom))
    8068      973924 :         return false;
    8069             : 
    8070     1576340 :     const double t = t_num / denom;
    8071     1576340 :     xy.first = x1 + t * (x2 - x1);
    8072     1576340 :     xy.second = y1 + t * (y2 - y1);
    8073     1576340 :     return true;
    8074             : }
    8075             : 
    8076             : /************************************************************************/
    8077             : /*                     getConvexPolyIntersection()                      */
    8078             : /************************************************************************/
    8079             : 
    8080             : // poly1 and poly2 must be closed and convex.
    8081             : // The returned intersection will not necessary be closed.
    8082      785302 : static void getConvexPolyIntersection(const XYPoly &poly1, const XYPoly &poly2,
    8083             :                                       XYPoly &intersection)
    8084             : {
    8085      785302 :     intersection.clear();
    8086             : 
    8087             :     // Add all points of poly1 inside poly2
    8088     3926510 :     for (size_t i = 0; i < poly1.size() - 1; ++i)
    8089             :     {
    8090     3141210 :         if (pointIntersectsConvexPoly(poly1[i], poly2))
    8091     1187430 :             intersection.push_back(poly1[i]);
    8092             :     }
    8093      785302 :     if (intersection.size() == poly1.size() - 1)
    8094             :     {
    8095             :         // poly1 is inside poly2
    8096      119100 :         return;
    8097             :     }
    8098             : 
    8099             :     // Add all points of poly2 inside poly1
    8100     3634860 :     for (size_t i = 0; i < poly2.size() - 1; ++i)
    8101             :     {
    8102     2907890 :         if (pointIntersectsConvexPoly(poly2[i], poly1))
    8103      575904 :             intersection.push_back(poly2[i]);
    8104             :     }
    8105             : 
    8106             :     // Compute the intersection of all edges of both polygons
    8107      726972 :     XYPair xy;
    8108     3634860 :     for (size_t i1 = 0; i1 < poly1.size() - 1; ++i1)
    8109             :     {
    8110    14539400 :         for (size_t i2 = 0; i2 < poly2.size() - 1; ++i2)
    8111             :         {
    8112    11631600 :             if (getIntersection(poly1[i1], poly1[i1 + 1], poly2[i2],
    8113    11631600 :                                 poly2[i2 + 1], xy))
    8114             :             {
    8115     1576230 :                 intersection.push_back(xy);
    8116             :             }
    8117             :         }
    8118             :     }
    8119             : 
    8120      726972 :     if (intersection.empty())
    8121       60770 :         return;
    8122             : 
    8123             :     // Find lowest-left point in intersection set
    8124      666202 :     double lowest_x = std::numeric_limits<double>::max();
    8125      666202 :     double lowest_y = std::numeric_limits<double>::max();
    8126     3772450 :     for (const auto &pair : intersection)
    8127             :     {
    8128     3106240 :         const double x = pair.first;
    8129     3106240 :         const double y = pair.second;
    8130     3106240 :         if (y < lowest_y || (y == lowest_y && x < lowest_x))
    8131             :         {
    8132     1096040 :             lowest_x = x;
    8133     1096040 :             lowest_y = y;
    8134             :         }
    8135             :     }
    8136             : 
    8137     5737980 :     const auto sortFunc = [&](const XYPair &p1, const XYPair &p2)
    8138             :     {
    8139     5737980 :         const double p1x_diff = p1.first - lowest_x;
    8140     5737980 :         const double p1y_diff = p1.second - lowest_y;
    8141     5737980 :         const double p2x_diff = p2.first - lowest_x;
    8142     5737980 :         const double p2y_diff = p2.second - lowest_y;
    8143     5737980 :         if (p2y_diff == 0.0 && p1y_diff == 0.0)
    8144             :         {
    8145     2655420 :             if (p1x_diff >= 0)
    8146             :             {
    8147     2655420 :                 if (p2x_diff >= 0)
    8148     2655420 :                     return p1.first < p2.first;
    8149           0 :                 return true;
    8150             :             }
    8151             :             else
    8152             :             {
    8153           0 :                 if (p2x_diff >= 0)
    8154           0 :                     return false;
    8155           0 :                 return p1.first < p2.first;
    8156             :             }
    8157             :         }
    8158             : 
    8159     3082560 :         if (p2x_diff == 0.0 && p1x_diff == 0.0)
    8160     1046960 :             return p1.second < p2.second;
    8161             : 
    8162             :         double tan_p1;
    8163     2035600 :         if (p1x_diff == 0.0)
    8164      464622 :             tan_p1 = p1y_diff == 0.0 ? 0.0 : std::numeric_limits<double>::max();
    8165             :         else
    8166     1570980 :             tan_p1 = p1y_diff / p1x_diff;
    8167             : 
    8168             :         double tan_p2;
    8169     2035600 :         if (p2x_diff == 0.0)
    8170      839515 :             tan_p2 = p2y_diff == 0.0 ? 0.0 : std::numeric_limits<double>::max();
    8171             :         else
    8172     1196080 :             tan_p2 = p2y_diff / p2x_diff;
    8173             : 
    8174     2035600 :         if (tan_p1 >= 0)
    8175             :         {
    8176     1904790 :             if (tan_p2 >= 0)
    8177     1881590 :                 return tan_p1 < tan_p2;
    8178             :             else
    8179       23199 :                 return true;
    8180             :         }
    8181             :         else
    8182             :         {
    8183      130806 :             if (tan_p2 >= 0)
    8184      103900 :                 return false;
    8185             :             else
    8186       26906 :                 return tan_p1 < tan_p2;
    8187             :         }
    8188      666202 :     };
    8189             : 
    8190             :     // Sort points by increasing atan2(y-lowest_y, x-lowest_x) to form a convex
    8191             :     // hull
    8192      666202 :     std::sort(intersection.begin(), intersection.end(), sortFunc);
    8193             : 
    8194             :     // Remove duplicated points
    8195      666202 :     size_t j = 1;
    8196     3106240 :     for (size_t i = 1; i < intersection.size(); ++i)
    8197             :     {
    8198     2440040 :         if (intersection[i] != intersection[i - 1])
    8199             :         {
    8200     1452560 :             if (j < i)
    8201      545275 :                 intersection[j] = intersection[i];
    8202     1452560 :             ++j;
    8203             :         }
    8204             :     }
    8205      666202 :     intersection.resize(j);
    8206             : }
    8207             : 
    8208             : /************************************************************************/
    8209             : /*                            getArea()                                 */
    8210             : /************************************************************************/
    8211             : 
    8212             : // poly may or may not be closed.
    8213      558521 : static double getArea(const XYPoly &poly)
    8214             : {
    8215             :     // CPLAssert(poly.size() >= 2);
    8216      558521 :     const size_t nPointCount = poly.size();
    8217             :     double dfAreaSum =
    8218      558521 :         poly[0].first * (poly[1].second - poly[nPointCount - 1].second);
    8219             : 
    8220     1765140 :     for (size_t i = 1; i < nPointCount - 1; i++)
    8221             :     {
    8222     1206610 :         dfAreaSum += poly[i].first * (poly[i + 1].second - poly[i - 1].second);
    8223             :     }
    8224             : 
    8225      558521 :     dfAreaSum += poly[nPointCount - 1].first *
    8226      558521 :                  (poly[0].second - poly[nPointCount - 2].second);
    8227             : 
    8228      558521 :     return 0.5 * std::fabs(dfAreaSum);
    8229             : }
    8230             : 
    8231             : /************************************************************************/
    8232             : /*                           GWKSumPreserving()                         */
    8233             : /************************************************************************/
    8234             : 
    8235             : static void GWKSumPreservingThread(void *pData);
    8236             : 
    8237          18 : static CPLErr GWKSumPreserving(GDALWarpKernel *poWK)
    8238             : {
    8239          18 :     return GWKRun(poWK, "GWKSumPreserving", GWKSumPreservingThread);
    8240             : }
    8241             : 
    8242          18 : static void GWKSumPreservingThread(void *pData)
    8243             : {
    8244          18 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    8245          18 :     GDALWarpKernel *poWK = psJob->poWK;
    8246          18 :     const int iYMin = psJob->iYMin;
    8247          18 :     const int iYMax = psJob->iYMax;
    8248             :     const bool bIsAffineNoRotation =
    8249          18 :         GDALTransformIsAffineNoRotation(poWK->pfnTransformer,
    8250          26 :                                         poWK->pTransformerArg) &&
    8251             :         // for debug/testing purposes
    8252           8 :         CPLTestBool(
    8253          18 :             CPLGetConfigOption("GDAL_WARP_USE_AFFINE_OPTIMIZATION", "YES"));
    8254             : 
    8255          18 :     const int nDstXSize = poWK->nDstXSize;
    8256          18 :     const int nSrcXSize = poWK->nSrcXSize;
    8257          18 :     const int nSrcYSize = poWK->nSrcYSize;
    8258             : 
    8259          36 :     std::vector<double> adfX0(nSrcXSize + 1);
    8260          36 :     std::vector<double> adfY0(nSrcXSize + 1);
    8261          36 :     std::vector<double> adfZ0(nSrcXSize + 1);
    8262          36 :     std::vector<double> adfX1(nSrcXSize + 1);
    8263          36 :     std::vector<double> adfY1(nSrcXSize + 1);
    8264          36 :     std::vector<double> adfZ1(nSrcXSize + 1);
    8265          36 :     std::vector<int> abSuccess0(nSrcXSize + 1);
    8266          36 :     std::vector<int> abSuccess1(nSrcXSize + 1);
    8267             : 
    8268             :     CPLRectObj sGlobalBounds;
    8269          18 :     sGlobalBounds.minx = -2 * poWK->dfXScale;
    8270          18 :     sGlobalBounds.miny = iYMin - 2 * poWK->dfYScale;
    8271          18 :     sGlobalBounds.maxx = nDstXSize + 2 * poWK->dfXScale;
    8272          18 :     sGlobalBounds.maxy = iYMax + 2 * poWK->dfYScale;
    8273          18 :     CPLQuadTree *hQuadTree = CPLQuadTreeCreate(&sGlobalBounds, nullptr);
    8274             : 
    8275             :     struct SourcePixel
    8276             :     {
    8277             :         int iSrcX;
    8278             :         int iSrcY;
    8279             : 
    8280             :         // Coordinates of source pixel in target pixel coordinates
    8281             :         double dfDstX0;
    8282             :         double dfDstY0;
    8283             :         double dfDstX1;
    8284             :         double dfDstY1;
    8285             :         double dfDstX2;
    8286             :         double dfDstY2;
    8287             :         double dfDstX3;
    8288             :         double dfDstY3;
    8289             : 
    8290             :         // Source pixel total area (might be larger than the one described
    8291             :         // by above coordinates, if the pixel was crossing the antimeridian
    8292             :         // and split)
    8293             :         double dfArea;
    8294             :     };
    8295             : 
    8296          36 :     std::vector<SourcePixel> sourcePixels;
    8297             : 
    8298          36 :     XYPoly discontinuityLeft(5);
    8299          36 :     XYPoly discontinuityRight(5);
    8300             : 
    8301             :     /* ==================================================================== */
    8302             :     /*      First pass: transform the 4 corners of each potential           */
    8303             :     /*      contributing source pixel to target pixel coordinates.          */
    8304             :     /* ==================================================================== */
    8305             : 
    8306             :     // Special case for top line
    8307             :     {
    8308          18 :         int iY = 0;
    8309        1130 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8310             :         {
    8311        1112 :             adfX1[iX] = iX + poWK->nSrcXOff;
    8312        1112 :             adfY1[iX] = iY + poWK->nSrcYOff;
    8313        1112 :             adfZ1[iX] = 0;
    8314             :         }
    8315             : 
    8316          18 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
    8317             :                              adfX1.data(), adfY1.data(), adfZ1.data(),
    8318             :                              abSuccess1.data());
    8319             : 
    8320        1130 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8321             :         {
    8322        1112 :             if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
    8323           0 :                 abSuccess1[iX] = FALSE;
    8324             :             else
    8325             :             {
    8326        1112 :                 adfX1[iX] -= poWK->nDstXOff;
    8327        1112 :                 adfY1[iX] -= poWK->nDstYOff;
    8328             :             }
    8329             :         }
    8330             :     }
    8331             : 
    8332      413412 :     const auto getInsideXSign = [poWK, nDstXSize](double dfX)
    8333             :     {
    8334      413412 :         return dfX - poWK->nDstXOff >= -2 * poWK->dfXScale &&
    8335      205344 :                        dfX - poWK->nDstXOff <= nDstXSize + 2 * poWK->dfXScale
    8336      413412 :                    ? 1
    8337      208068 :                    : -1;
    8338          18 :     };
    8339             : 
    8340             :     const auto FindDiscontinuity =
    8341          80 :         [poWK, psJob, getInsideXSign](
    8342             :             double dfXLeft, double dfXRight, double dfY,
    8343             :             int XLeftReprojectedInsideSign, double &dfXMidReprojectedLeft,
    8344         800 :             double &dfXMidReprojectedRight, double &dfYMidReprojected)
    8345             :     {
    8346         880 :         for (int i = 0; i < 10 && dfXRight - dfXLeft > 1e-8; ++i)
    8347             :         {
    8348         800 :             double dfXMid = (dfXLeft + dfXRight) / 2;
    8349         800 :             double dfXMidReprojected = dfXMid;
    8350         800 :             dfYMidReprojected = dfY;
    8351         800 :             double dfZ = 0;
    8352         800 :             int nSuccess = 0;
    8353         800 :             poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1,
    8354             :                                  &dfXMidReprojected, &dfYMidReprojected, &dfZ,
    8355             :                                  &nSuccess);
    8356         800 :             if (XLeftReprojectedInsideSign != getInsideXSign(dfXMidReprojected))
    8357             :             {
    8358         456 :                 dfXRight = dfXMid;
    8359         456 :                 dfXMidReprojectedRight = dfXMidReprojected;
    8360             :             }
    8361             :             else
    8362             :             {
    8363         344 :                 dfXLeft = dfXMid;
    8364         344 :                 dfXMidReprojectedLeft = dfXMidReprojected;
    8365             :             }
    8366             :         }
    8367          80 :     };
    8368             : 
    8369         566 :     for (int iY = 0; iY < nSrcYSize; ++iY)
    8370             :     {
    8371         548 :         std::swap(adfX0, adfX1);
    8372         548 :         std::swap(adfY0, adfY1);
    8373         548 :         std::swap(adfZ0, adfZ1);
    8374         548 :         std::swap(abSuccess0, abSuccess1);
    8375             : 
    8376      104512 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8377             :         {
    8378      103964 :             adfX1[iX] = iX + poWK->nSrcXOff;
    8379      103964 :             adfY1[iX] = iY + 1 + poWK->nSrcYOff;
    8380      103964 :             adfZ1[iX] = 0;
    8381             :         }
    8382             : 
    8383         548 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
    8384             :                              adfX1.data(), adfY1.data(), adfZ1.data(),
    8385             :                              abSuccess1.data());
    8386             : 
    8387      104512 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8388             :         {
    8389      103964 :             if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
    8390           0 :                 abSuccess1[iX] = FALSE;
    8391             :             else
    8392             :             {
    8393      103964 :                 adfX1[iX] -= poWK->nDstXOff;
    8394      103964 :                 adfY1[iX] -= poWK->nDstYOff;
    8395             :             }
    8396             :         }
    8397             : 
    8398      103964 :         for (int iX = 0; iX < nSrcXSize; ++iX)
    8399             :         {
    8400      206832 :             if (abSuccess0[iX] && abSuccess0[iX + 1] && abSuccess1[iX] &&
    8401      103416 :                 abSuccess1[iX + 1])
    8402             :             {
    8403             :                 /* --------------------------------------------------------------------
    8404             :                  */
    8405             :                 /*      Do not try to apply transparent source pixels to the
    8406             :                  * destination.*/
    8407             :                 /* --------------------------------------------------------------------
    8408             :                  */
    8409      103416 :                 const auto iSrcOffset =
    8410      103416 :                     iX + static_cast<GPtrDiff_t>(iY) * nSrcXSize;
    8411      105816 :                 if (poWK->panUnifiedSrcValid != nullptr &&
    8412        2400 :                     !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    8413             :                 {
    8414       10971 :                     continue;
    8415             :                 }
    8416             : 
    8417      103410 :                 if (poWK->pafUnifiedSrcDensity != nullptr)
    8418             :                 {
    8419           0 :                     if (poWK->pafUnifiedSrcDensity[iSrcOffset] <
    8420             :                         SRC_DENSITY_THRESHOLD)
    8421           0 :                         continue;
    8422             :                 }
    8423             : 
    8424             :                 SourcePixel sp;
    8425      103410 :                 sp.dfArea = 0;
    8426      103410 :                 sp.dfDstX0 = adfX0[iX];
    8427      103410 :                 sp.dfDstY0 = adfY0[iX];
    8428      103410 :                 sp.dfDstX1 = adfX0[iX + 1];
    8429      103410 :                 sp.dfDstY1 = adfY0[iX + 1];
    8430      103410 :                 sp.dfDstX2 = adfX1[iX + 1];
    8431      103410 :                 sp.dfDstY2 = adfY1[iX + 1];
    8432      103410 :                 sp.dfDstX3 = adfX1[iX];
    8433      103410 :                 sp.dfDstY3 = adfY1[iX];
    8434             : 
    8435             :                 // Detect pixels that likely cross the anti-meridian and
    8436             :                 // introduce a discontinuity when reprojected.
    8437             : 
    8438      103410 :                 if (getInsideXSign(adfX0[iX]) !=
    8439      103506 :                         getInsideXSign(adfX0[iX + 1]) &&
    8440         164 :                     getInsideXSign(adfX0[iX]) == getInsideXSign(adfX1[iX]) &&
    8441          68 :                     getInsideXSign(adfX0[iX + 1]) ==
    8442      103574 :                         getInsideXSign(adfX1[iX + 1]) &&
    8443          40 :                     (adfY1[iX] - adfY0[iX]) * (adfY1[iX + 1] - adfY0[iX + 1]) >
    8444             :                         0)
    8445             :                 {
    8446          40 :                     double dfXMidReprojectedLeftTop = 0;
    8447          40 :                     double dfXMidReprojectedRightTop = 0;
    8448          40 :                     double dfYMidReprojectedTop = 0;
    8449          40 :                     FindDiscontinuity(
    8450          40 :                         iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
    8451          80 :                         iY + poWK->nSrcYOff, getInsideXSign(adfX0[iX]),
    8452             :                         dfXMidReprojectedLeftTop, dfXMidReprojectedRightTop,
    8453             :                         dfYMidReprojectedTop);
    8454          40 :                     double dfXMidReprojectedLeftBottom = 0;
    8455          40 :                     double dfXMidReprojectedRightBottom = 0;
    8456          40 :                     double dfYMidReprojectedBottom = 0;
    8457          40 :                     FindDiscontinuity(
    8458          40 :                         iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
    8459          80 :                         iY + poWK->nSrcYOff + 1, getInsideXSign(adfX1[iX]),
    8460             :                         dfXMidReprojectedLeftBottom,
    8461             :                         dfXMidReprojectedRightBottom, dfYMidReprojectedBottom);
    8462             : 
    8463          40 :                     discontinuityLeft[0] = XYPair(adfX0[iX], adfY0[iX]);
    8464          40 :                     discontinuityLeft[1] =
    8465          80 :                         XYPair(dfXMidReprojectedLeftTop, dfYMidReprojectedTop);
    8466          40 :                     discontinuityLeft[2] = XYPair(dfXMidReprojectedLeftBottom,
    8467          40 :                                                   dfYMidReprojectedBottom);
    8468          40 :                     discontinuityLeft[3] = XYPair(adfX1[iX], adfY1[iX]);
    8469          40 :                     discontinuityLeft[4] = XYPair(adfX0[iX], adfY0[iX]);
    8470             : 
    8471          40 :                     discontinuityRight[0] =
    8472          80 :                         XYPair(adfX0[iX + 1], adfY0[iX + 1]);
    8473          40 :                     discontinuityRight[1] =
    8474          80 :                         XYPair(dfXMidReprojectedRightTop, dfYMidReprojectedTop);
    8475          40 :                     discontinuityRight[2] = XYPair(dfXMidReprojectedRightBottom,
    8476          40 :                                                    dfYMidReprojectedBottom);
    8477          40 :                     discontinuityRight[3] =
    8478          80 :                         XYPair(adfX1[iX + 1], adfY1[iX + 1]);
    8479          40 :                     discontinuityRight[4] =
    8480          80 :                         XYPair(adfX0[iX + 1], adfY0[iX + 1]);
    8481             : 
    8482          40 :                     sp.dfArea = getArea(discontinuityLeft) +
    8483          40 :                                 getArea(discontinuityRight);
    8484          40 :                     if (getInsideXSign(adfX0[iX]) >= 1)
    8485             :                     {
    8486          20 :                         sp.dfDstX1 = dfXMidReprojectedLeftTop;
    8487          20 :                         sp.dfDstY1 = dfYMidReprojectedTop;
    8488          20 :                         sp.dfDstX2 = dfXMidReprojectedLeftBottom;
    8489          20 :                         sp.dfDstY2 = dfYMidReprojectedBottom;
    8490             :                     }
    8491             :                     else
    8492             :                     {
    8493          20 :                         sp.dfDstX0 = dfXMidReprojectedRightTop;
    8494          20 :                         sp.dfDstY0 = dfYMidReprojectedTop;
    8495          20 :                         sp.dfDstX3 = dfXMidReprojectedRightBottom;
    8496          20 :                         sp.dfDstY3 = dfYMidReprojectedBottom;
    8497             :                     }
    8498             :                 }
    8499             : 
    8500             :                 // Bounding box of source pixel (expressed in target pixel
    8501             :                 // coordinates)
    8502             :                 CPLRectObj sRect;
    8503      103410 :                 sRect.minx = std::min(std::min(sp.dfDstX0, sp.dfDstX1),
    8504      103410 :                                       std::min(sp.dfDstX2, sp.dfDstX3));
    8505      103410 :                 sRect.miny = std::min(std::min(sp.dfDstY0, sp.dfDstY1),
    8506      103410 :                                       std::min(sp.dfDstY2, sp.dfDstY3));
    8507      103410 :                 sRect.maxx = std::max(std::max(sp.dfDstX0, sp.dfDstX1),
    8508      103410 :                                       std::max(sp.dfDstX2, sp.dfDstX3));
    8509      103410 :                 sRect.maxy = std::max(std::max(sp.dfDstY0, sp.dfDstY1),
    8510      103410 :                                       std::max(sp.dfDstY2, sp.dfDstY3));
    8511      103410 :                 if (!(sRect.minx < nDstXSize && sRect.maxx > 0 &&
    8512      101350 :                       sRect.miny < iYMax && sRect.maxy > iYMin))
    8513             :                 {
    8514       10852 :                     continue;
    8515             :                 }
    8516             : 
    8517       92558 :                 sp.iSrcX = iX;
    8518       92558 :                 sp.iSrcY = iY;
    8519             : 
    8520       92558 :                 if (!bIsAffineNoRotation)
    8521             :                 {
    8522             :                     // Check polygon validity (no self-crossing)
    8523       89745 :                     XYPair xy;
    8524       89745 :                     if (getIntersection(XYPair(sp.dfDstX0, sp.dfDstY0),
    8525       89745 :                                         XYPair(sp.dfDstX1, sp.dfDstY1),
    8526       89745 :                                         XYPair(sp.dfDstX2, sp.dfDstY2),
    8527      269235 :                                         XYPair(sp.dfDstX3, sp.dfDstY3), xy) ||
    8528       89745 :                         getIntersection(XYPair(sp.dfDstX1, sp.dfDstY1),
    8529       89745 :                                         XYPair(sp.dfDstX2, sp.dfDstY2),
    8530       89745 :                                         XYPair(sp.dfDstX0, sp.dfDstY0),
    8531      179490 :                                         XYPair(sp.dfDstX3, sp.dfDstY3), xy))
    8532             :                     {
    8533         113 :                         continue;
    8534             :                     }
    8535             :                 }
    8536             : 
    8537       92445 :                 CPLQuadTreeInsertWithBounds(
    8538             :                     hQuadTree,
    8539             :                     reinterpret_cast<void *>(
    8540       92445 :                         static_cast<uintptr_t>(sourcePixels.size())),
    8541             :                     &sRect);
    8542             : 
    8543       92445 :                 sourcePixels.push_back(sp);
    8544             :             }
    8545             :         }
    8546             :     }
    8547             : 
    8548          36 :     std::vector<double> adfRealValue(poWK->nBands);
    8549          36 :     std::vector<double> adfImagValue(poWK->nBands);
    8550          36 :     std::vector<double> adfBandDensity(poWK->nBands);
    8551          36 :     std::vector<double> adfWeight(poWK->nBands);
    8552             : 
    8553             : #ifdef CHECK_SUM_WITH_GEOS
    8554             :     auto hGEOSContext = OGRGeometry::createGEOSContext();
    8555             :     auto seq1 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
    8556             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 0, 0.0, 0.0);
    8557             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 1, 1.0, 0.0);
    8558             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 2, 1.0, 1.0);
    8559             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 3, 0.0, 1.0);
    8560             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 4, 0.0, 0.0);
    8561             :     auto hLR1 = GEOSGeom_createLinearRing_r(hGEOSContext, seq1);
    8562             :     auto hP1 = GEOSGeom_createPolygon_r(hGEOSContext, hLR1, nullptr, 0);
    8563             : 
    8564             :     auto seq2 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
    8565             :     auto hLR2 = GEOSGeom_createLinearRing_r(hGEOSContext, seq2);
    8566             :     auto hP2 = GEOSGeom_createPolygon_r(hGEOSContext, hLR2, nullptr, 0);
    8567             : #endif
    8568             : 
    8569             :     const XYPoly xy1{
    8570          36 :         {0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}};
    8571          36 :     XYPoly xy2(5);
    8572          36 :     XYPoly xy2_triangle(4);
    8573          36 :     XYPoly intersection;
    8574             : 
    8575             :     /* ==================================================================== */
    8576             :     /*      Loop over output lines.                                         */
    8577             :     /* ==================================================================== */
    8578         891 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    8579             :     {
    8580             :         CPLRectObj sRect;
    8581         873 :         sRect.miny = iDstY;
    8582         873 :         sRect.maxy = iDstY + 1;
    8583             : 
    8584             :         /* ====================================================================
    8585             :          */
    8586             :         /*      Loop over pixels in output scanline. */
    8587             :         /* ====================================================================
    8588             :          */
    8589      221042 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    8590             :         {
    8591      220169 :             sRect.minx = iDstX;
    8592      220169 :             sRect.maxx = iDstX + 1;
    8593      220169 :             int nSourcePixels = 0;
    8594             :             void **pahSourcePixel =
    8595      220169 :                 CPLQuadTreeSearch(hQuadTree, &sRect, &nSourcePixels);
    8596      220169 :             if (nSourcePixels == 0)
    8597             :             {
    8598        1258 :                 CPLFree(pahSourcePixel);
    8599        1262 :                 continue;
    8600             :             }
    8601             : 
    8602      218911 :             std::fill(adfRealValue.begin(), adfRealValue.end(), 0);
    8603      218911 :             std::fill(adfImagValue.begin(), adfImagValue.end(), 0);
    8604      218911 :             std::fill(adfBandDensity.begin(), adfBandDensity.end(), 0);
    8605      218911 :             std::fill(adfWeight.begin(), adfWeight.end(), 0);
    8606      218911 :             double dfDensity = 0;
    8607      218911 :             double dfTotalWeight = 0;
    8608             : 
    8609             :             /* ====================================================================
    8610             :              */
    8611             :             /*          Iterate over each contributing source pixel to add its
    8612             :              */
    8613             :             /*          value weighed by the ratio of the area of its
    8614             :              * intersection  */
    8615             :             /*          with the target pixel divided by the area of the source
    8616             :              */
    8617             :             /*          pixel. */
    8618             :             /* ====================================================================
    8619             :              */
    8620     1020520 :             for (int i = 0; i < nSourcePixels; ++i)
    8621             :             {
    8622      801614 :                 const int iSourcePixel = static_cast<int>(
    8623      801614 :                     reinterpret_cast<uintptr_t>(pahSourcePixel[i]));
    8624      801614 :                 auto &sp = sourcePixels[iSourcePixel];
    8625             : 
    8626      801614 :                 double dfWeight = 0.0;
    8627      801614 :                 if (bIsAffineNoRotation)
    8628             :                 {
    8629             :                     // Optimization since the source pixel is a rectangle in
    8630             :                     // target pixel coordinates
    8631       16312 :                     double dfSrcMinX = std::min(sp.dfDstX0, sp.dfDstX2);
    8632       16312 :                     double dfSrcMaxX = std::max(sp.dfDstX0, sp.dfDstX2);
    8633       16312 :                     double dfSrcMinY = std::min(sp.dfDstY0, sp.dfDstY2);
    8634       16312 :                     double dfSrcMaxY = std::max(sp.dfDstY0, sp.dfDstY2);
    8635       16312 :                     double dfIntersMinX = std::max<double>(dfSrcMinX, iDstX);
    8636       16312 :                     double dfIntersMaxX = std::min(dfSrcMaxX, iDstX + 1.0);
    8637       16312 :                     double dfIntersMinY = std::max<double>(dfSrcMinY, iDstY);
    8638       16312 :                     double dfIntersMaxY = std::min(dfSrcMaxY, iDstY + 1.0);
    8639       16312 :                     dfWeight =
    8640       16312 :                         ((dfIntersMaxX - dfIntersMinX) *
    8641       16312 :                          (dfIntersMaxY - dfIntersMinY)) /
    8642       16312 :                         ((dfSrcMaxX - dfSrcMinX) * (dfSrcMaxY - dfSrcMinY));
    8643             :                 }
    8644             :                 else
    8645             :                 {
    8646             :                     // Compute the polygon of the source pixel in target pixel
    8647             :                     // coordinates, and shifted to the target pixel (unit square
    8648             :                     // coordinates)
    8649             : 
    8650      785302 :                     xy2[0] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
    8651      785302 :                     xy2[1] = {sp.dfDstX1 - iDstX, sp.dfDstY1 - iDstY};
    8652      785302 :                     xy2[2] = {sp.dfDstX2 - iDstX, sp.dfDstY2 - iDstY};
    8653      785302 :                     xy2[3] = {sp.dfDstX3 - iDstX, sp.dfDstY3 - iDstY};
    8654      785302 :                     xy2[4] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
    8655             : 
    8656      785302 :                     if (isConvex(xy2))
    8657             :                     {
    8658      785302 :                         getConvexPolyIntersection(xy1, xy2, intersection);
    8659      785302 :                         if (intersection.size() >= 3)
    8660             :                         {
    8661      468849 :                             dfWeight = getArea(intersection);
    8662             :                         }
    8663             :                     }
    8664             :                     else
    8665             :                     {
    8666             :                         // Split xy2 into 2 triangles.
    8667           0 :                         xy2_triangle[0] = xy2[0];
    8668           0 :                         xy2_triangle[1] = xy2[1];
    8669           0 :                         xy2_triangle[2] = xy2[2];
    8670           0 :                         xy2_triangle[3] = xy2[0];
    8671           0 :                         getConvexPolyIntersection(xy1, xy2_triangle,
    8672             :                                                   intersection);
    8673           0 :                         if (intersection.size() >= 3)
    8674             :                         {
    8675           0 :                             dfWeight = getArea(intersection);
    8676             :                         }
    8677             : 
    8678           0 :                         xy2_triangle[1] = xy2[2];
    8679           0 :                         xy2_triangle[2] = xy2[3];
    8680           0 :                         getConvexPolyIntersection(xy1, xy2_triangle,
    8681             :                                                   intersection);
    8682           0 :                         if (intersection.size() >= 3)
    8683             :                         {
    8684           0 :                             dfWeight += getArea(intersection);
    8685             :                         }
    8686             :                     }
    8687      785302 :                     if (dfWeight > 0.0)
    8688             :                     {
    8689      468828 :                         if (sp.dfArea == 0)
    8690       89592 :                             sp.dfArea = getArea(xy2);
    8691      468828 :                         dfWeight /= sp.dfArea;
    8692             :                     }
    8693             : 
    8694             : #ifdef CHECK_SUM_WITH_GEOS
    8695             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 0,
    8696             :                                          sp.dfDstX0 - iDstX,
    8697             :                                          sp.dfDstY0 - iDstY);
    8698             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 1,
    8699             :                                          sp.dfDstX1 - iDstX,
    8700             :                                          sp.dfDstY1 - iDstY);
    8701             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 2,
    8702             :                                          sp.dfDstX2 - iDstX,
    8703             :                                          sp.dfDstY2 - iDstY);
    8704             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 3,
    8705             :                                          sp.dfDstX3 - iDstX,
    8706             :                                          sp.dfDstY3 - iDstY);
    8707             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 4,
    8708             :                                          sp.dfDstX0 - iDstX,
    8709             :                                          sp.dfDstY0 - iDstY);
    8710             : 
    8711             :                     double dfWeightGEOS = 0.0;
    8712             :                     auto hIntersection =
    8713             :                         GEOSIntersection_r(hGEOSContext, hP1, hP2);
    8714             :                     if (hIntersection)
    8715             :                     {
    8716             :                         double dfIntersArea = 0.0;
    8717             :                         if (GEOSArea_r(hGEOSContext, hIntersection,
    8718             :                                        &dfIntersArea) &&
    8719             :                             dfIntersArea > 0)
    8720             :                         {
    8721             :                             double dfSourceArea = 0.0;
    8722             :                             if (GEOSArea_r(hGEOSContext, hP2, &dfSourceArea))
    8723             :                             {
    8724             :                                 dfWeightGEOS = dfIntersArea / dfSourceArea;
    8725             :                             }
    8726             :                         }
    8727             :                         GEOSGeom_destroy_r(hGEOSContext, hIntersection);
    8728             :                     }
    8729             :                     if (fabs(dfWeight - dfWeightGEOS) > 1e-5 * dfWeightGEOS)
    8730             :                     {
    8731             :                         /* ok */ printf("dfWeight=%f dfWeightGEOS=%f\n",
    8732             :                                         dfWeight, dfWeightGEOS);
    8733             :                         printf("xy2: ");  // ok
    8734             :                         for (const auto &xy : xy2)
    8735             :                             printf("[%f, %f], ", xy.first, xy.second);  // ok
    8736             :                         printf("\n");                                   // ok
    8737             :                         printf("intersection: ");                       // ok
    8738             :                         for (const auto &xy : intersection)
    8739             :                             printf("[%f, %f], ", xy.first, xy.second);  // ok
    8740             :                         printf("\n");                                   // ok
    8741             :                     }
    8742             : #endif
    8743             :                 }
    8744      801614 :                 if (dfWeight > 0.0)
    8745             :                 {
    8746      474099 :                     const GPtrDiff_t iSrcOffset =
    8747      474099 :                         sp.iSrcX +
    8748      474099 :                         static_cast<GPtrDiff_t>(sp.iSrcY) * nSrcXSize;
    8749      474099 :                     dfTotalWeight += dfWeight;
    8750             : 
    8751      474099 :                     if (poWK->pafUnifiedSrcDensity != nullptr)
    8752             :                     {
    8753           0 :                         dfDensity +=
    8754           0 :                             dfWeight * poWK->pafUnifiedSrcDensity[iSrcOffset];
    8755             :                     }
    8756             :                     else
    8757             :                     {
    8758      474099 :                         dfDensity += dfWeight;
    8759             :                     }
    8760             : 
    8761     1818720 :                     for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    8762             :                     {
    8763             :                         // Returns pixel value if it is not no data.
    8764             :                         double dfBandDensity;
    8765             :                         double dfRealValue;
    8766             :                         double dfImagValue;
    8767     2689240 :                         if (!(GWKGetPixelValue(poWK, iBand, iSrcOffset,
    8768             :                                                &dfBandDensity, &dfRealValue,
    8769             :                                                &dfImagValue) &&
    8770     1344620 :                               dfBandDensity > BAND_DENSITY_THRESHOLD))
    8771             :                         {
    8772           0 :                             continue;
    8773             :                         }
    8774             : 
    8775     1344620 :                         adfRealValue[iBand] += dfRealValue * dfWeight;
    8776     1344620 :                         adfImagValue[iBand] += dfImagValue * dfWeight;
    8777     1344620 :                         adfBandDensity[iBand] += dfBandDensity * dfWeight;
    8778     1344620 :                         adfWeight[iBand] += dfWeight;
    8779             :                     }
    8780             :                 }
    8781             :             }
    8782             : 
    8783      218911 :             CPLFree(pahSourcePixel);
    8784             : 
    8785             :             /* --------------------------------------------------------------------
    8786             :              */
    8787             :             /*          Update destination pixel value. */
    8788             :             /* --------------------------------------------------------------------
    8789             :              */
    8790      218911 :             bool bHasFoundDensity = false;
    8791      218911 :             const GPtrDiff_t iDstOffset =
    8792      218911 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    8793      827822 :             for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    8794             :             {
    8795      608911 :                 if (adfWeight[iBand] > 0)
    8796             :                 {
    8797             :                     const double dfBandDensity =
    8798      608907 :                         adfBandDensity[iBand] / adfWeight[iBand];
    8799      608907 :                     if (dfBandDensity > BAND_DENSITY_THRESHOLD)
    8800             :                     {
    8801      608907 :                         bHasFoundDensity = true;
    8802      608907 :                         GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    8803      608907 :                                          adfRealValue[iBand],
    8804      608907 :                                          adfImagValue[iBand]);
    8805             :                     }
    8806             :                 }
    8807             :             }
    8808             : 
    8809      218911 :             if (!bHasFoundDensity)
    8810           4 :                 continue;
    8811             : 
    8812             :             /* --------------------------------------------------------------------
    8813             :              */
    8814             :             /*          Update destination density/validity masks. */
    8815             :             /* --------------------------------------------------------------------
    8816             :              */
    8817      218907 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity / dfTotalWeight);
    8818             : 
    8819      218907 :             if (poWK->panDstValid != nullptr)
    8820             :             {
    8821       11750 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    8822             :             }
    8823             :         }
    8824             : 
    8825             :         /* --------------------------------------------------------------------
    8826             :          */
    8827             :         /*      Report progress to the user, and optionally cancel out. */
    8828             :         /* --------------------------------------------------------------------
    8829             :          */
    8830         873 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    8831           0 :             break;
    8832             :     }
    8833             : 
    8834             : #ifdef CHECK_SUM_WITH_GEOS
    8835             :     GEOSGeom_destroy_r(hGEOSContext, hP1);
    8836             :     GEOSGeom_destroy_r(hGEOSContext, hP2);
    8837             :     OGRGeometry::freeGEOSContext(hGEOSContext);
    8838             : #endif
    8839          18 :     CPLQuadTreeDestroy(hQuadTree);
    8840          18 : }

Generated by: LCOV version 1.14