LCOV - code coverage report
Current view: top level - alg - gdalwarpkernel.cpp (source / functions) Hit Total Coverage
Test: gdal_filtered.info Lines: 3080 3793 81.2 %
Date: 2025-01-18 12:42:00 Functions: 153 161 95.0 %

          Line data    Source code
       1             : /******************************************************************************
       2             :  *
       3             :  * Project:  High Performance Image Reprojector
       4             :  * Purpose:  Implementation of the GDALWarpKernel class.  Implements the actual
       5             :  *           image warping for a "chunk" of input and output imagery already
       6             :  *           loaded into memory.
       7             :  * Author:   Frank Warmerdam, warmerdam@pobox.com
       8             :  *
       9             :  ******************************************************************************
      10             :  * Copyright (c) 2003, Frank Warmerdam <warmerdam@pobox.com>
      11             :  * Copyright (c) 2008-2013, Even Rouault <even dot rouault at spatialys.com>
      12             :  *
      13             :  * SPDX-License-Identifier: MIT
      14             :  ****************************************************************************/
      15             : 
      16             : #include "cpl_port.h"
      17             : #include "gdalwarper.h"
      18             : 
      19             : #include <cfloat>
      20             : #include <cmath>
      21             : #include <cstddef>
      22             : #include <cstdlib>
      23             : #include <cstring>
      24             : 
      25             : #include <algorithm>
      26             : #include <limits>
      27             : #include <mutex>
      28             : #include <new>
      29             : #include <utility>
      30             : #include <vector>
      31             : 
      32             : #include "cpl_atomic_ops.h"
      33             : #include "cpl_conv.h"
      34             : #include "cpl_error.h"
      35             : #include "cpl_mask.h"
      36             : #include "cpl_multiproc.h"
      37             : #include "cpl_progress.h"
      38             : #include "cpl_string.h"
      39             : #include "cpl_vsi.h"
      40             : #include "cpl_worker_thread_pool.h"
      41             : #include "cpl_quad_tree.h"
      42             : #include "gdal.h"
      43             : #include "gdal_alg.h"
      44             : #include "gdal_alg_priv.h"
      45             : #include "gdal_thread_pool.h"
      46             : #include "gdalresamplingkernels.h"
      47             : #include "gdalwarpkernel_opencl.h"
      48             : 
      49             : // #define CHECK_SUM_WITH_GEOS
      50             : #ifdef CHECK_SUM_WITH_GEOS
      51             : #include "ogr_geometry.h"
      52             : #include "ogr_geos.h"
      53             : #endif
      54             : 
      55             : #ifdef USE_NEON_OPTIMIZATIONS
      56             : #include "include_sse2neon.h"
      57             : #define USE_SSE2
      58             : 
      59             : #include "gdalsse_priv.h"
      60             : 
      61             : // We restrict to 64bit processors because they are guaranteed to have SSE2.
      62             : // Could possibly be used too on 32bit, but we would need to check at runtime.
      63             : #elif defined(__x86_64) || defined(_M_X64)
      64             : #define USE_SSE2
      65             : 
      66             : #include "gdalsse_priv.h"
      67             : 
      68             : #if __SSE4_1__
      69             : #include <smmintrin.h>
      70             : #endif
      71             : 
      72             : #if __SSE3__
      73             : #include <pmmintrin.h>
      74             : #endif
      75             : 
      76             : #endif
      77             : 
      78             : constexpr double BAND_DENSITY_THRESHOLD = 0.0000000001;  // 1e-10
      79             : constexpr float SRC_DENSITY_THRESHOLD = 0.000000001f;  // 1e-9
                       // NOTE(review): neither constant is read in this part of the listing;
                       // presumably densities below these values are treated as zero - confirm
                       // against the code that uses them.
      80             : 
      81             : // #define INSTANTIATE_FLOAT64_SSE2_IMPL
      82             : 
      83             : static const int anGWKFilterRadius[] = {
                           // Filter radius for each resampling algorithm.
                           // GWKGetFilterRadius() indexes this table directly with the
                           // GDALResampleAlg value, so the entries have to stay in the enum's
                           // numeric order (index 7 is the reserved GRA_Gauss slot).
      84             :     0,  // Nearest neighbour
      85             :     1,  // Bilinear
      86             :     2,  // Cubic Convolution (Catmull-Rom)
      87             :     2,  // Cubic B-Spline
      88             :     3,  // Lanczos windowed sinc
      89             :     0,  // Average
      90             :     0,  // Mode
      91             :     0,  // Reserved GRA_Gauss=7
      92             :     0,  // Max
      93             :     0,  // Min
      94             :     0,  // Med
      95             :     0,  // Q1
      96             :     0,  // Q3
      97             :     0,  // Sum
      98             :     0,  // RMS
      99             : };
     100             : 
     101             : static double GWKBilinear(double dfX);
     102             : static double GWKCubic(double dfX);
     103             : static double GWKBSpline(double dfX);
     104             : static double GWKLanczosSinc(double dfX);
                       // The four functions above are only declared here; their definitions
                       // are not part of this excerpt.
     105             : 
     106             : static const FilterFuncType apfGWKFilter[] = {
                           // Single-argument filter function for each resampling algorithm,
                           // returned unchanged by GWKGetFilterFunc(). Indexed by the
                           // GDALResampleAlg value; nullptr marks the algorithms that have no
                           // function in this table.
     107             :     nullptr,         // Nearest neighbour
     108             :     GWKBilinear,     // Bilinear
     109             :     GWKCubic,        // Cubic Convolution (Catmull-Rom)
     110             :     GWKBSpline,      // Cubic B-Spline
     111             :     GWKLanczosSinc,  // Lanczos windowed sinc
     112             :     nullptr,         // Average
     113             :     nullptr,         // Mode
     114             :     nullptr,         // Reserved GRA_Gauss=7
     115             :     nullptr,         // Max
     116             :     nullptr,         // Min
     117             :     nullptr,         // Med
     118             :     nullptr,         // Q1
     119             :     nullptr,         // Q3
     120             :     nullptr,         // Sum
     121             :     nullptr,         // RMS
     122             : };
     123             : 
     124             : // TODO(schwehr): Can we make these functions have a const * const arg?
     125             : static double GWKBilinear4Values(double *padfVals);
     126             : static double GWKCubic4Values(double *padfVals);
     127             : static double GWKBSpline4Values(double *padfVals);
     128             : static double GWKLanczosSinc4Values(double *padfVals);
                       // Declarations only; the definitions are not part of this excerpt.
                       // NOTE(review): the "4Values" suffix suggests each call processes four
                       // entries of padfVals - confirm against the definitions.
     129             : 
     130             : static const FilterFunc4ValuesType apfGWKFilter4Values[] = {
                           // Array-taking counterpart of apfGWKFilter, returned unchanged by
                           // GWKGetFilterFunc4Values(). Same indexing (GDALResampleAlg value)
                           // and same nullptr convention for algorithms without an entry.
     131             :     nullptr,                // Nearest neighbour
     132             :     GWKBilinear4Values,     // Bilinear
     133             :     GWKCubic4Values,        // Cubic Convolution (Catmull-Rom)
     134             :     GWKBSpline4Values,      // Cubic B-Spline
     135             :     GWKLanczosSinc4Values,  // Lanczos windowed sinc
     136             :     nullptr,                // Average
     137             :     nullptr,                // Mode
     138             :     nullptr,                // Reserved GRA_Gauss=7
     139             :     nullptr,                // Max
     140             :     nullptr,                // Min
     141             :     nullptr,                // Med
     142             :     nullptr,                // Q1
     143             :     nullptr,                // Q3
     144             :     nullptr,                // Sum
     145             :     nullptr,                // RMS
     146             : };
     147             : 
     148        9583 : int GWKGetFilterRadius(GDALResampleAlg eResampleAlg)
                       // Returns the anGWKFilterRadius entry for eResampleAlg.
                       // The lookup is a plain array index without a runtime range check: the
                       // static_assert below only pins the table length to GRA_LAST_VALUE + 1,
                       // so callers must pass a valid enumerator.
     149             : {
     150             :     static_assert(CPL_ARRAYSIZE(anGWKFilterRadius) == GRA_LAST_VALUE + 1,
     151             :                   "Bad size of anGWKFilterRadius");
     152        9583 :     return anGWKFilterRadius[eResampleAlg];
     153             : }
     154             : 
     155        3669 : FilterFuncType GWKGetFilterFunc(GDALResampleAlg eResampleAlg)
                       // Returns the apfGWKFilter entry for eResampleAlg, which is nullptr for
                       // every algorithm other than bilinear, cubic, cubic B-spline and Lanczos.
                       // No runtime range check is made on eResampleAlg; the static_assert only
                       // verifies the table length at compile time.
     156             : {
     157             :     static_assert(CPL_ARRAYSIZE(apfGWKFilter) == GRA_LAST_VALUE + 1,
     158             :                   "Bad size of apfGWKFilter");
     159        3669 :     return apfGWKFilter[eResampleAlg];
     160             : }
     161             : 
     162        3667 : FilterFunc4ValuesType GWKGetFilterFunc4Values(GDALResampleAlg eResampleAlg)
                       // Returns the apfGWKFilter4Values entry for eResampleAlg, which is
                       // nullptr for every algorithm other than bilinear, cubic, cubic B-spline
                       // and Lanczos. No runtime range check is made on eResampleAlg; the
                       // static_assert only verifies the table length at compile time.
     163             : {
     164             :     static_assert(CPL_ARRAYSIZE(apfGWKFilter4Values) == GRA_LAST_VALUE + 1,
     165             :                   "Bad size of apfGWKFilter4Values");
     166        3667 :     return apfGWKFilter4Values[eResampleAlg];
     167             : }
     168             : 
     169             : #ifdef HAVE_OPENCL
     170             : static CPLErr GWKOpenCLCase(GDALWarpKernel *);
     171             : #endif
     172             : 
                       // Forward declarations of the file-local warp routines. Each takes the
                       // kernel and returns a CPLErr; the definitions are outside this excerpt.
                       // NOTE(review): the names suggest one specialisation per resampling
                       // method / mask configuration / data type - confirm against the
                       // definitions and the dispatch code.
                       // The *Double variants exist only when INSTANTIATE_FLOAT64_SSE2_IMPL is
                       // defined.
     173             : static CPLErr GWKGeneralCase(GDALWarpKernel *);
     174             : static CPLErr GWKRealCase(GDALWarpKernel *poWK);
     175             : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     176             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     177             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     178             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
     179             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
     180             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
     181             : #endif
     182             : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     183             : static CPLErr GWKNearestByte(GDALWarpKernel *poWK);
     184             : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     185             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     186             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
     187             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
     188             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
     189             : #endif
     190             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     191             : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     192             : static CPLErr GWKNearestShort(GDALWarpKernel *poWK);
     193             : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
     194             : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK);
     195             : static CPLErr GWKAverageOrMode(GDALWarpKernel *);
     196             : static CPLErr GWKSumPreserving(GDALWarpKernel *);
     197             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
     198             : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
     199             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
     200             : 
     201             : /************************************************************************/
     202             : /*                           GWKJobStruct                               */
     203             : /************************************************************************/
     204             : 
     205             : struct GWKJobStruct
                       // Description of one unit of warping work handed to a worker function.
                       // The four reference members alias state owned elsewhere: both places
                       // that construct a job in this listing (GWKGenericMonoThread() and
                       // GWKThreadsCreate()) bind them to members of a GWKThreadData, so all
                       // jobs built from one GWKThreadData share a single mutex, condition
                       // variable, counter and stop flag.
     206             : {
     207             :     std::mutex &mutex;  // taken by GWKProgressThread() around counter/stopFlag access
     208             :     std::condition_variable &cv;  // notified by GWKProgressThread() after each update
     209             :     int &counter;  // incremented once per progress callback invocation
     210             :     bool &stopFlag;  // true once the computation has been asked to stop
     211             :     GDALWarpKernel *poWK;  // kernel this job works on
     212             :     int iYMin;  // start of the Y range assigned to this job
     213             :     int iYMax;  // end of that range; GWKRun() makes it equal to the next job's iYMin
                           // Progress hook; both implementations in this file return nonzero
                           // when the computation must be interrupted.
     214             :     int (*pfnProgress)(GWKJobStruct *psJob);
     215             :     void *pTransformerArg;  // transformer argument to use for this job
     216             :     void (*pfnFunc)(
     217             :         void *);  // used by GWKRun() to assign the proper pTransformerArg
     218             : 
                           // Binds the shared-state references; every other member starts as
                           // nullptr / 0 and is filled in later by the code that owns the job.
     219        2032 :     GWKJobStruct(std::mutex &mutex_, std::condition_variable &cv_,
     220             :                  int &counter_, bool &stopFlag_)
     221        2032 :         : mutex(mutex_), cv(cv_), counter(counter_), stopFlag(stopFlag_),
     222             :           poWK(nullptr), iYMin(0), iYMax(0), pfnProgress(nullptr),
     223        2032 :           pTransformerArg(nullptr), pfnFunc(nullptr)
     224             :     {
     225        2032 :     }
     226             : };
     227             : 
     228             : struct GWKThreadData
                       // State shared between GWKRun() and its worker jobs. Instances are
                       // allocated by GWKThreadsCreate() and released by GWKThreadsEnd();
                       // GWKGenericMonoThread() also uses a stack instance as plain storage.
     229             : {
                           // Job queue created in GWKThreadsCreate() when a thread pool is
                           // available; GWKRun() takes the single-threaded path while it is null.
     230             :     std::unique_ptr<CPLJobQueue> poJobQueue{};
                           // Pre-built job descriptors (GWKThreadsCreate() allocates nMaxThreads
                           // of them), refilled by GWKRun() on each call.
     231             :     std::unique_ptr<std::vector<GWKJobStruct>> threadJobs{};
     232             :     int nMaxThreads{0};  // upper bound on the worker count used by GWKRun()
                           // Progress counter and stop request shared by all jobs through the
                           // reference members of GWKJobStruct.
                           // NOTE(review): nothing in this listing resets either of them after
                           // construction - confirm whether one GWKThreadData can serve several
                           // GWKRun() calls and, if so, whether stale values are intended.
     233             :     int counter{0};
     234             :     bool stopFlag{false};
                           // Locked by GWKProgressThread(), ThreadFuncAdapter() and GWKRun()
                           // around accesses to the shared members of this struct.
     235             :     std::mutex mutex{};
     236             :     std::condition_variable cv{};  // GWKRun() waits on it for progress updates
                           // True while a worker is borrowing pTransformerArgInput (set and
                           // cleared in ThreadFuncAdapter()).
     237             :     bool bTransformerArgInputAssignedToThread{false};
     238             :     void *pTransformerArgInput{
     239             :         nullptr};  // owned by calling layer. Not to be destroyed
                           // Transformer argument per worker, keyed by the CPLGetPID() value
                           // read in ThreadFuncAdapter(). Entries still present when
                           // GWKThreadsEnd() runs are passed to GDALDestroyTransformer().
     240             :     std::map<GIntBig, void *> mapThreadToTransformerArg{};
     241             :     int nTotalThreadCountForThisRun = 0;  // worker count chosen by the current GWKRun()
     242             :     int nCurThreadCountForThisRun = 0;  // workers that have entered ThreadFuncAdapter()
     243             : };
     244             : 
     245             : /************************************************************************/
     246             : /*                        GWKProgressThread()                           */
     247             : /************************************************************************/
     248             : 
     249             : // Return TRUE if the computation must be interrupted.
     250          18 : static int GWKProgressThread(GWKJobStruct *psJob)
                       // Progress hook installed by GWKRun() for worker jobs. It does not call
                       // the user's progress function itself: it increments the shared counter
                       // and reads the shared stop flag under the mutex, then notifies the
                       // condition variable that GWKRun() waits on.
                       // Returns the stop flag value that was read (nonzero means stop).
     251             : {
     252          18 :     bool stop = false;
     253             :     {
     254          18 :         std::lock_guard<std::mutex> lock(psJob->mutex);
     255          18 :         psJob->counter++;
     256          18 :         stop = psJob->stopFlag;
     257             :     }
                           // The notification is sent after the lock has been released.
     258          18 :     psJob->cv.notify_one();
     259             : 
     260          18 :     return stop;
     261             : }
     262             : 
     263             : /************************************************************************/
     264             : /*                      GWKProgressMonoThread()                         */
     265             : /************************************************************************/
     266             : 
     267             : // Return TRUE if the computation must be interrupted.
     268      198791 : static int GWKProgressMonoThread(GWKJobStruct *psJob)
                       // Progress hook installed by GWKGenericMonoThread(). Increments the
                       // job's counter and calls the kernel's progress function with
                       //   dfProgressBase + dfProgressScale * counter / iYMax.
                       // If that function returns false, a CPLE_UserInterrupt error is emitted,
                       // the stop flag is set and TRUE is returned; otherwise FALSE.
                       // No lock is taken here (see the coverity annotation below).
     269             : {
     270      198791 :     GDALWarpKernel *poWK = psJob->poWK;
     271             :     // coverity[missing_lock]
     272      198791 :     if (!poWK->pfnProgress(
     273      198791 :             poWK->dfProgressBase +
     274      198791 :                 poWK->dfProgressScale *
     275      198791 :                     (++psJob->counter / static_cast<double>(psJob->iYMax)),
     276             :             "", poWK->pProgress))
     277             :     {
     278           1 :         CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
     279           1 :         psJob->stopFlag = true;
     280           1 :         return TRUE;
     281             :     }
     282      198790 :     return FALSE;
     283             : }
     284             : 
     285             : /************************************************************************/
     286             : /*                       GWKGenericMonoThread()                         */
     287             : /************************************************************************/
     288             : 
     289        2027 : static CPLErr GWKGenericMonoThread(GDALWarpKernel *poWK,
     290             :                                    void (*pfnFunc)(void *pUserData))
                       // Runs pfnFunc synchronously on the calling thread with a single job
                       // whose Y range is iYMin = 0, iYMax = poWK->nDstYSize and which uses the
                       // kernel's own pTransformerArg.
                       // A local GWKThreadData only provides storage for the counter and stop
                       // flag that the job references.
                       // Returns CE_Failure if the stop flag is set once pfnFunc returns,
                       // CE_None otherwise.
     291             : {
     292        2027 :     GWKThreadData td;
     293             : 
     294             :     // NOTE: the mutex is not used.
     295        2027 :     GWKJobStruct job(td.mutex, td.cv, td.counter, td.stopFlag);
     296        2027 :     job.poWK = poWK;
     297        2027 :     job.iYMin = 0;
     298        2027 :     job.iYMax = poWK->nDstYSize;
     299        2027 :     job.pfnProgress = GWKProgressMonoThread;
     300        2027 :     job.pTransformerArg = poWK->pTransformerArg;
     301        2027 :     pfnFunc(&job);
     302             : 
     303        4054 :     return td.stopFlag ? CE_Failure : CE_None;
     304             : }
     305             : 
     306             : /************************************************************************/
     307             : /*                          GWKThreadsCreate()                          */
     308             : /************************************************************************/
     309             : 
     310        1401 : void *GWKThreadsCreate(char **papszWarpOptions,
     311             :                        GDALTransformerFunc /* pfnTransformer */,
     312             :                        void *pTransformerArg)
                       // Allocates the GWKThreadData used by GWKRun() and returns it as an
                       // opaque pointer, to be released with GWKThreadsEnd().
                       //
                       // papszWarpOptions: warp options; only NUM_THREADS is read here.
                       // pfnTransformer:   unused.
                       // pTransformerArg:  caller-owned transformer argument; stored only when
                       //                   multi-threading is enabled.
                       //
                       // The thread count comes from the NUM_THREADS warp option, falling back
                       // to the GDAL_NUM_THREADS configuration option (default "1"). The value
                       // "ALL_CPUS" selects CPLGetNumCPUs(); anything else goes through atoi().
                       // Counts <= 1 disable threading and counts above 128 are clamped to 128.
                       // A GWKThreadData is returned in every case; its job queue stays null
                       // when threading is disabled or no thread pool could be obtained.
     313             : {
     314             :     const char *pszWarpThreads =
     315        1401 :         CSLFetchNameValue(papszWarpOptions, "NUM_THREADS");
     316        1401 :     if (pszWarpThreads == nullptr)
     317        1401 :         pszWarpThreads = CPLGetConfigOption("GDAL_NUM_THREADS", "1");
     318             : 
     319        1401 :     int nThreads = 0;
     320        1401 :     if (EQUAL(pszWarpThreads, "ALL_CPUS"))
     321           3 :         nThreads = CPLGetNumCPUs();
     322             :     else
     323        1398 :         nThreads = atoi(pszWarpThreads);
     324        1401 :     if (nThreads <= 1)
     325        1396 :         nThreads = 0;
     326        1401 :     if (nThreads > 128)
     327           0 :         nThreads = 128;
     328             : 
     329        1401 :     GWKThreadData *psThreadData = new GWKThreadData();
     330             :     auto poThreadPool =
     331        1401 :         nThreads > 0 ? GDALGetGlobalThreadPool(nThreads) : nullptr;
     332        1401 :     if (nThreads && poThreadPool)
     333             :     {
     334           5 :         psThreadData->nMaxThreads = nThreads;
                               // Build nThreads copies of a job whose reference members are
                               // bound to the shared state stored in psThreadData.
     335           5 :         psThreadData->threadJobs.reset(new std::vector<GWKJobStruct>(
     336             :             nThreads,
     337           5 :             GWKJobStruct(psThreadData->mutex, psThreadData->cv,
     338          10 :                          psThreadData->counter, psThreadData->stopFlag)));
     339             : 
     340           5 :         psThreadData->poJobQueue = poThreadPool->CreateJobQueue();
     341           5 :         psThreadData->pTransformerArgInput = pTransformerArg;
     342             :     }
     343             : 
     344        1401 :     return psThreadData;
     345             : }
     346             : 
     347             : /************************************************************************/
     348             : /*                             GWKThreadsEnd()                          */
     349             : /************************************************************************/
     350             : 
     351        1401 : void GWKThreadsEnd(void *psThreadDataIn)
                       // Releases a GWKThreadData obtained from GWKThreadsCreate(); a null
                       // pointer is accepted and ignored.
                       // When a job queue exists, every transformer still recorded in
                       // mapThreadToTransformerArg is destroyed with GDALDestroyTransformer()
                       // and the queue is released before the structure itself is deleted.
     352             : {
     353        1401 :     if (psThreadDataIn == nullptr)
     354           0 :         return;
     355             : 
     356        1401 :     GWKThreadData *psThreadData = static_cast<GWKThreadData *>(psThreadDataIn);
     357        1401 :     if (psThreadData->poJobQueue)
     358             :     {
     359             :         // cppcheck-suppress constVariableReference
     360          15 :         for (auto &pair : psThreadData->mapThreadToTransformerArg)
     361             :         {
                                   // The caller-owned input transformer must never be destroyed
                                   // here; ThreadFuncAdapter() erases its map entry once the
                                   // borrowing worker is done with it.
     362          10 :             CPLAssert(pair.second != psThreadData->pTransformerArgInput);
     363          10 :             GDALDestroyTransformer(pair.second);
     364             :         }
     365           5 :         psThreadData->poJobQueue.reset();
     366             :     }
     367        1401 :     delete psThreadData;
     368             : }
     369             : 
     370             : /************************************************************************/
     371             : /*                         ThreadFuncAdapter()                          */
     372             : /************************************************************************/
     373             : 
     374          15 : static void ThreadFuncAdapter(void *pData)
                       // Job entry point submitted by GWKRun(); pData is a GWKJobStruct *.
                       // It selects the transformer argument the job will use and then calls
                       // psJob->pfnFunc(pData). The transformer is, in order of preference:
                       //   1. the one already recorded for this thread id in
                       //      mapThreadToTransformerArg;
                       //   2. the caller's original transformer, borrowed by the last job of
                       //      the run to enter this function, provided nobody holds it yet;
                       //   3. a fresh GDALCloneTransformer() copy, recorded in the map (and
                       //      destroyed later by GWKThreadsEnd()).
                       // A borrowed original is removed from the map again once pfnFunc has
                       // returned.
     375             : {
     376          15 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
     377          15 :     GWKThreadData *psThreadData =
     378          15 :         static_cast<GWKThreadData *>(psJob->poWK->psThreadData);
     379             : 
     380             :     // Look if we have already a per-thread transformer
     381          15 :     void *pTransformerArg = nullptr;
     382          15 :     const GIntBig nThreadId = CPLGetPID();
     383             : 
     384             :     {
     385          30 :         std::lock_guard<std::mutex> lock(psThreadData->mutex);
     386          15 :         ++psThreadData->nCurThreadCountForThisRun;
     387             : 
     388          15 :         auto oIter = psThreadData->mapThreadToTransformerArg.find(nThreadId);
     389          15 :         if (oIter != psThreadData->mapThreadToTransformerArg.end())
     390             :         {
     391           0 :             pTransformerArg = oIter->second;
     392             :         }
     393          15 :         else if (!psThreadData->bTransformerArgInputAssignedToThread &&
     394          15 :                  psThreadData->nCurThreadCountForThisRun ==
     395          15 :                      psThreadData->nTotalThreadCountForThisRun)
     396             :         {
     397             :             // If we are the last thread to be started, temporarily borrow the
     398             :             // original transformer
     399           5 :             psThreadData->bTransformerArgInputAssignedToThread = true;
     400           5 :             pTransformerArg = psThreadData->pTransformerArgInput;
     401           5 :             psThreadData->mapThreadToTransformerArg[nThreadId] =
     402             :                 pTransformerArg;
     403             :         }
     404             : 
     405          15 :         if (pTransformerArg == nullptr)
     406             :         {
     407          10 :             CPLAssert(psThreadData->pTransformerArgInput != nullptr);
     408          10 :             CPLAssert(!psThreadData->bTransformerArgInputAssignedToThread);
     409             :         }
     410             :     }
     411             : 
     412             :     // If no transformer assigned to current thread, instantiate one
     413          15 :     if (pTransformerArg == nullptr)
     414             :     {
     415             :         // This somehow assumes that GDALCloneTransformer() is thread-safe
     416             :         // which should normally be the case.
                               // The clone is made outside the mutex; the lock is only
                               // re-taken below to publish the result.
     417             :         pTransformerArg =
     418          10 :             GDALCloneTransformer(psThreadData->pTransformerArgInput);
     419             : 
     420             :         // Lock for the stop flag and the transformer map.
     421          10 :         std::lock_guard<std::mutex> lock(psThreadData->mutex);
     422          10 :         if (!pTransformerArg)
     423             :         {
                                   // NOTE(review): this path sets the stop flag and returns
                                   // without calling pfnFunc and without notifying
                                   // psThreadData->cv. Confirm that a GWKRun() blocked in
                                   // cv.wait() cannot hang here, and that the failure reaches
                                   // GWKRun()'s return value.
     424           0 :             psJob->stopFlag = true;
     425           0 :             return;
     426             :         }
     427          10 :         psThreadData->mapThreadToTransformerArg[nThreadId] = pTransformerArg;
     428             :     }
     429             : 
     430          15 :     psJob->pTransformerArg = pTransformerArg;
     431          15 :     psJob->pfnFunc(pData);
     432             : 
     433             :     // Give back original transformer, if borrowed.
     434             :     {
     435          30 :         std::lock_guard<std::mutex> lock(psThreadData->mutex);
     436          15 :         if (psThreadData->bTransformerArgInputAssignedToThread &&
     437           8 :             pTransformerArg == psThreadData->pTransformerArgInput)
     438             :         {
     439             :             psThreadData->mapThreadToTransformerArg.erase(
     440           5 :                 psThreadData->mapThreadToTransformerArg.find(nThreadId));
     441           5 :             psThreadData->bTransformerArgInputAssignedToThread = false;
     442             :         }
     443             :     }
     444             : }
     445             : 
     446             : /************************************************************************/
     447             : /*                                GWKRun()                              */
     448             : /************************************************************************/
     449             : 
     450        2032 : static CPLErr GWKRun(GDALWarpKernel *poWK, const char *pszFuncName,
     451             :                      void (*pfnFunc)(void *pUserData))
                       // Runs pfnFunc over the destination rows of poWK, on the calling thread
                       // or split across worker jobs.
                       //
                       // poWK:        kernel to process; its psThreadData selects the mode.
                       // pszFuncName: name used only in the debug message.
                       // pfnFunc:     worker routine; it receives a GWKJobStruct * as void *.
                       //
                       // Returns CE_Failure when the user's progress function asks to stop
                       // (before starting or while waiting) and otherwise reflects the stop
                       // flag as described at the end of the function.
     452             : 
     453             : {
     454        2032 :     const int nDstYSize = poWK->nDstYSize;
     455             : 
     456        2032 :     CPLDebug("GDAL",
     457             :              "GDALWarpKernel()::%s() "
     458             :              "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d",
     459             :              pszFuncName, poWK->nSrcXOff, poWK->nSrcYOff, poWK->nSrcXSize,
     460             :              poWK->nSrcYSize, poWK->nDstXOff, poWK->nDstYOff, poWK->nDstXSize,
     461             :              poWK->nDstYSize);
     462             : 
                           // Initial progress report; it also lets the user cancel before any
                           // work is started.
     463        2032 :     if (!poWK->pfnProgress(poWK->dfProgressBase, "", poWK->pProgress))
     464             :     {
     465           0 :         CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
     466           0 :         return CE_Failure;
     467             :     }
     468             : 
                           // Without thread data or a job queue, run on the calling thread.
     469        2032 :     GWKThreadData *psThreadData =
     470             :         static_cast<GWKThreadData *>(poWK->psThreadData);
     471        2032 :     if (psThreadData == nullptr || psThreadData->poJobQueue == nullptr)
     472             :     {
     473        2027 :         return GWKGenericMonoThread(poWK, pfnFunc);
     474             :     }
     475             : 
                           // Worker count: at most nMaxThreads and at most one worker per two
                           // destination rows, further limited to one worker per
                           // WARP_THREAD_CHUNK_SIZE destination pixels, and never below 1.
     476           5 :     int nThreads = std::min(psThreadData->nMaxThreads, nDstYSize / 2);
     477             :     // Config option mostly useful for tests to be able to test multithreading
     478             :     // with small rasters
     479             :     const int nWarpChunkSize =
     480           5 :         atoi(CPLGetConfigOption("WARP_THREAD_CHUNK_SIZE", "65536"));
     481           5 :     if (nWarpChunkSize > 0)
     482             :     {
     483           3 :         GIntBig nChunks =
     484           3 :             static_cast<GIntBig>(nDstYSize) * poWK->nDstXSize / nWarpChunkSize;
     485           3 :         if (nThreads > nChunks)
     486           1 :             nThreads = static_cast<int>(nChunks);
     487             :     }
     488           5 :     if (nThreads <= 0)
     489           1 :         nThreads = 1;
     490             : 
     491           5 :     CPLDebug("WARP", "Using %d threads", nThreads);
     492             : 
     493           5 :     auto &jobs = *psThreadData->threadJobs;
     494           5 :     CPLAssert(static_cast<int>(jobs.size()) >= nThreads);
     495             :     // Fill-in job structures.
     496          20 :     for (int i = 0; i < nThreads; ++i)
     497             :     {
     498          15 :         auto &job = jobs[i];
     499          15 :         job.poWK = poWK;
                               // Even split of [0, nDstYSize): job i ends where job i + 1
                               // starts. The 64-bit intermediate avoids overflowing
                               // i * nDstYSize.
     500          15 :         job.iYMin =
     501          15 :             static_cast<int>(static_cast<int64_t>(i) * nDstYSize / nThreads);
     502          15 :         job.iYMax = static_cast<int>(static_cast<int64_t>(i + 1) * nDstYSize /
     503          15 :                                      nThreads);
                               // NOTE(review): when the progress function is GDALDummyProgress,
                               // job.pfnProgress keeps whatever value it already had (nullptr
                               // after construction). Confirm that the worker routines accept
                               // a null hook, and that a value left by an earlier call with a
                               // real progress function is harmless.
     504          15 :         if (poWK->pfnProgress != GDALDummyProgress)
     505           1 :             job.pfnProgress = GWKProgressThread;
     506          15 :         job.pfnFunc = pfnFunc;
     507             :     }
     508             : 
     509             :     bool bStopFlag;
     510             :     {
                               // The mutex is held from before the jobs are submitted until
                               // cv.wait() releases it or this scope ends, so workers block on
                               // their first lock in ThreadFuncAdapter() until then.
     511           5 :         std::unique_lock<std::mutex> lock(psThreadData->mutex);
     512             : 
     513           5 :         psThreadData->nTotalThreadCountForThisRun = nThreads;
     514             :         // coverity[missing_lock]
     515           5 :         psThreadData->nCurThreadCountForThisRun = 0;
     516             : 
     517             :         // Start jobs.
     518          20 :         for (int i = 0; i < nThreads; ++i)
     519             :         {
     520          15 :             auto &job = jobs[i];
     521          15 :             psThreadData->poJobQueue->SubmitJob(ThreadFuncAdapter,
     522             :                                                 static_cast<void *>(&job));
     523             :         }
     524             : 
     525             :         /* --------------------------------------------------------------------
     526             :          */
     527             :         /*      Report progress. */
     528             :         /* --------------------------------------------------------------------
     529             :          */
     530           5 :         if (poWK->pfnProgress != GDALDummyProgress)
     531             :         {
                                   // The user's progress function is only ever called from
                                   // this thread; workers just bump the counter and notify.
                                   // NOTE(review): the loop ends only when the counter reaches
                                   // nDstYSize or the user cancels. The counter is not reset
                                   // in this function, and a worker that stops early does not
                                   // raise it - confirm both cases cannot stall or skip this
                                   // loop.
     532           1 :             while (psThreadData->counter < nDstYSize)
     533             :             {
     534           1 :                 psThreadData->cv.wait(lock);
     535           1 :                 if (!poWK->pfnProgress(poWK->dfProgressBase +
     536           1 :                                            poWK->dfProgressScale *
     537           1 :                                                (psThreadData->counter /
     538           1 :                                                 static_cast<double>(nDstYSize)),
     539             :                                        "", poWK->pProgress))
     540             :                 {
     541           1 :                     CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
     542           1 :                     psThreadData->stopFlag = true;
     543           1 :                     break;
     544             :                 }
     545             :             }
     546             :         }
     547             : 
                               // NOTE(review): the flag is sampled here, before
                               // WaitCompletion(). With GDALDummyProgress the mutex has been
                               // held since before submission, so no worker can have set the
                               // flag yet during this call. Confirm that worker-side failures
                               // (e.g. a failed transformer clone) are reported some other way.
     548           5 :         bStopFlag = psThreadData->stopFlag;
     549             :     }
     550             : 
     551             :     /* -------------------------------------------------------------------- */
     552             :     /*      Wait for all jobs to complete.                                  */
     553             :     /* -------------------------------------------------------------------- */
     554           5 :     psThreadData->poJobQueue->WaitCompletion();
     555             : 
     556           5 :     return bStopFlag ? CE_Failure : CE_None;
     557             : }
     558             : 
     559             : /************************************************************************/
     560             : /* ==================================================================== */
     561             : /*                            GDALWarpKernel                            */
     562             : /* ==================================================================== */
     563             : /************************************************************************/
     564             : 
     565             : /**
     566             :  * \class GDALWarpKernel "gdalwarper.h"
     567             :  *
     568             :  * Low level image warping class.
     569             :  *
     570             :  * This class is responsible for low level image warping for one
     571             :  * "chunk" of imagery.  The class is essentially a structure with all
     572             :  * data members public - primarily so that new special-case functions
     573             :  * can be added without changing the class declaration.
     574             :  *
     575             :  * Applications are normally intended to interact with warping facilities
     576             :  * through the GDALWarpOperation class, though the GDALWarpKernel can in
     577             :  * theory be used directly if great care is taken in setting up the
     578             :  * control data.
     579             :  *
     580             :  * <h3>Design Issues</h3>
     581             :  *
     582             :  * The intention is that PerformWarp() would analyze the setup in terms
     583             :  * of the datatype, resampling type, and validity/density mask usage and
     584             :  * pick one of many specific implementations of the warping algorithm over
     585             :  * a continuum of optimization vs. generality.  At one end there will be a
     586             :  * reference general purpose implementation of the algorithm that supports
     587             :  * any data type (working internally in double precision complex), all three
     588             :  * resampling types, and any or all of the validity/density masks.  At the
     589             :  * other end would be highly optimized algorithms for common cases like
     590             :  * nearest neighbour resampling on GDT_Byte data with no masks.
     591             :  *
     592             :  * The full set of optimized versions have not been decided but we should
     593             :  * expect to have at least:
     594             :  *  - One for each resampling algorithm for 8bit data with no masks.
     595             :  *  - One for each resampling algorithm for float data with no masks.
     596             :  *  - One for each resampling algorithm for float data with any/all masks
     597             :  *    (essentially the generic case for just float data).
     598             :  *  - One for each resampling algorithm for 8bit data with support for
     599             :  *    input validity masks (per band or per pixel).  This handles the common
     600             :  *    case of nodata masking.
     601             :  *  - One for each resampling algorithm for float data with support for
     602             :  *    input validity masks (per band or per pixel).  This handles the common
     603             :  *    case of nodata masking.
     604             :  *
     605             :  * Some of the specializations would operate on all bands in one pass
     606             :  * (especially the ones without masking would do this), while others might
     607             :  * process each band individually to reduce code complexity.
     608             :  *
     609             :  * <h3>Masking Semantics</h3>
     610             :  *
     611             :  * A detailed explanation of the semantics of the validity and density masks,
     612             :  * and their effects on resampling kernels is needed here.
     613             :  */
     614             : 
     615             : /************************************************************************/
     616             : /*                     GDALWarpKernel Data Members                      */
     617             : /************************************************************************/
     618             : 
     619             : /**
     620             :  * \var GDALResampleAlg GDALWarpKernel::eResample;
     621             :  *
     622             :  * Resampling algorithm.
     623             :  *
     624             :  * The resampling algorithm to use.  One of GRA_NearestNeighbour, GRA_Bilinear,
     625             :  * GRA_Cubic, GRA_CubicSpline, GRA_Lanczos, GRA_Average, GRA_RMS,
     626             :  * GRA_Mode or GRA_Sum.
     627             :  *
     628             :  * This field is required. GRA_NearestNeighbour may be used as a default
     629             :  * value.
     630             :  */
     631             : 
     632             : /**
     633             :  * \var GDALDataType GDALWarpKernel::eWorkingDataType;
     634             :  *
     635             :  * Working pixel data type.
     636             :  *
     637             :  * The datatype of pixels in the source image (papabySrcImage) and
     638             :  * destination image (papabyDstImage) buffers.  Note that operations on
     639             :  * some data types (such as GDT_Byte) may be much better optimized than other
     640             :  * less common cases.
     641             :  *
     642             :  * This field is required.  It may not be GDT_Unknown.
     643             :  */
     644             : 
     645             : /**
     646             :  * \var int GDALWarpKernel::nBands;
     647             :  *
     648             :  * Number of bands.
     649             :  *
     650             :  * The number of bands (layers) of imagery being warped.  Determines the
     651             :  * number of entries in the papabySrcImage, papanBandSrcValid,
     652             :  * and papabyDstImage arrays.
     653             :  *
     654             :  * This field is required.
     655             :  */
     656             : 
     657             : /**
     658             :  * \var int GDALWarpKernel::nSrcXSize;
     659             :  *
     660             :  * Source image width in pixels.
     661             :  *
     662             :  * This field is required.
     663             :  */
     664             : 
     665             : /**
     666             :  * \var int GDALWarpKernel::nSrcYSize;
     667             :  *
     668             :  * Source image height in pixels.
     669             :  *
     670             :  * This field is required.
     671             :  */
     672             : 
     673             : /**
     674             :  * \var double GDALWarpKernel::dfSrcXExtraSize;
     675             :  *
     676             :  * Number of pixels included in nSrcXSize that are present on the edges of
     677             :  * the area of interest to take into account the width of the kernel.
     678             :  *
     679             :  * This field is required.
     680             :  */
     681             : 
     682             : /**
     683             :  * \var double GDALWarpKernel::dfSrcYExtraSize;
     684             :  *
     685             :  * Number of pixels included in nSrcYSize that are present on the edges of
     686             :  * the area of interest to take into account the height of the kernel.
     687             :  *
     688             :  * This field is required.
     689             :  */
     690             : 
     691             : /**
     692             :  * \var GByte **GDALWarpKernel::papabySrcImage;
     693             :  *
     694             :  * Array of source image band data.
     695             :  *
     696             :  * This is an array (of size GDALWarpKernel::nBands) of pointers
     697             :  * to image data.  Each individual band of image data is organized as a single
     698             :  * block of image data in left to right, then bottom to top order.  The actual
     699             :  * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
     700             :  *
     701             :  * To access the pixel value for the (x=3, y=4) pixel (zero based) of
     702             :  * the second band with eWorkingDataType set to GDT_Float32 use code like
     703             :  * this:
     704             :  *
     705             :  * \code
     706             :  *   float dfPixelValue;
     707             :  *   int   nBand = 2-1;  // Band indexes are zero based.
     708             :  *   int   nPixel = 3; // Zero based.
     709             :  *   int   nLine = 4;  // Zero based.
     710             :  *
     711             :  *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
     712             :  *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
     713             :  *   assert( nBand >= 0 && nBand < poKern->nBands );
     714             :  *   dfPixelValue = ((float *) poKern->papabySrcImage[nBand])
     715             :  *                                  [nPixel + nLine * poKern->nSrcXSize];
     716             :  * \endcode
     717             :  *
     718             :  * This field is required.
     719             :  */
     720             : 
     721             : /**
     722             :  * \var GUInt32 **GDALWarpKernel::papanBandSrcValid;
     723             :  *
     724             :  * Per band validity mask for source pixels.
     725             :  *
     726             :  * Array of pixel validity mask layers for each source band.   Each of
     727             :  * the mask layers is the same size (in pixels) as the source image with
     728             :  * one bit per pixel.  Note that it is legal (and common) for this to be
     729             :  * NULL indicating that none of the pixels are invalidated, or for some
     730             :  * band validity masks to be NULL in which case all pixels of the band are
     731             :  * valid.  The following code can be used to test the validity of a particular
     732             :  * pixel.
     733             :  *
     734             :  * \code
     735             :  *   int   bIsValid = TRUE;
     736             :  *   int   nBand = 2-1;  // Band indexes are zero based.
     737             :  *   int   nPixel = 3; // Zero based.
     738             :  *   int   nLine = 4;  // Zero based.
     739             :  *
     740             :  *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
     741             :  *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
     742             :  *   assert( nBand >= 0 && nBand < poKern->nBands );
     743             :  *
     744             :  *   if( poKern->papanBandSrcValid != NULL
     745             :  *       && poKern->papanBandSrcValid[nBand] != NULL )
     746             :  *   {
     747             :  *       GUInt32 *panBandMask = poKern->papanBandSrcValid[nBand];
     748             :  *       int    iPixelOffset = nPixel + nLine * poKern->nSrcXSize;
     749             :  *
     750             :  *       bIsValid = CPLMaskGet(panBandMask, iPixelOffset);
     751             :  *   }
     752             :  * \endcode
     753             :  */
     754             : 
     755             : /**
     756             :  * \var GUInt32 *GDALWarpKernel::panUnifiedSrcValid;
     757             :  *
     758             :  * Per pixel validity mask for source pixels.
     759             :  *
     760             :  * A single validity mask layer that applies to the pixels of all source
     761             :  * bands.  It is accessed similarly to papanBandSrcValid, but without the
     762             :  * extra level of band indirection.
     763             :  *
     764             :  * This pointer may be NULL indicating that all pixels are valid.
     765             :  *
     766             :  * Note that if both panUnifiedSrcValid, and papanBandSrcValid are available,
     767             :  * the pixel isn't considered to be valid unless both arrays indicate it is
     768             :  * valid.
     769             :  */
     770             : 
     771             : /**
     772             :  * \var float *GDALWarpKernel::pafUnifiedSrcDensity;
     773             :  *
     774             :  * Per pixel density mask for source pixels.
     775             :  *
     776             :  * A single density mask layer that applies to the pixels of all source
     777             :  * bands.  It contains values between 0.0 and 1.0 indicating the degree to
     778             :  * which this pixel should be allowed to contribute to the output result.
     779             :  *
     780             :  * This pointer may be NULL indicating that all pixels have a density of 1.0.
     781             :  *
     782             :  * The density for a pixel may be accessed like this:
     783             :  *
     784             :  * \code
     785             :  *   float fDensity = 1.0;
     786             :  *   int nPixel = 3;  // Zero based.
     787             :  *   int nLine = 4;   // Zero based.
     788             :  *
     789             :  *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
     790             :  *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
     791             :  *   if( poKern->pafUnifiedSrcDensity != NULL )
     792             :  *     fDensity = poKern->pafUnifiedSrcDensity
     793             :  *                                  [nPixel + nLine * poKern->nSrcXSize];
     794             :  * \endcode
     795             :  */
     796             : 
     797             : /**
     798             :  * \var int GDALWarpKernel::nDstXSize;
     799             :  *
     800             :  * Width of destination image in pixels.
     801             :  *
     802             :  * This field is required.
     803             :  */
     804             : 
     805             : /**
     806             :  * \var int GDALWarpKernel::nDstYSize;
     807             :  *
     808             :  * Height of destination image in pixels.
     809             :  *
     810             :  * This field is required.
     811             :  */
     812             : 
     813             : /**
     814             :  * \var GByte **GDALWarpKernel::papabyDstImage;
     815             :  *
     816             :  * Array of destination image band data.
     817             :  *
     818             :  * This is an array (of size GDALWarpKernel::nBands) of pointers
     819             :  * to image data.  Each individual band of image data is organized as a single
     820             :  * block of image data in left to right, then bottom to top order.  The actual
     821             :  * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
     822             :  *
     823             :  * To access the pixel value for the (x=3, y=4) pixel (zero based) of
     824             :  * the second band with eWorkingDataType set to GDT_Float32 use code like
     825             :  * this:
     826             :  *
     827             :  * \code
     828             :  *   float dfPixelValue;
     829             :  *   int   nBand = 2-1;  // Band indexes are zero based.
     830             :  *   int   nPixel = 3; // Zero based.
     831             :  *   int   nLine = 4;  // Zero based.
     832             :  *
     833             :  *   assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
     834             :  *   assert( nLine >= 0 && nLine < poKern->nDstYSize );
     835             :  *   assert( nBand >= 0 && nBand < poKern->nBands );
     836             :  *   dfPixelValue = ((float *) poKern->papabyDstImage[nBand])
     837             :  *                                  [nPixel + nLine * poKern->nDstXSize];
     838             :  * \endcode
     839             :  *
     840             :  * This field is required.
     841             :  */
     842             : 
     843             : /**
     844             :  * \var GUInt32 *GDALWarpKernel::panDstValid;
     845             :  *
     846             :  * Per pixel validity mask for destination pixels.
     847             :  *
     848             :  * A single validity mask layer that applies to the pixels of all destination
     849             :  * bands.  It is accessed similarly to panUnifiedSrcValid, but based
     850             :  * on the size of the destination image.
     851             :  *
     852             :  * This pointer may be NULL indicating that all pixels are valid.
     853             :  */
     854             : 
     855             : /**
     856             :  * \var float *GDALWarpKernel::pafDstDensity;
     857             :  *
     858             :  * Per pixel density mask for destination pixels.
     859             :  *
     860             :  * A single density mask layer that applies to the pixels of all destination
     861             :  * bands.  It contains values between 0.0 and 1.0.
     862             :  *
     863             :  * This pointer may be NULL indicating that all pixels have a density of 1.0.
     864             :  *
     865             :  * The density for a pixel may be accessed like this:
     866             :  *
     867             :  * \code
     868             :  *   float fDensity = 1.0;
     869             :  *   int   nPixel = 3; // Zero based.
     870             :  *   int   nLine = 4;  // Zero based.
     871             :  *
     872             :  *   assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
     873             :  *   assert( nLine >= 0 && nLine < poKern->nDstYSize );
     874             :  *   if( poKern->pafDstDensity != NULL )
     875             :  *     fDensity = poKern->pafDstDensity[nPixel + nLine * poKern->nDstXSize];
     876             :  * \endcode
     877             :  */
     878             : 
     879             : /**
     880             :  * \var int GDALWarpKernel::nSrcXOff;
     881             :  *
     882             :  * X offset to source pixel coordinates for transformation.
     883             :  *
     884             :  * See pfnTransformer.
     885             :  *
     886             :  * This field is required.
     887             :  */
     888             : 
     889             : /**
     890             :  * \var int GDALWarpKernel::nSrcYOff;
     891             :  *
     892             :  * Y offset to source pixel coordinates for transformation.
     893             :  *
     894             :  * See pfnTransformer.
     895             :  *
     896             :  * This field is required.
     897             :  */
     898             : 
     899             : /**
     900             :  * \var int GDALWarpKernel::nDstXOff;
     901             :  *
     902             :  * X offset to destination pixel coordinates for transformation.
     903             :  *
     904             :  * See pfnTransformer.
     905             :  *
     906             :  * This field is required.
     907             :  */
     908             : 
     909             : /**
     910             :  * \var int GDALWarpKernel::nDstYOff;
     911             :  *
     912             :  * Y offset to destination pixel coordinates for transformation.
     913             :  *
     914             :  * See pfnTransformer.
     915             :  *
     916             :  * This field is required.
     917             :  */
     918             : 
     919             : /**
     920             :  * \var GDALTransformerFunc GDALWarpKernel::pfnTransformer;
     921             :  *
     922             :  * Source/destination location transformer.
     923             :  *
     924             :  * The function to call to transform coordinates between source image
     925             :  * pixel/line coordinates and destination image pixel/line coordinates.
     926             :  * See GDALTransformerFunc() for details of the semantics of this function.
     927             :  *
     928             :  * The GDALWarpKernel algorithm will only ever use this transformer in
     929             :  * "destination to source" mode (bDstToSrc=TRUE), and will always pass
     930             :  * partial or complete scanlines of points in the destination image as
     931             :  * input.  This means, among other things, that it is safe to use the
     932             :  * approximating transform GDALApproxTransform() as the transformation
     933             :  * function.
     934             :  *
     935             :  * Source and destination images may be subsets of a larger overall image.
     936             :  * The transformation algorithms will expect and return pixel/line coordinates
     937             :  * in terms of this larger image, so coordinates need to be offset by
     938             :  * the offsets specified in nSrcXOff, nSrcYOff, nDstXOff, and nDstYOff before
     939             :  * passing to pfnTransformer, and after return from it.
     940             :  *
     941             :  * The GDALWarpKernel::pTransformerArg value will be passed as the callback
     942             :  * data to this function when it is called.
     943             :  *
     944             :  * This field is required.
     945             :  */
     946             : 
     947             : /**
     948             :  * \var void *GDALWarpKernel::pTransformerArg;
     949             :  *
     950             :  * Callback data for pfnTransformer.
     951             :  *
     952             :  * This field may be NULL if not required for the pfnTransformer being used.
     953             :  */
     954             : 
     955             : /**
     956             :  * \var GDALProgressFunc GDALWarpKernel::pfnProgress;
     957             :  *
     958             :  * The function to call to report progress of the algorithm, and to check
     959             :  * for a requested termination of the operation.  It operates according to
     960             :  * GDALProgressFunc() semantics.
     961             :  *
     962             :  * Generally speaking the progress function will be invoked for each
     963             :  * scanline of the destination buffer that has been processed.
     964             :  *
     965             :  * This field may be NULL (internally set to GDALDummyProgress()).
     966             :  */
     967             : 
     968             : /**
     969             :  * \var void *GDALWarpKernel::pProgress;
     970             :  *
     971             :  * Callback data for pfnProgress.
     972             :  *
     973             :  * This field may be NULL if not required for the pfnProgress being used.
     974             :  */
     975             : 
     976             : /************************************************************************/
     977             : /*                           GDALWarpKernel()                           */
     978             : /************************************************************************/
     979             : 
     980        2338 : GDALWarpKernel::GDALWarpKernel()  // Default constructor: gives every member listed below a neutral initial value.
     981             :     : papszWarpOptions(nullptr), eResample(GRA_NearestNeighbour),  // no warp options; nearest-neighbour resampling
     982             :       eWorkingDataType(GDT_Unknown), nBands(0), nSrcXSize(0), nSrcYSize(0),  // data type unset; zero bands; 0x0 source
     983             :       dfSrcXExtraSize(0.0), dfSrcYExtraSize(0.0), papabySrcImage(nullptr),  // no extra source margin; no source buffers
     984             :       papanBandSrcValid(nullptr), panUnifiedSrcValid(nullptr),  // source validity masks start null
     985             :       pafUnifiedSrcDensity(nullptr), nDstXSize(0), nDstYSize(0),  // source density mask null; 0x0 destination
     986             :       papabyDstImage(nullptr), panDstValid(nullptr), pafDstDensity(nullptr),  // destination buffers and masks start null
     987             :       dfXScale(1.0), dfYScale(1.0), dfXFilter(0.0), dfYFilter(0.0), nXRadius(0),  // scales start at 1.0; filter values at zero
     988             :       nYRadius(0), nFiltInitX(0), nFiltInitY(0), nSrcXOff(0), nSrcYOff(0),  // zero filter init values and source offsets
     989             :       nDstXOff(0), nDstYOff(0), pfnTransformer(nullptr),  // zero destination offsets; transformer callback null
     990             :       pTransformerArg(nullptr), pfnProgress(GDALDummyProgress),  // progress callback defaults to GDALDummyProgress
     991             :       pProgress(nullptr), dfProgressBase(0.0), dfProgressScale(1.0),  // progress base 0.0 and scale 1.0
     992             :       padfDstNoDataReal(nullptr), psThreadData(nullptr),  // padfDstNoDataReal and psThreadData start null
     993        2338 :       eTieStrategy(GWKTS_First)  // tie strategy defaults to GWKTS_First
     994             : {
     995        2338 : }
     996             : 
     997             : /************************************************************************/
     998             : /*                          ~GDALWarpKernel()                           */
     999             : /************************************************************************/
    1000             : 
    1001        2338 : GDALWarpKernel::~GDALWarpKernel()  // Destructor with an empty body: no pointer member is freed here.
    1002             : {
    1003        2338 : }
    1004             : 
    1005             : /************************************************************************/
    1006             : /*                            PerformWarp()                             */
    1007             : /************************************************************************/
    1008             : 
    1009             : /**
    1010             :  * \fn CPLErr GDALWarpKernel::PerformWarp();
    1011             :  *
    1012             :  * This method performs the warp described in the GDALWarpKernel.
    1013             :  *
    1014             :  * @return CE_None on success or CE_Failure if an error occurs.
    1015             :  */
    1016             : 
    1017        2336 : CPLErr GDALWarpKernel::PerformWarp()
    1018             : 
    1019             : {
    1020        2336 :     const CPLErr eErr = Validate();
    1021             : 
    1022        2336 :     if (eErr != CE_None)
    1023           1 :         return eErr;
    1024             : 
    1025             :     // See #2445 and #3079.
    1026        2335 :     if (nSrcXSize <= 0 || nSrcYSize <= 0)
    1027             :     {
    1028         303 :         if (!pfnProgress(dfProgressBase + dfProgressScale, "", pProgress))
    1029             :         {
    1030           0 :             CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
    1031           0 :             return CE_Failure;
    1032             :         }
    1033         303 :         return CE_None;
    1034             :     }
    1035             : 
    1036             :     /* -------------------------------------------------------------------- */
    1037             :     /*      Pre-calculate resampling scales and window sizes for filtering. */
    1038             :     /* -------------------------------------------------------------------- */
    1039             : 
    1040        2032 :     dfXScale = static_cast<double>(nDstXSize) / (nSrcXSize - dfSrcXExtraSize);
    1041        2032 :     dfYScale = static_cast<double>(nDstYSize) / (nSrcYSize - dfSrcYExtraSize);
    1042        2032 :     if (nSrcXSize >= nDstXSize && nSrcXSize <= nDstXSize + dfSrcXExtraSize)
    1043        1263 :         dfXScale = 1.0;
    1044        2032 :     if (nSrcYSize >= nDstYSize && nSrcYSize <= nDstYSize + dfSrcYExtraSize)
    1045        1016 :         dfYScale = 1.0;
    1046        2032 :     if (dfXScale < 1.0)
    1047             :     {
    1048         548 :         double dfXReciprocalScale = 1.0 / dfXScale;
    1049         548 :         const int nXReciprocalScale =
    1050         548 :             static_cast<int>(dfXReciprocalScale + 0.5);
    1051         548 :         if (fabs(dfXReciprocalScale - nXReciprocalScale) < 0.05)
    1052         431 :             dfXScale = 1.0 / nXReciprocalScale;
    1053             :     }
    1054        2032 :     if (dfYScale < 1.0)
    1055             :     {
    1056         517 :         double dfYReciprocalScale = 1.0 / dfYScale;
    1057         517 :         const int nYReciprocalScale =
    1058         517 :             static_cast<int>(dfYReciprocalScale + 0.5);
    1059         517 :         if (fabs(dfYReciprocalScale - nYReciprocalScale) < 0.05)
    1060         368 :             dfYScale = 1.0 / nYReciprocalScale;
    1061             :     }
    1062             : 
    1063             :     // XSCALE and YSCALE undocumented for now. Can help in some cases.
    1064             :     // Best would probably be a per-pixel scale computation.
    1065        2032 :     const char *pszXScale = CSLFetchNameValue(papszWarpOptions, "XSCALE");
    1066        2032 :     if (pszXScale != nullptr && !EQUAL(pszXScale, "FROM_GRID_SAMPLING"))
    1067           1 :         dfXScale = CPLAtof(pszXScale);
    1068        2032 :     const char *pszYScale = CSLFetchNameValue(papszWarpOptions, "YSCALE");
    1069        2032 :     if (pszYScale != nullptr)
    1070           1 :         dfYScale = CPLAtof(pszYScale);
    1071             : 
    1072             :     // If the xscale is significantly lower than the yscale, this is highly
    1073             :     // suspicious of a situation of wrapping a very large virtual file in
    1074             :     // geographic coordinates with left and right parts being close to the
    1075             :     // antimeridian. In that situation, the xscale computed by the above method
    1076             :     // is completely wrong. Prefer doing an average of a few sample points
    1077             :     // instead
    1078        2032 :     if ((dfYScale / dfXScale > 100 ||
    1079           1 :          (pszXScale != nullptr && EQUAL(pszXScale, "FROM_GRID_SAMPLING"))))
    1080             :     {
    1081             :         // Sample points along a grid
    1082           4 :         const int nPointsX = std::min(10, nDstXSize);
    1083           4 :         const int nPointsY = std::min(10, nDstYSize);
    1084           4 :         const int nPoints = 3 * nPointsX * nPointsY;
    1085           8 :         std::vector<double> padfX;
    1086           8 :         std::vector<double> padfY;
    1087           8 :         std::vector<double> padfZ(nPoints);
    1088           8 :         std::vector<int> pabSuccess(nPoints);
    1089          44 :         for (int iY = 0; iY < nPointsY; iY++)
    1090             :         {
    1091         440 :             for (int iX = 0; iX < nPointsX; iX++)
    1092             :             {
    1093         400 :                 const double dfX =
    1094             :                     nPointsX == 1
    1095         400 :                         ? 0.0
    1096         400 :                         : static_cast<double>(iX) * nDstXSize / (nPointsX - 1);
    1097         400 :                 const double dfY =
    1098             :                     nPointsY == 1
    1099         400 :                         ? 0.0
    1100         400 :                         : static_cast<double>(iY) * nDstYSize / (nPointsY - 1);
    1101             : 
    1102             :                 // Reproject each destination sample point and its neighbours
    1103             :                 // at (x+1,y) and (x,y+1), so as to get the local scale.
    1104         400 :                 padfX.push_back(dfX);
    1105         400 :                 padfY.push_back(dfY);
    1106             : 
    1107         400 :                 padfX.push_back((iX == nPointsX - 1) ? dfX - 1 : dfX + 1);
    1108         400 :                 padfY.push_back(dfY);
    1109             : 
    1110         400 :                 padfX.push_back(dfX);
    1111         400 :                 padfY.push_back((iY == nPointsY - 1) ? dfY - 1 : dfY + 1);
    1112             :             }
    1113             :         }
    1114           4 :         pfnTransformer(pTransformerArg, TRUE, nPoints, &padfX[0], &padfY[0],
    1115           4 :                        &padfZ[0], &pabSuccess[0]);
    1116             : 
    1117             :         // Compute the xscale at each sampling point
    1118           8 :         std::vector<double> adfXScales;
    1119         404 :         for (int i = 0; i < nPoints; i += 3)
    1120             :         {
    1121         400 :             if (pabSuccess[i] && pabSuccess[i + 1] && pabSuccess[i + 2])
    1122             :             {
    1123             :                 const double dfPointXScale =
    1124         400 :                     1.0 / std::max(std::abs(padfX[i + 1] - padfX[i]),
    1125         800 :                                    std::abs(padfX[i + 2] - padfX[i]));
    1126         400 :                 adfXScales.push_back(dfPointXScale);
    1127             :             }
    1128             :         }
    1129             : 
    1130             :         // Sort by increasing xscale
    1131           4 :         std::sort(adfXScales.begin(), adfXScales.end());
    1132             : 
    1133           4 :         if (!adfXScales.empty())
    1134             :         {
    1135             :             // Compute the average of scales, but eliminate outlying small
    1136             :             // scales, in case some samples are just along the discontinuity.
    1137           4 :             const double dfMaxPointXScale = adfXScales.back();
    1138           4 :             double dfSumPointXScale = 0;
    1139           4 :             int nCountPointScale = 0;
    1140         404 :             for (double dfPointXScale : adfXScales)
    1141             :             {
    1142         400 :                 if (dfPointXScale > dfMaxPointXScale / 10)
    1143             :                 {
    1144         398 :                     dfSumPointXScale += dfPointXScale;
    1145         398 :                     nCountPointScale++;
    1146             :                 }
    1147             :             }
    1148           4 :             if (nCountPointScale > 0)  // should always be true
    1149             :             {
    1150           4 :                 const double dfXScaleFromSampling =
    1151           4 :                     dfSumPointXScale / nCountPointScale;
    1152             : #if DEBUG_VERBOSE
    1153             :                 CPLDebug("WARP", "Correcting dfXScale from %f to %f", dfXScale,
    1154             :                          dfXScaleFromSampling);
    1155             : #endif
    1156           4 :                 dfXScale = dfXScaleFromSampling;
    1157             :             }
    1158             :         }
    1159             :     }
    1160             : 
    1161             : #if DEBUG_VERBOSE
    1162             :     CPLDebug("WARP", "dfXScale = %f, dfYScale = %f", dfXScale, dfYScale);
    1163             : #endif
    1164             : 
    1165        2032 :     const int bUse4SamplesFormula = dfXScale >= 0.95 && dfYScale >= 0.95;
    1166             : 
    1167             :     // Safety check for callers that would use GDALWarpKernel without using
    1168             :     // GDALWarpOperation.
    1169        1969 :     if ((eResample == GRA_CubicSpline || eResample == GRA_Lanczos ||
    1170        1906 :          ((eResample == GRA_Cubic || eResample == GRA_Bilinear) &&
    1171        4064 :           !bUse4SamplesFormula)) &&
    1172         388 :         atoi(CSLFetchNameValueDef(papszWarpOptions, "EXTRA_ELTS", "0")) !=
    1173             :             WARP_EXTRA_ELTS)
    1174             :     {
    1175           0 :         CPLError(CE_Failure, CPLE_AppDefined,
    1176             :                  "Source arrays must have WARP_EXTRA_ELTS extra elements at "
    1177             :                  "their end. "
    1178             :                  "See GDALWarpKernel class definition. If this condition is "
    1179             :                  "fulfilled, define a EXTRA_ELTS=%d warp options",
    1180             :                  WARP_EXTRA_ELTS);
    1181           0 :         return CE_Failure;
    1182             :     }
    1183             : 
    1184        2032 :     dfXFilter = anGWKFilterRadius[eResample];
    1185        2032 :     dfYFilter = anGWKFilterRadius[eResample];
    1186             : 
    1187        2032 :     nXRadius = dfXScale < 1.0 ? static_cast<int>(ceil(dfXFilter / dfXScale))
    1188        1566 :                               : static_cast<int>(dfXFilter);
    1189        2032 :     nYRadius = dfYScale < 1.0 ? static_cast<int>(ceil(dfYFilter / dfYScale))
    1190        1543 :                               : static_cast<int>(dfYFilter);
    1191             : 
    1192             :     // Filter window offset depends on the parity of the kernel radius.
    1193        2032 :     nFiltInitX = ((anGWKFilterRadius[eResample] + 1) % 2) - nXRadius;
    1194        2032 :     nFiltInitY = ((anGWKFilterRadius[eResample] + 1) % 2) - nYRadius;
    1195             : 
    1196        2032 :     bApplyVerticalShift =
    1197        2032 :         CPLFetchBool(papszWarpOptions, "APPLY_VERTICAL_SHIFT", false);
    1198        2032 :     dfMultFactorVerticalShift = CPLAtof(CSLFetchNameValueDef(
    1199        2032 :         papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT", "1.0"));
    1200             : 
    1201             :     /* -------------------------------------------------------------------- */
    1202             :     /*      Set up resampling functions.                                    */
    1203             :     /* -------------------------------------------------------------------- */
    1204        2032 :     if (CPLFetchBool(papszWarpOptions, "USE_GENERAL_CASE", false))
    1205          12 :         return GWKGeneralCase(this);
    1206             : 
    1207             : #if defined(HAVE_OPENCL)
    1208         571 :     if ((eWorkingDataType == GDT_Byte || eWorkingDataType == GDT_CInt16 ||
    1209         399 :          eWorkingDataType == GDT_UInt16 || eWorkingDataType == GDT_Int16 ||
    1210         265 :          eWorkingDataType == GDT_CFloat32 || eWorkingDataType == GDT_Float32) &&
    1211        1875 :         (eResample == GRA_Bilinear || eResample == GRA_Cubic ||
    1212        1435 :          eResample == GRA_CubicSpline || eResample == GRA_Lanczos) &&
    1213        4563 :         !bApplyVerticalShift &&
    1214             :         // OpenCL warping gives different results than the ones expected by autotest,
    1215             :         // so disable it by default even if found.
    1216        1046 :         CPLTestBool(
    1217         523 :             CSLFetchNameValueDef(papszWarpOptions, "USE_OPENCL",
    1218             :                                  CPLGetConfigOption("GDAL_USE_OPENCL", "NO"))))
    1219             :     {
    1220           0 :         if (pafUnifiedSrcDensity != nullptr)
    1221             :         {
    1222             :             // If pafUnifiedSrcDensity is only set to 1.0, then we can
    1223             :             // discard it.
    1224           0 :             bool bFoundNotOne = false;
    1225           0 :             for (GPtrDiff_t j = 0;
    1226           0 :                  j < static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize; j++)
    1227             :             {
    1228           0 :                 if (pafUnifiedSrcDensity[j] != 1.0)
    1229             :                 {
    1230           0 :                     bFoundNotOne = true;
    1231           0 :                     break;
    1232             :                 }
    1233             :             }
    1234           0 :             if (!bFoundNotOne)
    1235             :             {
    1236           0 :                 CPLFree(pafUnifiedSrcDensity);
    1237           0 :                 pafUnifiedSrcDensity = nullptr;
    1238             :             }
    1239             :         }
    1240             : 
    1241           0 :         if (pafUnifiedSrcDensity != nullptr)
    1242             :         {
    1243             :             // Typically if there's a cutline or an alpha band
    1244           0 :             CPLDebugOnce("WARP", "pafUnifiedSrcDensity is not null, "
    1245             :                                  "hence OpenCL warper cannot be used");
    1246             :         }
    1247             :         else
    1248             :         {
    1249           0 :             const CPLErr eResult = GWKOpenCLCase(this);
    1250             : 
    1251             :             // CE_Warning tells us a suitable OpenCL environment was not available
    1252             :             // so we fall through to other CPU based methods.
    1253           0 :             if (eResult != CE_Warning)
    1254           0 :                 return eResult;
    1255             :         }
    1256             :     }
    1257             : #endif  // defined HAVE_OPENCL
    1258             : 
    1259        2020 :     const bool bNoMasksOrDstDensityOnly =
    1260        2016 :         papanBandSrcValid == nullptr && panUnifiedSrcValid == nullptr &&
    1261        4036 :         pafUnifiedSrcDensity == nullptr && panDstValid == nullptr;
    1262             : 
    1263        2020 :     if (eWorkingDataType == GDT_Byte && eResample == GRA_NearestNeighbour &&
    1264             :         bNoMasksOrDstDensityOnly)
    1265         863 :         return GWKNearestNoMasksOrDstDensityOnlyByte(this);
    1266             : 
    1267        1157 :     if (eWorkingDataType == GDT_Byte && eResample == GRA_Bilinear &&
    1268             :         bNoMasksOrDstDensityOnly)
    1269         126 :         return GWKBilinearNoMasksOrDstDensityOnlyByte(this);
    1270             : 
    1271        1031 :     if (eWorkingDataType == GDT_Byte && eResample == GRA_Cubic &&
    1272             :         bNoMasksOrDstDensityOnly)
    1273          72 :         return GWKCubicNoMasksOrDstDensityOnlyByte(this);
    1274             : 
    1275         959 :     if (eWorkingDataType == GDT_Byte && eResample == GRA_CubicSpline &&
    1276             :         bNoMasksOrDstDensityOnly)
    1277          12 :         return GWKCubicSplineNoMasksOrDstDensityOnlyByte(this);
    1278             : 
    1279         947 :     if (eWorkingDataType == GDT_Byte && eResample == GRA_NearestNeighbour)
    1280         276 :         return GWKNearestByte(this);
    1281             : 
    1282         671 :     if ((eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16) &&
    1283         134 :         eResample == GRA_NearestNeighbour && bNoMasksOrDstDensityOnly)
    1284          18 :         return GWKNearestNoMasksOrDstDensityOnlyShort(this);
    1285             : 
    1286         653 :     if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Cubic &&
    1287             :         bNoMasksOrDstDensityOnly)
    1288           5 :         return GWKCubicNoMasksOrDstDensityOnlyShort(this);
    1289             : 
    1290         648 :     if ((eWorkingDataType == GDT_Int16) && eResample == GRA_CubicSpline &&
    1291             :         bNoMasksOrDstDensityOnly)
    1292           6 :         return GWKCubicSplineNoMasksOrDstDensityOnlyShort(this);
    1293             : 
    1294         642 :     if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Bilinear &&
    1295             :         bNoMasksOrDstDensityOnly)
    1296          18 :         return GWKBilinearNoMasksOrDstDensityOnlyShort(this);
    1297             : 
    1298         624 :     if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Cubic &&
    1299             :         bNoMasksOrDstDensityOnly)
    1300          12 :         return GWKCubicNoMasksOrDstDensityOnlyUShort(this);
    1301             : 
    1302         612 :     if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_CubicSpline &&
    1303             :         bNoMasksOrDstDensityOnly)
    1304           5 :         return GWKCubicSplineNoMasksOrDstDensityOnlyUShort(this);
    1305             : 
    1306         607 :     if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Bilinear &&
    1307             :         bNoMasksOrDstDensityOnly)
    1308           6 :         return GWKBilinearNoMasksOrDstDensityOnlyUShort(this);
    1309             : 
    1310         601 :     if ((eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16) &&
    1311          64 :         eResample == GRA_NearestNeighbour)
    1312          27 :         return GWKNearestShort(this);
    1313             : 
    1314         574 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour &&
    1315             :         bNoMasksOrDstDensityOnly)
    1316          11 :         return GWKNearestNoMasksOrDstDensityOnlyFloat(this);
    1317             : 
    1318         563 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour)
    1319          36 :         return GWKNearestFloat(this);
    1320             : 
    1321         527 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_Bilinear &&
    1322             :         bNoMasksOrDstDensityOnly)
    1323           5 :         return GWKBilinearNoMasksOrDstDensityOnlyFloat(this);
    1324             : 
    1325         522 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_Cubic &&
    1326             :         bNoMasksOrDstDensityOnly)
    1327           9 :         return GWKCubicNoMasksOrDstDensityOnlyFloat(this);
    1328             : 
    1329             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    1330             :     if (eWorkingDataType == GDT_Float64 && eResample == GRA_Bilinear &&
    1331             :         bNoMasksOrDstDensityOnly)
    1332             :         return GWKBilinearNoMasksOrDstDensityOnlyDouble(this);
    1333             : 
    1334             :     if (eWorkingDataType == GDT_Float64 && eResample == GRA_Cubic &&
    1335             :         bNoMasksOrDstDensityOnly)
    1336             :         return GWKCubicNoMasksOrDstDensityOnlyDouble(this);
    1337             : #endif
    1338             : 
    1339         513 :     if (eResample == GRA_Average)
    1340          71 :         return GWKAverageOrMode(this);
    1341             : 
    1342         442 :     if (eResample == GRA_RMS)
    1343           9 :         return GWKAverageOrMode(this);
    1344             : 
    1345         433 :     if (eResample == GRA_Mode)
    1346          23 :         return GWKAverageOrMode(this);
    1347             : 
    1348         410 :     if (eResample == GRA_Max)
    1349           6 :         return GWKAverageOrMode(this);
    1350             : 
    1351         404 :     if (eResample == GRA_Min)
    1352           5 :         return GWKAverageOrMode(this);
    1353             : 
    1354         399 :     if (eResample == GRA_Med)
    1355           6 :         return GWKAverageOrMode(this);
    1356             : 
    1357         393 :     if (eResample == GRA_Q1)
    1358           5 :         return GWKAverageOrMode(this);
    1359             : 
    1360         388 :     if (eResample == GRA_Q3)
    1361           5 :         return GWKAverageOrMode(this);
    1362             : 
    1363         383 :     if (eResample == GRA_Sum)
    1364          18 :         return GWKSumPreserving(this);
    1365             : 
    1366         365 :     if (!GDALDataTypeIsComplex(eWorkingDataType))
    1367             :     {
    1368         134 :         return GWKRealCase(this);
    1369             :     }
    1370             : 
    1371         231 :     return GWKGeneralCase(this);
    1372             : }
    1373             : 
    1374             : /************************************************************************/
    1375             : /*                              Validate()                              */
    1376             : /************************************************************************/
    1377             : 
    1378             : /**
    1379             :  * \fn CPLErr GDALWarpKernel::Validate()
    1380             :  *
    1381             :  * Check the settings in the GDALWarpKernel, and issue a CPLError()
    1382             :  * (and return CE_Failure) if the configuration is considered to be
    1383             :  * invalid for some reason.
    1384             :  *
    1385             :  * This method will also do some standard defaulting such as setting
    1386             :  * pfnProgress to GDALDummyProgress() if it is NULL.
    1387             :  *
    1388             :  * @return CE_None on success or CE_Failure if an error is detected.
    1389             :  */
    1390             : 
    1391        2336 : CPLErr GDALWarpKernel::Validate()
    1392             : 
    1393             : {
    1394        2336 :     if (static_cast<size_t>(eResample) >=
    1395             :         (sizeof(anGWKFilterRadius) / sizeof(anGWKFilterRadius[0])))
    1396             :     {
    1397           0 :         CPLError(CE_Failure, CPLE_AppDefined,
    1398             :                  "Unsupported resampling method %d.",
    1399           0 :                  static_cast<int>(eResample));
    1400           0 :         return CE_Failure;
    1401             :     }
    1402             : 
    1403             :     // Tuples of values (e.g. "<R>,<G>,<B>" or "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>)") that must
    1404             :     // be ignored as contributing source pixels during resampling. Only taken into account by
    1405             :     // Average currently
    1406             :     const char *pszExcludedValues =
    1407        2336 :         CSLFetchNameValue(papszWarpOptions, "EXCLUDED_VALUES");
    1408        2336 :     if (pszExcludedValues)
    1409             :     {
    1410             :         const CPLStringList aosTokens(
    1411           8 :             CSLTokenizeString2(pszExcludedValues, "(,)", 0));
    1412           8 :         if ((aosTokens.size() % nBands) != 0)
    1413             :         {
    1414           1 :             CPLError(CE_Failure, CPLE_AppDefined,
    1415             :                      "EXCLUDED_VALUES should contain one or several tuples of "
    1416             :                      "%d values formatted like <R>,<G>,<B> or "
    1417             :                      "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>) if there are multiple "
    1418             :                      "tuples",
    1419             :                      nBands);
    1420           1 :             return CE_Failure;
    1421             :         }
    1422          14 :         std::vector<double> adfTuple;
    1423          28 :         for (int i = 0; i < aosTokens.size(); ++i)
    1424             :         {
    1425          21 :             adfTuple.push_back(CPLAtof(aosTokens[i]));
    1426          21 :             if (((i + 1) % nBands) == 0)
    1427             :             {
    1428           7 :                 m_aadfExcludedValues.push_back(adfTuple);
    1429           7 :                 adfTuple.clear();
    1430             :             }
    1431             :         }
    1432             :     }
    1433             : 
    1434        2335 :     return CE_None;
    1435             : }
    1436             : 
    1437             : /************************************************************************/
    1438             : /*                         GWKOverlayDensity()                          */
    1439             : /*                                                                      */
    1440             : /*      Compute the final density for the destination pixel.  This      */
    1441             : /*      is a function of the overlay density (passed in) and the        */
    1442             : /*      original density.                                               */
    1443             : /************************************************************************/
    1444             : 
    1445     7941290 : static void GWKOverlayDensity(const GDALWarpKernel *poWK, GPtrDiff_t iDstOffset,
    1446             :                               double dfDensity)
    1447             : {
    1448     7941290 :     if (dfDensity < 0.0001 || poWK->pafDstDensity == nullptr)
    1449     6750420 :         return;
    1450             : 
    1451     1190880 :     poWK->pafDstDensity[iDstOffset] = static_cast<float>(
    1452     1190880 :         1.0 - (1.0 - dfDensity) * (1.0 - poWK->pafDstDensity[iDstOffset]));
    1453             : }
    1454             : 
    1455             : /************************************************************************/
    1456             : /*                          GWKRoundValueT()                            */
    1457             : /************************************************************************/
    1458             : 
    1459             : template <class T, bool is_signed> struct sGWKRoundValueT
    1460             : {
    1461             :     static T eval(double);
    1462             : };
    1463             : 
    1464             : template <class T> struct sGWKRoundValueT<T, true> /* signed */
    1465             : {
    1466     2312700 :     static T eval(double dfValue)
    1467             :     {
    1468     2312700 :         return static_cast<T>(floor(dfValue + 0.5));
    1469             :     }
    1470             : };
    1471             : 
    1472             : template <class T> struct sGWKRoundValueT<T, false> /* unsigned */
    1473             : {
    1474    12949981 :     static T eval(double dfValue)
    1475             :     {
    1476    12949981 :         return static_cast<T>(dfValue + 0.5);
    1477             :     }
    1478             : };
    1479             : 
    1480    15237881 : template <class T> static T GWKRoundValueT(double dfValue)
    1481             : {
    1482    15237881 :     return sGWKRoundValueT<T, std::numeric_limits<T>::is_signed>::eval(dfValue);
    1483             : }
    1484             : 
    1485      269074 : template <> float GWKRoundValueT<float>(double dfValue)
    1486             : {
    1487      269074 :     return static_cast<float>(dfValue);
    1488             : }
    1489             : 
    1490             : #ifdef notused
    1491             : template <> double GWKRoundValueT<double>(double dfValue)
    1492             : {
    1493             :     return dfValue;
    1494             : }
    1495             : #endif
    1496             : 
    1497             : /************************************************************************/
    1498             : /*                            GWKClampValueT()                          */
    1499             : /************************************************************************/
    1500             : 
    1501    10418216 : template <class T> static CPL_INLINE T GWKClampValueT(double dfValue)
    1502             : {
    1503    10418216 :     if (dfValue < std::numeric_limits<T>::min())
    1504        3969 :         return std::numeric_limits<T>::min();
    1505    10424058 :     else if (dfValue > std::numeric_limits<T>::max())
    1506       18463 :         return std::numeric_limits<T>::max();
    1507             :     else
    1508    10429158 :         return GWKRoundValueT<T>(dfValue);
    1509             : }
    1510             : 
    1511      718914 : template <> float GWKClampValueT<float>(double dfValue)
    1512             : {
    1513      718914 :     return static_cast<float>(dfValue);
    1514             : }
    1515             : 
    1516             : #ifdef notused
    1517             : template <> double GWKClampValueT<double>(double dfValue)
    1518             : {
    1519             :     return dfValue;
    1520             : }
    1521             : #endif
    1522             : 
    1523             : /************************************************************************/
    1524             : /*                         GWKSetPixelValueRealT()                      */
    1525             : /************************************************************************/
    1526             : 
    1527             : template <class T>
    1528      159076 : static bool GWKSetPixelValueRealT(const GDALWarpKernel *poWK, int iBand,
    1529             :                                   GPtrDiff_t iDstOffset, double dfDensity,
    1530             :                                   T value)
    1531             : {
    1532      159076 :     T *pDst = reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
    1533             : 
    1534             :     /* -------------------------------------------------------------------- */
    1535             :     /*      If the source density is less than 100% we need to fetch the    */
    1536             :     /*      existing destination value, and mix it with the source to       */
    1537             :     /*      get the new "to apply" value.  Also compute composite           */
    1538             :     /*      density.                                                        */
    1539             :     /*                                                                      */
    1540             :     /*      We avoid mixing if density is very near one or risk mixing      */
    1541             :     /*      in very extreme nodata values and causing odd results (#1610)   */
    1542             :     /* -------------------------------------------------------------------- */
    1543      159076 :     if (dfDensity < 0.9999)
    1544             :     {
    1545      159076 :         if (dfDensity < 0.0001)
    1546           0 :             return true;
    1547             : 
    1548      159076 :         double dfDstDensity = 1.0;
    1549             : 
    1550      159076 :         if (poWK->pafDstDensity != nullptr)
    1551      157604 :             dfDstDensity = poWK->pafDstDensity[iDstOffset];
    1552        1472 :         else if (poWK->panDstValid != nullptr &&
    1553           0 :                  !CPLMaskGet(poWK->panDstValid, iDstOffset))
    1554           0 :             dfDstDensity = 0.0;
    1555             : 
    1556             :         // It seems like we also ought to be testing panDstValid[] here!
    1557             : 
    1558      159076 :         const double dfDstReal = pDst[iDstOffset];
    1559             : 
    1560             :         // The destination density is really only relative to the portion
    1561             :         // not occluded by the overlay.
    1562      159076 :         const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
    1563             : 
    1564      159076 :         const double dfReal = (value * dfDensity + dfDstReal * dfDstInfluence) /
    1565      159076 :                               (dfDensity + dfDstInfluence);
    1566             : 
    1567             :         /* --------------------------------------------------------------------
    1568             :          */
    1569             :         /*      Actually apply the destination value. */
    1570             :         /*                                                                      */
    1571             :         /*      Avoid using the destination nodata value for integer datatypes
    1572             :          */
    1573             :         /*      if by chance it is equal to the computed pixel value. */
    1574             :         /* --------------------------------------------------------------------
    1575             :          */
    1576      159076 :         pDst[iDstOffset] = GWKClampValueT<T>(dfReal);
    1577             :     }
    1578             :     else
    1579             :     {
    1580           0 :         pDst[iDstOffset] = value;
    1581             :     }
    1582             : 
    1583      159076 :     if (poWK->padfDstNoDataReal != nullptr &&
    1584           0 :         poWK->padfDstNoDataReal[iBand] == static_cast<double>(pDst[iDstOffset]))
    1585             :     {
    1586           0 :         if (pDst[iDstOffset] == std::numeric_limits<T>::min())
    1587           0 :             pDst[iDstOffset] = std::numeric_limits<T>::min() + 1;
    1588             :         else
    1589           0 :             pDst[iDstOffset]--;
    1590             :     }
    1591             : 
    1592      159076 :     return true;
    1593             : }
    1594             : 
    1595             : /************************************************************************/
    1596             : /*                          GWKSetPixelValue()                          */
    1597             : /************************************************************************/
    1598             : 
    1599     3867640 : static bool GWKSetPixelValue(const GDALWarpKernel *poWK, int iBand,
    1600             :                              GPtrDiff_t iDstOffset, double dfDensity,
    1601             :                              double dfReal, double dfImag)
    1602             : 
    1603             : {
    1604     3867640 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    1605             : 
    1606             :     /* -------------------------------------------------------------------- */
    1607             :     /*      If the source density is less than 100% we need to fetch the    */
    1608             :     /*      existing destination value, and mix it with the source to       */
    1609             :     /*      get the new "to apply" value.  Also compute composite           */
    1610             :     /*      density.                                                        */
    1611             :     /*                                                                      */
    1612             :     /*      We avoid mixing if density is very near one or risk mixing      */
    1613             :     /*      in very extreme nodata values and causing odd results (#1610)   */
    1614             :     /* -------------------------------------------------------------------- */
    1615     3867640 :     if (dfDensity < 0.9999)
    1616             :     {
    1617         800 :         if (dfDensity < 0.0001)
    1618           0 :             return true;
    1619             : 
    1620         800 :         double dfDstDensity = 1.0;
    1621         800 :         if (poWK->pafDstDensity != nullptr)
    1622         800 :             dfDstDensity = poWK->pafDstDensity[iDstOffset];
    1623           0 :         else if (poWK->panDstValid != nullptr &&
    1624           0 :                  !CPLMaskGet(poWK->panDstValid, iDstOffset))
    1625           0 :             dfDstDensity = 0.0;
    1626             : 
    1627         800 :         double dfDstReal = 0.0;
    1628         800 :         double dfDstImag = 0.0;
    1629             :         // It seems like we also ought to be testing panDstValid[] here!
    1630             : 
    1631             :         // TODO(schwehr): Factor out this repeated type of set.
    1632         800 :         switch (poWK->eWorkingDataType)
    1633             :         {
    1634           0 :             case GDT_Byte:
    1635           0 :                 dfDstReal = pabyDst[iDstOffset];
    1636           0 :                 dfDstImag = 0.0;
    1637           0 :                 break;
    1638             : 
    1639           0 :             case GDT_Int8:
    1640           0 :                 dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
    1641           0 :                 dfDstImag = 0.0;
    1642           0 :                 break;
    1643             : 
    1644         400 :             case GDT_Int16:
    1645         400 :                 dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
    1646         400 :                 dfDstImag = 0.0;
    1647         400 :                 break;
    1648             : 
    1649         400 :             case GDT_UInt16:
    1650         400 :                 dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
    1651         400 :                 dfDstImag = 0.0;
    1652         400 :                 break;
    1653             : 
    1654           0 :             case GDT_Int32:
    1655           0 :                 dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
    1656           0 :                 dfDstImag = 0.0;
    1657           0 :                 break;
    1658             : 
    1659           0 :             case GDT_UInt32:
    1660           0 :                 dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
    1661           0 :                 dfDstImag = 0.0;
    1662           0 :                 break;
    1663             : 
    1664           0 :             case GDT_Int64:
    1665           0 :                 dfDstReal = static_cast<double>(
    1666           0 :                     reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
    1667           0 :                 dfDstImag = 0.0;
    1668           0 :                 break;
    1669             : 
    1670           0 :             case GDT_UInt64:
    1671           0 :                 dfDstReal = static_cast<double>(
    1672           0 :                     reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
    1673           0 :                 dfDstImag = 0.0;
    1674           0 :                 break;
    1675             : 
    1676           0 :             case GDT_Float32:
    1677           0 :                 dfDstReal = reinterpret_cast<float *>(pabyDst)[iDstOffset];
    1678           0 :                 dfDstImag = 0.0;
    1679           0 :                 break;
    1680             : 
    1681           0 :             case GDT_Float64:
    1682           0 :                 dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
    1683           0 :                 dfDstImag = 0.0;
    1684           0 :                 break;
    1685             : 
    1686           0 :             case GDT_CInt16:
    1687           0 :                 dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2];
    1688           0 :                 dfDstImag =
    1689           0 :                     reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2 + 1];
    1690           0 :                 break;
    1691             : 
    1692           0 :             case GDT_CInt32:
    1693           0 :                 dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2];
    1694           0 :                 dfDstImag =
    1695           0 :                     reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2 + 1];
    1696           0 :                 break;
    1697             : 
    1698           0 :             case GDT_CFloat32:
    1699           0 :                 dfDstReal = reinterpret_cast<float *>(pabyDst)[iDstOffset * 2];
    1700           0 :                 dfDstImag =
    1701           0 :                     reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1];
    1702           0 :                 break;
    1703             : 
    1704           0 :             case GDT_CFloat64:
    1705           0 :                 dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset * 2];
    1706           0 :                 dfDstImag =
    1707           0 :                     reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1];
    1708           0 :                 break;
    1709             : 
    1710           0 :             case GDT_Unknown:
    1711             :             case GDT_TypeCount:
    1712           0 :                 CPLAssert(false);
    1713             :                 return false;
    1714             :         }
    1715             : 
    1716             :         // The destination density is really only relative to the portion
    1717             :         // not occluded by the overlay.
    1718         800 :         const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
    1719             : 
    1720         800 :         dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
    1721         800 :                  (dfDensity + dfDstInfluence);
    1722             : 
    1723         800 :         dfImag = (dfImag * dfDensity + dfDstImag * dfDstInfluence) /
    1724         800 :                  (dfDensity + dfDstInfluence);
    1725             :     }
    1726             : 
    1727             : /* -------------------------------------------------------------------- */
    1728             : /*      Actually apply the destination value.                           */
    1729             : /*                                                                      */
    1730             : /*      Avoid using the destination nodata value for integer datatypes  */
    1731             : /*      if by chance it is equal to the computed pixel value.           */
    1732             : /* -------------------------------------------------------------------- */
    1733             : 
    1734             : // TODO(schwehr): Can we make this a template?
    1735             : #define CLAMP(type)                                                            \
    1736             :     do                                                                         \
    1737             :     {                                                                          \
    1738             :         type *_pDst = reinterpret_cast<type *>(pabyDst);                       \
    1739             :         if (dfReal < static_cast<double>(std::numeric_limits<type>::min()))    \
    1740             :             _pDst[iDstOffset] =                                                \
    1741             :                 static_cast<type>(std::numeric_limits<type>::min());           \
    1742             :         else if (dfReal >                                                      \
    1743             :                  static_cast<double>(std::numeric_limits<type>::max()))        \
    1744             :             _pDst[iDstOffset] =                                                \
    1745             :                 static_cast<type>(std::numeric_limits<type>::max());           \
    1746             :         else                                                                   \
    1747             :             _pDst[iDstOffset] = (std::numeric_limits<type>::is_signed)         \
    1748             :                                     ? static_cast<type>(floor(dfReal + 0.5))   \
    1749             :                                     : static_cast<type>(dfReal + 0.5);         \
    1750             :         if (poWK->padfDstNoDataReal != nullptr &&                              \
    1751             :             poWK->padfDstNoDataReal[iBand] ==                                  \
    1752             :                 static_cast<double>(_pDst[iDstOffset]))                        \
    1753             :         {                                                                      \
    1754             :             if (_pDst[iDstOffset] ==                                           \
    1755             :                 static_cast<type>(std::numeric_limits<type>::min()))           \
    1756             :                 _pDst[iDstOffset] =                                            \
    1757             :                     static_cast<type>(std::numeric_limits<type>::min() + 1);   \
    1758             :             else                                                               \
    1759             :                 _pDst[iDstOffset]--;                                           \
    1760             :         }                                                                      \
    1761             :     } while (false)
    1762             : 
    1763     3867640 :     switch (poWK->eWorkingDataType)
    1764             :     {
    1765     3141450 :         case GDT_Byte:
    1766     3141450 :             CLAMP(GByte);
    1767     3141450 :             break;
    1768             : 
    1769           0 :         case GDT_Int8:
    1770           0 :             CLAMP(GInt8);
    1771           0 :             break;
    1772             : 
    1773        7470 :         case GDT_Int16:
    1774        7470 :             CLAMP(GInt16);
    1775        7470 :             break;
    1776             : 
    1777         463 :         case GDT_UInt16:
    1778         463 :             CLAMP(GUInt16);
    1779         463 :             break;
    1780             : 
    1781          63 :         case GDT_UInt32:
    1782          63 :             CLAMP(GUInt32);
    1783          63 :             break;
    1784             : 
    1785        3470 :         case GDT_Int32:
    1786        3470 :             CLAMP(GInt32);
    1787        3470 :             break;
    1788             : 
    1789           0 :         case GDT_UInt64:
    1790           0 :             CLAMP(std::uint64_t);
    1791           0 :             break;
    1792             : 
    1793           0 :         case GDT_Int64:
    1794           0 :             CLAMP(std::int64_t);
    1795           0 :             break;
    1796             : 
    1797      478957 :         case GDT_Float32:
    1798      478957 :             reinterpret_cast<float *>(pabyDst)[iDstOffset] =
    1799      478957 :                 static_cast<float>(dfReal);
    1800      478957 :             break;
    1801             : 
    1802         147 :         case GDT_Float64:
    1803         147 :             reinterpret_cast<double *>(pabyDst)[iDstOffset] = dfReal;
    1804         147 :             break;
    1805             : 
    1806      234178 :         case GDT_CInt16:
    1807             :         {
    1808             :             typedef GInt16 T;
    1809      234178 :             if (dfReal < static_cast<double>(std::numeric_limits<T>::min()))
    1810           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1811           0 :                     std::numeric_limits<T>::min();
    1812      234178 :             else if (dfReal >
    1813      234178 :                      static_cast<double>(std::numeric_limits<T>::max()))
    1814           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1815           0 :                     std::numeric_limits<T>::max();
    1816             :             else
    1817      234178 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1818      234178 :                     static_cast<T>(floor(dfReal + 0.5));
    1819      234178 :             if (dfImag < static_cast<double>(std::numeric_limits<T>::min()))
    1820           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1821           0 :                     std::numeric_limits<T>::min();
    1822      234178 :             else if (dfImag >
    1823      234178 :                      static_cast<double>(std::numeric_limits<T>::max()))
    1824           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1825           0 :                     std::numeric_limits<T>::max();
    1826             :             else
    1827      234178 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1828      234178 :                     static_cast<T>(floor(dfImag + 0.5));
    1829      234178 :             break;
    1830             :         }
    1831             : 
    1832         478 :         case GDT_CInt32:
    1833             :         {
    1834             :             typedef GInt32 T;
    1835         478 :             if (dfReal < static_cast<double>(std::numeric_limits<T>::min()))
    1836           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1837           0 :                     std::numeric_limits<T>::min();
    1838         478 :             else if (dfReal >
    1839         478 :                      static_cast<double>(std::numeric_limits<T>::max()))
    1840           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1841           0 :                     std::numeric_limits<T>::max();
    1842             :             else
    1843         478 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1844         478 :                     static_cast<T>(floor(dfReal + 0.5));
    1845         478 :             if (dfImag < static_cast<double>(std::numeric_limits<T>::min()))
    1846           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1847           0 :                     std::numeric_limits<T>::min();
    1848         478 :             else if (dfImag >
    1849         478 :                      static_cast<double>(std::numeric_limits<T>::max()))
    1850           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1851           0 :                     std::numeric_limits<T>::max();
    1852             :             else
    1853         478 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1854         478 :                     static_cast<T>(floor(dfImag + 0.5));
    1855         478 :             break;
    1856             :         }
    1857             : 
    1858         490 :         case GDT_CFloat32:
    1859         490 :             reinterpret_cast<float *>(pabyDst)[iDstOffset * 2] =
    1860         490 :                 static_cast<float>(dfReal);
    1861         490 :             reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1] =
    1862         490 :                 static_cast<float>(dfImag);
    1863         490 :             break;
    1864             : 
    1865         478 :         case GDT_CFloat64:
    1866         478 :             reinterpret_cast<double *>(pabyDst)[iDstOffset * 2] = dfReal;
    1867         478 :             reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1] = dfImag;
    1868         478 :             break;
    1869             : 
    1870           0 :         case GDT_Unknown:
    1871             :         case GDT_TypeCount:
    1872           0 :             return false;
    1873             :     }
    1874             : 
    1875     3867640 :     return true;
    1876             : }
    1877             : 
    1878             : /************************************************************************/
    1879             : /*                       GWKSetPixelValueReal()                         */
    1880             : /************************************************************************/
    1881             : 
    1882      923761 : static bool GWKSetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
    1883             :                                  GPtrDiff_t iDstOffset, double dfDensity,
    1884             :                                  double dfReal)
    1885             : 
    1886             : {
    1887      923761 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    1888             : 
    1889             :     /* -------------------------------------------------------------------- */
    1890             :     /*      If the source density is less than 100% we need to fetch the    */
    1891             :     /*      existing destination value, and mix it with the source to       */
    1892             :     /*      get the new "to apply" value.  Also compute composite           */
    1893             :     /*      density.                                                        */
    1894             :     /*                                                                      */
    1895             :     /*      We avoid mixing if density is very near one or risk mixing      */
    1896             :     /*      in very extreme nodata values and causing odd results (#1610)   */
    1897             :     /* -------------------------------------------------------------------- */
    1898      923761 :     if (dfDensity < 0.9999)
    1899             :     {
    1900         600 :         if (dfDensity < 0.0001)
    1901           0 :             return true;
    1902             : 
    1903         600 :         double dfDstReal = 0.0;
    1904         600 :         double dfDstDensity = 1.0;
    1905             : 
    1906         600 :         if (poWK->pafDstDensity != nullptr)
    1907         600 :             dfDstDensity = poWK->pafDstDensity[iDstOffset];
    1908           0 :         else if (poWK->panDstValid != nullptr &&
    1909           0 :                  !CPLMaskGet(poWK->panDstValid, iDstOffset))
    1910           0 :             dfDstDensity = 0.0;
    1911             : 
    1912             :         // It seems like we also ought to be testing panDstValid[] here!
    1913             : 
    1914         600 :         switch (poWK->eWorkingDataType)
    1915             :         {
    1916           0 :             case GDT_Byte:
    1917           0 :                 dfDstReal = pabyDst[iDstOffset];
    1918           0 :                 break;
    1919             : 
    1920           0 :             case GDT_Int8:
    1921           0 :                 dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
    1922           0 :                 break;
    1923             : 
    1924         300 :             case GDT_Int16:
    1925         300 :                 dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
    1926         300 :                 break;
    1927             : 
    1928         300 :             case GDT_UInt16:
    1929         300 :                 dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
    1930         300 :                 break;
    1931             : 
    1932           0 :             case GDT_Int32:
    1933           0 :                 dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
    1934           0 :                 break;
    1935             : 
    1936           0 :             case GDT_UInt32:
    1937           0 :                 dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
    1938           0 :                 break;
    1939             : 
    1940           0 :             case GDT_Int64:
    1941           0 :                 dfDstReal = static_cast<double>(
    1942           0 :                     reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
    1943           0 :                 break;
    1944             : 
    1945           0 :             case GDT_UInt64:
    1946           0 :                 dfDstReal = static_cast<double>(
    1947           0 :                     reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
    1948           0 :                 break;
    1949             : 
    1950           0 :             case GDT_Float32:
    1951           0 :                 dfDstReal = reinterpret_cast<float *>(pabyDst)[iDstOffset];
    1952           0 :                 break;
    1953             : 
    1954           0 :             case GDT_Float64:
    1955           0 :                 dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
    1956           0 :                 break;
    1957             : 
    1958           0 :             case GDT_CInt16:
    1959             :             case GDT_CInt32:
    1960             :             case GDT_CFloat32:
    1961             :             case GDT_CFloat64:
    1962             :             case GDT_Unknown:
    1963             :             case GDT_TypeCount:
    1964           0 :                 CPLAssert(false);
    1965             :                 return false;
    1966             :         }
    1967             : 
    1968             :         // The destination density is really only relative to the portion
    1969             :         // not occluded by the overlay.
    1970         600 :         const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
    1971             : 
    1972         600 :         dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
    1973         600 :                  (dfDensity + dfDstInfluence);
    1974             :     }
    1975             : 
    1976             :     /* -------------------------------------------------------------------- */
    1977             :     /*      Actually apply the destination value.                           */
    1978             :     /*                                                                      */
    1979             :     /*      Avoid using the destination nodata value for integer datatypes  */
    1980             :     /*      if by chance it is equal to the computed pixel value.           */
    1981             :     /* -------------------------------------------------------------------- */
    1982             : 
    1983      923761 :     switch (poWK->eWorkingDataType)
    1984             :     {
    1985      916736 :         case GDT_Byte:
    1986      916736 :             CLAMP(GByte);
    1987      916736 :             break;
    1988             : 
    1989           0 :         case GDT_Int8:
    1990           0 :             CLAMP(GInt8);
    1991           0 :             break;
    1992             : 
    1993        1085 :         case GDT_Int16:
    1994        1085 :             CLAMP(GInt16);
    1995        1085 :             break;
    1996             : 
    1997         363 :         case GDT_UInt16:
    1998         363 :             CLAMP(GUInt16);
    1999         363 :             break;
    2000             : 
    2001         315 :         case GDT_UInt32:
    2002         315 :             CLAMP(GUInt32);
    2003         315 :             break;
    2004             : 
    2005        1318 :         case GDT_Int32:
    2006        1318 :             CLAMP(GInt32);
    2007        1318 :             break;
    2008             : 
    2009           0 :         case GDT_UInt64:
    2010           0 :             CLAMP(std::uint64_t);
    2011           0 :             break;
    2012             : 
    2013         100 :         case GDT_Int64:
    2014         100 :             CLAMP(std::int64_t);
    2015         100 :             break;
    2016             : 
    2017        3426 :         case GDT_Float32:
    2018        3426 :             reinterpret_cast<float *>(pabyDst)[iDstOffset] =
    2019        3426 :                 static_cast<float>(dfReal);
    2020        3426 :             break;
    2021             : 
    2022         418 :         case GDT_Float64:
    2023         418 :             reinterpret_cast<double *>(pabyDst)[iDstOffset] = dfReal;
    2024         418 :             break;
    2025             : 
    2026           0 :         case GDT_CInt16:
    2027             :         case GDT_CInt32:
    2028             :         case GDT_CFloat32:
    2029             :         case GDT_CFloat64:
    2030           0 :             return false;
    2031             : 
    2032           0 :         case GDT_Unknown:
    2033             :         case GDT_TypeCount:
    2034           0 :             CPLAssert(false);
    2035             :             return false;
    2036             :     }
    2037             : 
    2038      923761 :     return true;
    2039             : }
    2040             : 
    2041             : /************************************************************************/
    2042             : /*                          GWKGetPixelValue()                          */
    2043             : /************************************************************************/
    2044             : 
    2045             : /* It is assumed that panUnifiedSrcValid has been checked before */
    2046             : 
    2047    29336100 : static bool GWKGetPixelValue(const GDALWarpKernel *poWK, int iBand,
    2048             :                              GPtrDiff_t iSrcOffset, double *pdfDensity,
    2049             :                              double *pdfReal, double *pdfImag)
    2050             : 
    2051             : {
    2052    29336100 :     GByte *pabySrc = poWK->papabySrcImage[iBand];
    2053             : 
    2054    58672300 :     if (poWK->papanBandSrcValid != nullptr &&
    2055    29336100 :         poWK->papanBandSrcValid[iBand] != nullptr &&
    2056           0 :         !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
    2057             :     {
    2058           0 :         *pdfDensity = 0.0;
    2059           0 :         return false;
    2060             :     }
    2061             : 
    2062    29336100 :     *pdfReal = 0.0;
    2063    29336100 :     *pdfImag = 0.0;
    2064             : 
    2065             :     // TODO(schwehr): Fix casting.
    2066    29336100 :     switch (poWK->eWorkingDataType)
    2067             :     {
    2068    28245600 :         case GDT_Byte:
    2069    28245600 :             *pdfReal = pabySrc[iSrcOffset];
    2070    28245600 :             *pdfImag = 0.0;
    2071    28245600 :             break;
    2072             : 
    2073           0 :         case GDT_Int8:
    2074           0 :             *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
    2075           0 :             *pdfImag = 0.0;
    2076           0 :             break;
    2077             : 
    2078       28226 :         case GDT_Int16:
    2079       28226 :             *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
    2080       28226 :             *pdfImag = 0.0;
    2081       28226 :             break;
    2082             : 
    2083         163 :         case GDT_UInt16:
    2084         163 :             *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
    2085         163 :             *pdfImag = 0.0;
    2086         163 :             break;
    2087             : 
    2088       13726 :         case GDT_Int32:
    2089       13726 :             *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
    2090       13726 :             *pdfImag = 0.0;
    2091       13726 :             break;
    2092             : 
    2093          63 :         case GDT_UInt32:
    2094          63 :             *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
    2095          63 :             *pdfImag = 0.0;
    2096          63 :             break;
    2097             : 
    2098           0 :         case GDT_Int64:
    2099           0 :             *pdfReal = static_cast<double>(
    2100           0 :                 reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
    2101           0 :             *pdfImag = 0.0;
    2102           0 :             break;
    2103             : 
    2104           0 :         case GDT_UInt64:
    2105           0 :             *pdfReal = static_cast<double>(
    2106           0 :                 reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
    2107           0 :             *pdfImag = 0.0;
    2108           0 :             break;
    2109             : 
    2110     1047220 :         case GDT_Float32:
    2111     1047220 :             *pdfReal = reinterpret_cast<float *>(pabySrc)[iSrcOffset];
    2112     1047220 :             *pdfImag = 0.0;
    2113     1047220 :             break;
    2114             : 
    2115         582 :         case GDT_Float64:
    2116         582 :             *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
    2117         582 :             *pdfImag = 0.0;
    2118         582 :             break;
    2119             : 
    2120         130 :         case GDT_CInt16:
    2121         130 :             *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2];
    2122         130 :             *pdfImag = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2 + 1];
    2123         130 :             break;
    2124             : 
    2125         130 :         case GDT_CInt32:
    2126         130 :             *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2];
    2127         130 :             *pdfImag = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2 + 1];
    2128         130 :             break;
    2129             : 
    2130         178 :         case GDT_CFloat32:
    2131         178 :             *pdfReal = reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2];
    2132         178 :             *pdfImag = reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2 + 1];
    2133         178 :             break;
    2134             : 
    2135         130 :         case GDT_CFloat64:
    2136         130 :             *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2];
    2137         130 :             *pdfImag = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2 + 1];
    2138         130 :             break;
    2139             : 
    2140           0 :         case GDT_Unknown:
    2141             :         case GDT_TypeCount:
    2142           0 :             CPLAssert(false);
    2143             :             *pdfDensity = 0.0;
    2144             :             return false;
    2145             :     }
    2146             : 
    2147    29336100 :     if (poWK->pafUnifiedSrcDensity != nullptr)
    2148     3015160 :         *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    2149             :     else
    2150    26321000 :         *pdfDensity = 1.0;
    2151             : 
    2152    29336100 :     return *pdfDensity != 0.0;
    2153             : }
    2154             : 
    2155             : /************************************************************************/
    2156             : /*                       GWKGetPixelValueReal()                         */
    2157             : /************************************************************************/
    2158             : 
                       // Fetch one source pixel of band iBand, located at linear offset
                       // iSrcOffset, converted to double.
                       //
                       // Outputs:
                       //   *pdfReal    - the pixel value (written only when the pixel passes
                       //                 the per-band validity mask and the working data type
                       //                 is one of the real types handled below).
                       //   *pdfDensity - 0.0 when the per-band mask rejects the pixel;
                       //                 otherwise pafUnifiedSrcDensity[iSrcOffset] when that
                       //                 array exists, else 1.0.
                       //
                       // Returns true when the resulting density is non-zero.
                       //
                       // NOTE(review): unlike GWKGetPixelT(), this function does not consult
                       // poWK->panUnifiedSrcValid - confirm callers handle that mask.
    2159        1012 : static bool GWKGetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
    2160             :                                  GPtrDiff_t iSrcOffset, double *pdfDensity,
    2161             :                                  double *pdfReal)
    2162             : 
    2163             : {
    2164        1012 :     GByte *pabySrc = poWK->papabySrcImage[iBand];
    2165             : 
                           // Per-band validity mask: when present and CPLMaskGet() reports
                           // false for this pixel, bail out with zero density. *pdfReal is
                           // left untouched on this path.
    2166        2026 :     if (poWK->papanBandSrcValid != nullptr &&
    2167        1014 :         poWK->papanBandSrcValid[iBand] != nullptr &&
    2168           2 :         !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
    2169             :     {
    2170           0 :         *pdfDensity = 0.0;
    2171           0 :         return false;
    2172             :     }
    2173             : 
                           // Reinterpret the byte buffer according to the working data type
                           // and widen the sample to double. iSrcOffset is an element index,
                           // not a byte offset.
    2174        1012 :     switch (poWK->eWorkingDataType)
    2175             :     {
    2176           1 :         case GDT_Byte:
    2177           1 :             *pdfReal = pabySrc[iSrcOffset];
    2178           1 :             break;
    2179             : 
    2180           0 :         case GDT_Int8:
    2181           0 :             *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
    2182           0 :             break;
    2183             : 
    2184           1 :         case GDT_Int16:
    2185           1 :             *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
    2186           1 :             break;
    2187             : 
    2188           1 :         case GDT_UInt16:
    2189           1 :             *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
    2190           1 :             break;
    2191             : 
    2192         870 :         case GDT_Int32:
    2193         870 :             *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
    2194         870 :             break;
    2195             : 
    2196          67 :         case GDT_UInt32:
    2197          67 :             *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
    2198          67 :             break;
    2199             : 
                               // 64-bit integers are converted with an explicit cast; values
                               // beyond 2^53 in magnitude cannot all be represented exactly
                               // in a double.
    2200           0 :         case GDT_Int64:
    2201           0 :             *pdfReal = static_cast<double>(
    2202           0 :                 reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
    2203           0 :             break;
    2204             : 
    2205           0 :         case GDT_UInt64:
    2206           0 :             *pdfReal = static_cast<double>(
    2207           0 :                 reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
    2208           0 :             break;
    2209             : 
    2210           2 :         case GDT_Float32:
    2211           2 :             *pdfReal = reinterpret_cast<float *>(pabySrc)[iSrcOffset];
    2212           2 :             break;
    2213             : 
    2214          70 :         case GDT_Float64:
    2215          70 :             *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
    2216          70 :             break;
    2217             : 
                               // Complex and sentinel types are not supported by this
                               // real-only accessor: assert, then return false. Neither
                               // *pdfReal nor *pdfDensity is written on this path.
    2218           0 :         case GDT_CInt16:
    2219             :         case GDT_CInt32:
    2220             :         case GDT_CFloat32:
    2221             :         case GDT_CFloat64:
    2222             :         case GDT_Unknown:
    2223             :         case GDT_TypeCount:
    2224           0 :             CPLAssert(false);
    2225             :             return false;
    2226             :     }
    2227             : 
                           // Density comes from the unified source density array when there
                           // is one; otherwise the pixel counts fully.
    2228        1012 :     if (poWK->pafUnifiedSrcDensity != nullptr)
    2229           0 :         *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    2230             :     else
    2231        1012 :         *pdfDensity = 1.0;
    2232             : 
    2233        1012 :     return *pdfDensity != 0.0;
    2234             : }
    2235             : 
    2236             : /************************************************************************/
    2237             : /*                          GWKGetPixelRow()                            */
    2238             : /************************************************************************/
    2239             : 
    2240             : /* It is assumed that adfImag[] is set to 0 by caller code for non-complex */
    2241             : /* data-types. */
    2242             : 
                       // Fetch 2 * nHalfSrcLen consecutive source pixels of band iBand, starting
                       // at element offset iSrcOffset, as doubles.
                       //
                       // Parameters:
                       //   padfDensity - optional; when non-null it receives one density per
                       //                 pixel and the validity masks are applied. When null
                       //                 no mask is consulted and the function returns true
                       //                 once the values are fetched.
                       //   adfReal     - receives the real part of each pixel.
                       //   padfImag    - written only for the complex working data types.
                       //
                       // Returns false, without fetching any pixel value, when a validity mask
                       // is present and rejects every pixel of the row. Otherwise, when
                       // padfDensity is non-null, returns whether at least one pixel ends up
                       // with a density above SRC_DENSITY_THRESHOLD.
    2243     2353850 : static bool GWKGetPixelRow(const GDALWarpKernel *poWK, int iBand,
    2244             :                            GPtrDiff_t iSrcOffset, int nHalfSrcLen,
    2245             :                            double *padfDensity, double adfReal[],
    2246             :                            double *padfImag)
    2247             : {
    2248             :     // We know that nSrcLen is even, so we can *always* unroll loops 2x.
    2249     2353850 :     const int nSrcLen = nHalfSrcLen * 2;
    2250     2353850 :     bool bHasValid = false;
    2251             : 
    2252     2353850 :     if (padfDensity != nullptr)
    2253             :     {
    2254             :         // Init the density.
    2255     3345770 :         for (int i = 0; i < nSrcLen; i += 2)
    2256             :         {
    2257     2189510 :             padfDensity[i] = 1.0;
    2258     2189510 :             padfDensity[i + 1] = 1.0;
    2259             :         }
    2260             : 
                               // Stage 1: dataset-wide validity mask. Pixels for which
                               // CPLMaskGet() reports false get density 0.
    2261     1156260 :         if (poWK->panUnifiedSrcValid != nullptr)
    2262             :         {
    2263     3281460 :             for (int i = 0; i < nSrcLen; i += 2)
    2264             :             {
    2265     2142070 :                 if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i))
    2266     2067740 :                     bHasValid = true;
    2267             :                 else
    2268       74323 :                     padfDensity[i] = 0.0;
    2269             : 
    2270     2142070 :                 if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i + 1))
    2271     2068400 :                     bHasValid = true;
    2272             :                 else
    2273       73668 :                     padfDensity[i + 1] = 0.0;
    2274             :             }
    2275             : 
    2276             :             // Reset or fail as needed.
                                   // bHasValid is cleared so that the next stage starts
                                   // its own "any pixel valid" test from scratch.
    2277     1139400 :             if (bHasValid)
    2278     1116590 :                 bHasValid = false;
    2279             :             else
    2280       22806 :                 return false;
    2281             :         }
    2282             : 
                               // Stage 2: per-band validity mask, same treatment as above.
    2283     1133450 :         if (poWK->papanBandSrcValid != nullptr &&
    2284           0 :             poWK->papanBandSrcValid[iBand] != nullptr)
    2285             :         {
    2286           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2287             :             {
    2288           0 :                 if (CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset + i))
    2289           0 :                     bHasValid = true;
    2290             :                 else
    2291           0 :                     padfDensity[i] = 0.0;
    2292             : 
    2293           0 :                 if (CPLMaskGet(poWK->papanBandSrcValid[iBand],
    2294           0 :                                iSrcOffset + i + 1))
    2295           0 :                     bHasValid = true;
    2296             :                 else
    2297           0 :                     padfDensity[i + 1] = 0.0;
    2298             :             }
    2299             : 
    2300             :             // Reset or fail as needed.
    2301           0 :             if (bHasValid)
    2302           0 :                 bHasValid = false;
    2303             :             else
    2304           0 :                 return false;
    2305             :         }
    2306             :     }
    2307             : 
    2308             :     // TODO(schwehr): Fix casting.
    2309             :     // Fetch data.
                           // Every case reinterprets the band buffer as the working type,
                           // advances to the first requested element and copies nSrcLen
                           // samples, two per loop iteration.
    2310     2331040 :     switch (poWK->eWorkingDataType)
    2311             :     {
    2312     1121060 :         case GDT_Byte:
    2313             :         {
    2314     1121060 :             GByte *pSrc =
    2315     1121060 :                 reinterpret_cast<GByte *>(poWK->papabySrcImage[iBand]);
    2316     1121060 :             pSrc += iSrcOffset;
    2317     3243800 :             for (int i = 0; i < nSrcLen; i += 2)
    2318             :             {
    2319     2122740 :                 adfReal[i] = pSrc[i];
    2320     2122740 :                 adfReal[i + 1] = pSrc[i + 1];
    2321             :             }
    2322     1121060 :             break;
    2323             :         }
    2324             : 
    2325           0 :         case GDT_Int8:
    2326             :         {
    2327           0 :             GInt8 *pSrc =
    2328           0 :                 reinterpret_cast<GInt8 *>(poWK->papabySrcImage[iBand]);
    2329           0 :             pSrc += iSrcOffset;
    2330           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2331             :             {
    2332           0 :                 adfReal[i] = pSrc[i];
    2333           0 :                 adfReal[i + 1] = pSrc[i + 1];
    2334             :             }
    2335           0 :             break;
    2336             :         }
    2337             : 
    2338        5558 :         case GDT_Int16:
    2339             :         {
    2340        5558 :             GInt16 *pSrc =
    2341        5558 :                 reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
    2342        5558 :             pSrc += iSrcOffset;
    2343       21380 :             for (int i = 0; i < nSrcLen; i += 2)
    2344             :             {
    2345       15822 :                 adfReal[i] = pSrc[i];
    2346       15822 :                 adfReal[i + 1] = pSrc[i + 1];
    2347             :             }
    2348        5558 :             break;
    2349             :         }
    2350             : 
    2351        4114 :         case GDT_UInt16:
    2352             :         {
    2353        4114 :             GUInt16 *pSrc =
    2354        4114 :                 reinterpret_cast<GUInt16 *>(poWK->papabySrcImage[iBand]);
    2355        4114 :             pSrc += iSrcOffset;
    2356       18492 :             for (int i = 0; i < nSrcLen; i += 2)
    2357             :             {
    2358       14378 :                 adfReal[i] = pSrc[i];
    2359       14378 :                 adfReal[i + 1] = pSrc[i + 1];
    2360             :             }
    2361        4114 :             break;
    2362             :         }
    2363             : 
    2364        1130 :         case GDT_Int32:
    2365             :         {
    2366        1130 :             GInt32 *pSrc =
    2367        1130 :                 reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
    2368        1130 :             pSrc += iSrcOffset;
    2369        2992 :             for (int i = 0; i < nSrcLen; i += 2)
    2370             :             {
    2371        1862 :                 adfReal[i] = pSrc[i];
    2372        1862 :                 adfReal[i + 1] = pSrc[i + 1];
    2373             :             }
    2374        1130 :             break;
    2375             :         }
    2376             : 
    2377         750 :         case GDT_UInt32:
    2378             :         {
    2379         750 :             GUInt32 *pSrc =
    2380         750 :                 reinterpret_cast<GUInt32 *>(poWK->papabySrcImage[iBand]);
    2381         750 :             pSrc += iSrcOffset;
    2382        2232 :             for (int i = 0; i < nSrcLen; i += 2)
    2383             :             {
    2384        1482 :                 adfReal[i] = pSrc[i];
    2385        1482 :                 adfReal[i + 1] = pSrc[i + 1];
    2386             :             }
    2387         750 :             break;
    2388             :         }
    2389             : 
                               // 64-bit integers use an explicit cast to double; very large
                               // magnitudes may lose precision in the conversion.
    2390         190 :         case GDT_Int64:
    2391             :         {
    2392         190 :             auto pSrc =
    2393         190 :                 reinterpret_cast<std::int64_t *>(poWK->papabySrcImage[iBand]);
    2394         190 :             pSrc += iSrcOffset;
    2395         380 :             for (int i = 0; i < nSrcLen; i += 2)
    2396             :             {
    2397         190 :                 adfReal[i] = static_cast<double>(pSrc[i]);
    2398         190 :                 adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
    2399             :             }
    2400         190 :             break;
    2401             :         }
    2402             : 
    2403           0 :         case GDT_UInt64:
    2404             :         {
    2405           0 :             auto pSrc =
    2406           0 :                 reinterpret_cast<std::uint64_t *>(poWK->papabySrcImage[iBand]);
    2407           0 :             pSrc += iSrcOffset;
    2408           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2409             :             {
    2410           0 :                 adfReal[i] = static_cast<double>(pSrc[i]);
    2411           0 :                 adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
    2412             :             }
    2413           0 :             break;
    2414             :         }
    2415             : 
    2416       25074 :         case GDT_Float32:
    2417             :         {
    2418       25074 :             float *pSrc =
    2419       25074 :                 reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
    2420       25074 :             pSrc += iSrcOffset;
    2421      121347 :             for (int i = 0; i < nSrcLen; i += 2)
    2422             :             {
    2423       96273 :                 adfReal[i] = pSrc[i];
    2424       96273 :                 adfReal[i + 1] = pSrc[i + 1];
    2425             :             }
    2426       25074 :             break;
    2427             :         }
    2428             : 
    2429         940 :         case GDT_Float64:
    2430             :         {
    2431         940 :             double *pSrc =
    2432         940 :                 reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
    2433         940 :             pSrc += iSrcOffset;
    2434        2612 :             for (int i = 0; i < nSrcLen; i += 2)
    2435             :             {
    2436        1672 :                 adfReal[i] = pSrc[i];
    2437        1672 :                 adfReal[i + 1] = pSrc[i + 1];
    2438             :             }
    2439         940 :             break;
    2440             :         }
    2441             : 
                               // Complex types: the buffer is read as interleaved
                               // (real, imaginary) component pairs, so pixel k lives at
                               // components 2k and 2k + 1. Hence the doubled start offset
                               // and the 2 * i indexing below.
    2442     1169410 :         case GDT_CInt16:
    2443             :         {
    2444     1169410 :             GInt16 *pSrc =
    2445     1169410 :                 reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
    2446     1169410 :             pSrc += 2 * iSrcOffset;
    2447     4676400 :             for (int i = 0; i < nSrcLen; i += 2)
    2448             :             {
    2449     3506990 :                 adfReal[i] = pSrc[2 * i];
    2450     3506990 :                 padfImag[i] = pSrc[2 * i + 1];
    2451             : 
    2452     3506990 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2453     3506990 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2454             :             }
    2455     1169410 :             break;
    2456             :         }
    2457             : 
    2458         940 :         case GDT_CInt32:
    2459             :         {
    2460         940 :             GInt32 *pSrc =
    2461         940 :                 reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
    2462         940 :             pSrc += 2 * iSrcOffset;
    2463        2612 :             for (int i = 0; i < nSrcLen; i += 2)
    2464             :             {
    2465        1672 :                 adfReal[i] = pSrc[2 * i];
    2466        1672 :                 padfImag[i] = pSrc[2 * i + 1];
    2467             : 
    2468        1672 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2469        1672 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2470             :             }
    2471         940 :             break;
    2472             :         }
    2473             : 
    2474         940 :         case GDT_CFloat32:
    2475             :         {
    2476         940 :             float *pSrc =
    2477         940 :                 reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
    2478         940 :             pSrc += 2 * iSrcOffset;
    2479        2612 :             for (int i = 0; i < nSrcLen; i += 2)
    2480             :             {
    2481        1672 :                 adfReal[i] = pSrc[2 * i];
    2482        1672 :                 padfImag[i] = pSrc[2 * i + 1];
    2483             : 
    2484        1672 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2485        1672 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2486             :             }
    2487         940 :             break;
    2488             :         }
    2489             : 
    2490         940 :         case GDT_CFloat64:
    2491             :         {
    2492         940 :             double *pSrc =
    2493         940 :                 reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
    2494         940 :             pSrc += 2 * iSrcOffset;
    2495        2612 :             for (int i = 0; i < nSrcLen; i += 2)
    2496             :             {
    2497        1672 :                 adfReal[i] = pSrc[2 * i];
    2498        1672 :                 padfImag[i] = pSrc[2 * i + 1];
    2499             : 
    2500        1672 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2501        1672 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2502             :             }
    2503         940 :             break;
    2504             :         }
    2505             : 
                               // Unsupported working type: assert, zero out any densities
                               // so the row contributes nothing, and report failure.
    2506           0 :         case GDT_Unknown:
    2507             :         case GDT_TypeCount:
    2508           0 :             CPLAssert(false);
    2509             :             if (padfDensity)
    2510             :                 memset(padfDensity, 0, nSrcLen * sizeof(double));
    2511             :             return false;
    2512             :     }
    2513             : 
                           // Without a density output there is nothing left to compute.
    2514     2331040 :     if (padfDensity == nullptr)
    2515     1197590 :         return true;
    2516             : 
                           // Final density pass. At this point each padfDensity[] entry is
                           // either 1.0 or 0.0, as left by the mask stages above.
    2517     1133450 :     if (poWK->pafUnifiedSrcDensity == nullptr)
    2518             :     {
    2519     3234200 :         for (int i = 0; i < nSrcLen; i += 2)
    2520             :         {
    2521             :             // Take into account earlier calcs.
    2522     2112850 :             if (padfDensity[i] > SRC_DENSITY_THRESHOLD)
    2523             :             {
    2524     2072950 :                 padfDensity[i] = 1.0;
    2525     2072950 :                 bHasValid = true;
    2526             :             }
    2527             : 
    2528     2112850 :             if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD)
    2529             :             {
    2530     2073600 :                 padfDensity[i + 1] = 1.0;
    2531     2073600 :                 bHasValid = true;
    2532             :             }
    2533             :         }
    2534             :     }
    2535             :     else
    2536             :     {
                               // For pixels that survived the masks, substitute the real
                               // density, then test again: the substituted density may
                               // itself be at or below the threshold.
    2537       54348 :         for (int i = 0; i < nSrcLen; i += 2)
    2538             :         {
    2539       42243 :             if (padfDensity[i] > SRC_DENSITY_THRESHOLD)
    2540       42243 :                 padfDensity[i] = poWK->pafUnifiedSrcDensity[iSrcOffset + i];
    2541       42243 :             if (padfDensity[i] > SRC_DENSITY_THRESHOLD)
    2542       41704 :                 bHasValid = true;
    2543             : 
    2544       42243 :             if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD)
    2545       42243 :                 padfDensity[i + 1] =
    2546       42243 :                     poWK->pafUnifiedSrcDensity[iSrcOffset + i + 1];
    2547       42243 :             if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD)
    2548       41598 :                 bHasValid = true;
    2549             :         }
    2550             :     }
    2551             : 
    2552     1133450 :     return bHasValid;
    2553             : }
    2554             : 
    2555             : /************************************************************************/
    2556             : /*                          GWKGetPixelT()                              */
    2557             : /************************************************************************/
    2558             : 
                       // Fetch one source pixel of band iBand at element offset iSrcOffset,
                       // reading the band buffer directly as type T (no conversion).
                       //
                       // Outputs:
                       //   *pValue     - the pixel value; written only when the pixel passes
                       //                 both validity masks.
                       //   *pdfDensity - 0.0 when a mask rejects the pixel; otherwise
                       //                 pafUnifiedSrcDensity[iSrcOffset] when that array
                       //                 exists, else 1.0.
                       //
                       // Returns true when the resulting density is non-zero.
    2559             : template <class T>
    2560     7332114 : static bool GWKGetPixelT(const GDALWarpKernel *poWK, int iBand,
    2561             :                          GPtrDiff_t iSrcOffset, double *pdfDensity, T *pValue)
    2562             : 
    2563             : {
    2564     7332114 :     T *pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
    2565             : 
                           // Reject the pixel if either the dataset-wide mask or the
                           // per-band mask (each optional) reports it as not valid.
    2566    16802154 :     if ((poWK->panUnifiedSrcValid != nullptr &&
    2567    14664208 :          !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset)) ||
    2568     7332114 :         (poWK->papanBandSrcValid != nullptr &&
    2569          21 :          poWK->papanBandSrcValid[iBand] != nullptr &&
    2570          21 :          !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset)))
    2571             :     {
    2572           9 :         *pdfDensity = 0.0;
    2573           9 :         return false;
    2574             :     }
    2575             : 
    2576     7332104 :     *pValue = pSrc[iSrcOffset];
    2577             : 
    2578     7332104 :     if (poWK->pafUnifiedSrcDensity == nullptr)
    2579     6997351 :         *pdfDensity = 1.0;
    2580             :     else
    2581      334754 :         *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    2582             : 
    2583     7332104 :     return *pdfDensity != 0.0;
    2584             : }
    2585             : 
    2586             : /************************************************************************/
    2587             : /*                        GWKBilinearResample()                         */
    2588             : /*     Set of bilinear interpolators                                    */
    2589             : /************************************************************************/
    2590             : 
                       // Bilinear interpolation of band iBand at source position
                       // (dfSrcX, dfSrcY), using up to four neighbouring pixels fetched as two
                       // 2-pixel rows through GWKGetPixelRow(). Pixels that fall outside the
                       // source image, or whose density is not above SRC_DENSITY_THRESHOLD,
                       // are left out and the remaining weights are renormalised.
                       //
                       // Outputs: *pdfReal, *pdfImag and *pdfDensity receive the weighted
                       // value and density (all three are set to 0.0 when no usable pixel
                       // was found).
                       //
                       // Returns true only on the renormalising path at the end of the
                       // function; see the notes on the result section for the other two.
    2591       72664 : static bool GWKBilinearResample4Sample(const GDALWarpKernel *poWK, int iBand,
    2592             :                                        double dfSrcX, double dfSrcY,
    2593             :                                        double *pdfDensity, double *pdfReal,
    2594             :                                        double *pdfImag)
    2595             : 
    2596             : {
    2597             :     // Save as local variables to avoid following pointers.
    2598       72664 :     const int nSrcXSize = poWK->nSrcXSize;
    2599       72664 :     const int nSrcYSize = poWK->nSrcYSize;
    2600             : 
                           // (iSrcX, iSrcY) is the upper-left pixel of the 2x2 window. The
                           // 0.5 shift reflects pixel centres sitting at half-integer
                           // coordinates; dfRatioX/Y is the weight given to that upper-left
                           // column/row (1.0 when the position is exactly on its centre).
    2601       72664 :     int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    2602       72664 :     int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    2603       72664 :     double dfRatioX = 1.5 - (dfSrcX - iSrcX);
    2604       72664 :     double dfRatioY = 1.5 - (dfSrcY - iSrcY);
    2605       72664 :     bool bShifted = false;
    2606             : 
                           // Positions within half a pixel of the left/top edge: clamp the
                           // window to column/row 0 and give it the full weight.
    2607       72664 :     if (iSrcX == -1)
    2608             :     {
    2609         292 :         iSrcX = 0;
    2610         292 :         dfRatioX = 1;
    2611             :     }
    2612       72664 :     if (iSrcY == -1)
    2613             :     {
    2614        7686 :         iSrcY = 0;
    2615        7686 :         dfRatioY = 1;
    2616             :     }
    2617       72664 :     GPtrDiff_t iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    2618             : 
    2619             :     // Shift so we don't overrun the array.
                           // This triggers when iSrcOffset is the last pixel of the last row
                           // or of the second-to-last row: reading a 2-pixel row from there
                           // (or from the row below) would run past the end of the buffer.
                           // The read is moved back one pixel and the wanted sample is taken
                           // from index 1 instead of index 0 (see "Shifting corrected").
    2620       72664 :     if (static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize == iSrcOffset + 1 ||
    2621       72614 :         static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize ==
    2622       72614 :             iSrcOffset + nSrcXSize + 1)
    2623             :     {
    2624         100 :         bShifted = true;
    2625         100 :         --iSrcOffset;
    2626             :     }
    2627             : 
    2628       72664 :     double adfDensity[2] = {0.0, 0.0};
    2629       72664 :     double adfReal[2] = {0.0, 0.0};
    2630       72664 :     double adfImag[2] = {0.0, 0.0};
    2631       72664 :     double dfAccumulatorReal = 0.0;
    2632       72664 :     double dfAccumulatorImag = 0.0;
    2633       72664 :     double dfAccumulatorDensity = 0.0;
    2634       72664 :     double dfAccumulatorDivisor = 0.0;
    2635             : 
    2636       72664 :     const GPtrDiff_t nSrcPixels =
    2637       72664 :         static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize;
    2638             :     // Get pixel row.
                           // Upper row of the window; skipped entirely when the row index or
                           // the start offset lies outside the image.
    2639       72664 :     if (iSrcY >= 0 && iSrcY < nSrcYSize && iSrcOffset >= 0 &&
    2640      145328 :         iSrcOffset < nSrcPixels &&
    2641       72664 :         GWKGetPixelRow(poWK, iBand, iSrcOffset, 1, adfDensity, adfReal,
    2642             :                        adfImag))
    2643             :     {
    2644       67008 :         double dfMult1 = dfRatioX * dfRatioY;
    2645       67008 :         double dfMult2 = (1.0 - dfRatioX) * dfRatioY;
    2646             : 
    2647             :         // Shifting corrected.
    2648       67008 :         if (bShifted)
    2649             :         {
    2650         100 :             adfReal[0] = adfReal[1];
    2651         100 :             adfImag[0] = adfImag[1];
    2652         100 :             adfDensity[0] = adfDensity[1];
    2653             :         }
    2654             : 
    2655             :         // Upper Left Pixel.
    2656       67008 :         if (iSrcX >= 0 && iSrcX < nSrcXSize &&
    2657       67008 :             adfDensity[0] > SRC_DENSITY_THRESHOLD)
    2658             :         {
    2659       61578 :             dfAccumulatorDivisor += dfMult1;
    2660             : 
    2661       61578 :             dfAccumulatorReal += adfReal[0] * dfMult1;
    2662       61578 :             dfAccumulatorImag += adfImag[0] * dfMult1;
    2663       61578 :             dfAccumulatorDensity += adfDensity[0] * dfMult1;
    2664             :         }
    2665             : 
    2666             :         // Upper Right Pixel.
    2667       67008 :         if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
    2668       66427 :             adfDensity[1] > SRC_DENSITY_THRESHOLD)
    2669             :         {
    2670       61153 :             dfAccumulatorDivisor += dfMult2;
    2671             : 
    2672       61153 :             dfAccumulatorReal += adfReal[1] * dfMult2;
    2673       61153 :             dfAccumulatorImag += adfImag[1] * dfMult2;
    2674       61153 :             dfAccumulatorDensity += adfDensity[1] * dfMult2;
    2675             :         }
    2676             :     }
    2677             : 
    2678             :     // Get pixel row.
                           // Lower row of the window, one full image row further on. The
                           // scratch arrays are reused and overwritten by this fetch.
    2679       72664 :     if (iSrcY + 1 >= 0 && iSrcY + 1 < nSrcYSize &&
    2680      213910 :         iSrcOffset + nSrcXSize >= 0 && iSrcOffset + nSrcXSize < nSrcPixels &&
    2681       68582 :         GWKGetPixelRow(poWK, iBand, iSrcOffset + nSrcXSize, 1, adfDensity,
    2682             :                        adfReal, adfImag))
    2683             :     {
    2684       63023 :         double dfMult1 = dfRatioX * (1.0 - dfRatioY);
    2685       63023 :         double dfMult2 = (1.0 - dfRatioX) * (1.0 - dfRatioY);
    2686             : 
    2687             :         // Shifting corrected
    2688       63023 :         if (bShifted)
    2689             :         {
    2690          50 :             adfReal[0] = adfReal[1];
    2691          50 :             adfImag[0] = adfImag[1];
    2692          50 :             adfDensity[0] = adfDensity[1];
    2693             :         }
    2694             : 
    2695             :         // Lower Left Pixel
    2696       63023 :         if (iSrcX >= 0 && iSrcX < nSrcXSize &&
    2697       63023 :             adfDensity[0] > SRC_DENSITY_THRESHOLD)
    2698             :         {
    2699       57744 :             dfAccumulatorDivisor += dfMult1;
    2700             : 
    2701       57744 :             dfAccumulatorReal += adfReal[0] * dfMult1;
    2702       57744 :             dfAccumulatorImag += adfImag[0] * dfMult1;
    2703       57744 :             dfAccumulatorDensity += adfDensity[0] * dfMult1;
    2704             :         }
    2705             : 
    2706             :         // Lower Right Pixel.
    2707       63023 :         if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
    2708       62492 :             adfDensity[1] > SRC_DENSITY_THRESHOLD)
    2709             :         {
    2710       57515 :             dfAccumulatorDivisor += dfMult2;
    2711             : 
    2712       57515 :             dfAccumulatorReal += adfReal[1] * dfMult2;
    2713       57515 :             dfAccumulatorImag += adfImag[1] * dfMult2;
    2714       57515 :             dfAccumulatorDensity += adfDensity[1] * dfMult2;
    2715             :         }
    2716             :     }
    2717             : 
    2718             :     /* -------------------------------------------------------------------- */
    2719             :     /*      Return result.                                                  */
    2720             :     /* -------------------------------------------------------------------- */
                           // Three cases, keyed on the sum of the weights actually used:
                           //  - exactly 1.0: the accumulators are stored as they are, with
                           //    no division.
                           //    NOTE(review): this branch returns false even though a result
                           //    has been stored - confirm that callers rely on *pdfDensity
                           //    rather than on the return value.
                           //  - below 0.00001: nothing usable was found; outputs are zeroed
                           //    and false is returned.
                           //  - otherwise: some pixels were dropped, so divide by the weight
                           //    sum to renormalise and return true.
    2721       72664 :     if (dfAccumulatorDivisor == 1.0)
    2722             :     {
    2723       41607 :         *pdfReal = dfAccumulatorReal;
    2724       41607 :         *pdfImag = dfAccumulatorImag;
    2725       41607 :         *pdfDensity = dfAccumulatorDensity;
    2726       41607 :         return false;
    2727             :     }
    2728       31057 :     else if (dfAccumulatorDivisor < 0.00001)
    2729             :     {
    2730           0 :         *pdfReal = 0.0;
    2731           0 :         *pdfImag = 0.0;
    2732           0 :         *pdfDensity = 0.0;
    2733           0 :         return false;
    2734             :     }
    2735             :     else
    2736             :     {
    2737       31057 :         *pdfReal = dfAccumulatorReal / dfAccumulatorDivisor;
    2738       31057 :         *pdfImag = dfAccumulatorImag / dfAccumulatorDivisor;
    2739       31057 :         *pdfDensity = dfAccumulatorDensity / dfAccumulatorDivisor;
    2740       31057 :         return true;
    2741             :     }
    2742             : }
    2743             : 
    2744             : template <class T>
    2745     5116014 : static bool GWKBilinearResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
    2746             :                                                int iBand, double dfSrcX,
    2747             :                                                double dfSrcY, T *pValue)
    2748             : 
    2749             : {
    2750             : 
    2751     5116014 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    2752     5116014 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    2753     5116014 :     GPtrDiff_t iSrcOffset =
    2754     5116014 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    2755     5116014 :     const double dfRatioX = 1.5 - (dfSrcX - iSrcX);
    2756     5116014 :     const double dfRatioY = 1.5 - (dfSrcY - iSrcY);
    2757             : 
    2758     5116014 :     const T *const pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
    2759             : 
    2760     5116014 :     if (iSrcX >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
    2761     5012847 :         iSrcY + 1 < poWK->nSrcYSize)
    2762             :     {
    2763     4988678 :         const double dfAccumulator =
    2764     4988678 :             (pSrc[iSrcOffset] * dfRatioX +
    2765     4988678 :              pSrc[iSrcOffset + 1] * (1.0 - dfRatioX)) *
    2766             :                 dfRatioY +
    2767     4988678 :             (pSrc[iSrcOffset + poWK->nSrcXSize] * dfRatioX +
    2768     4988678 :              pSrc[iSrcOffset + 1 + poWK->nSrcXSize] * (1.0 - dfRatioX)) *
    2769     4988678 :                 (1.0 - dfRatioY);
    2770             : 
    2771     4988678 :         *pValue = GWKRoundValueT<T>(dfAccumulator);
    2772             : 
    2773     4988678 :         return true;
    2774             :     }
    2775             : 
    2776      127349 :     double dfAccumulatorDivisor = 0.0;
    2777      127349 :     double dfAccumulator = 0.0;
    2778             : 
    2779             :     // Upper Left Pixel.
    2780      127349 :     if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY >= 0 &&
    2781       53440 :         iSrcY < poWK->nSrcYSize)
    2782             :     {
    2783       53440 :         const double dfMult = dfRatioX * dfRatioY;
    2784             : 
    2785       53440 :         dfAccumulatorDivisor += dfMult;
    2786             : 
    2787       53440 :         dfAccumulator += pSrc[iSrcOffset] * dfMult;
    2788             :     }
    2789             : 
    2790             :     // Upper Right Pixel.
    2791      127349 :     if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
    2792       61354 :         iSrcY < poWK->nSrcYSize)
    2793             :     {
    2794       61354 :         const double dfMult = (1.0 - dfRatioX) * dfRatioY;
    2795             : 
    2796       61354 :         dfAccumulatorDivisor += dfMult;
    2797             : 
    2798       61354 :         dfAccumulator += pSrc[iSrcOffset + 1] * dfMult;
    2799             :     }
    2800             : 
    2801             :     // Lower Right Pixel.
    2802      127349 :     if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
    2803       97471 :         iSrcY + 1 < poWK->nSrcYSize)
    2804             :     {
    2805       72902 :         const double dfMult = (1.0 - dfRatioX) * (1.0 - dfRatioY);
    2806             : 
    2807       72902 :         dfAccumulatorDivisor += dfMult;
    2808             : 
    2809       72902 :         dfAccumulator += pSrc[iSrcOffset + 1 + poWK->nSrcXSize] * dfMult;
    2810             :     }
    2811             : 
    2812             :     // Lower Left Pixel.
    2813      127349 :     if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
    2814       89535 :         iSrcY + 1 < poWK->nSrcYSize)
    2815             :     {
    2816       64758 :         const double dfMult = dfRatioX * (1.0 - dfRatioY);
    2817             : 
    2818       64758 :         dfAccumulatorDivisor += dfMult;
    2819             : 
    2820       64758 :         dfAccumulator += pSrc[iSrcOffset + poWK->nSrcXSize] * dfMult;
    2821             :     }
    2822             : 
    2823             :     /* -------------------------------------------------------------------- */
    2824             :     /*      Return result.                                                  */
    2825             :     /* -------------------------------------------------------------------- */
    2826      127349 :     double dfValue = 0.0;
    2827             : 
    2828      127349 :     if (dfAccumulatorDivisor < 0.00001)
    2829             :     {
    2830           0 :         *pValue = 0;
    2831           0 :         return false;
    2832             :     }
    2833      127349 :     else if (dfAccumulatorDivisor == 1.0)
    2834             :     {
    2835        8767 :         dfValue = dfAccumulator;
    2836             :     }
    2837             :     else
    2838             :     {
    2839      118582 :         dfValue = dfAccumulator / dfAccumulatorDivisor;
    2840             :     }
    2841             : 
    2842      127349 :     *pValue = GWKRoundValueT<T>(dfValue);
    2843             : 
    2844      127349 :     return true;
    2845             : }
    2846             : 
    2847             : /************************************************************************/
    2848             : /*                        GWKCubicResample()                            */
    2849             : /*     Set of bicubic interpolators using cubic convolution.            */
    2850             : /************************************************************************/
    2851             : 
    2852             : // http://verona.fi-p.unam.mx/boris/practicas/CubConvInterp.pdf Formula 18
    2853             : // or http://en.wikipedia.org/wiki/Cubic_Hermite_spline : CINTx(p_1,p0,p1,p2)
    2854             : // http://en.wikipedia.org/wiki/Bicubic_interpolation: matrix notation
    2855             : 
                       // Cubic convolution of four consecutive samples f0, f1, f2, f3.
                       //
                       // Returns
                       //   f1 + 0.5 * (distance1 * (f2 - f0) +
                       //               distance2 * (2*f0 - 5*f1 + 4*f2 - f3) +
                       //               distance3 * (-f0 + 3*f1 - 3*f2 + f3)).
                       // The caller GWKCubicResampleNoMasks4SampleT() passes d, d*d and d*d*d
                       // as distance1..distance3, so the powers are supplied rather than
                       // recomputed here. With all three distances equal to 0 the result is
                       // f1; with all three equal to 1 the result is f2.
    2856             : template <typename T>
    2857     1602850 : static inline T CubicConvolution(T distance1, T distance2, T distance3, T f0,
    2858             :                                  T f1, T f2, T f3)
    2859             : {
    2860     1602850 :     return (f1 + T(0.5) * (distance1 * (f2 - f0) +
    2861     1602850 :                            distance2 * (2 * f0 - 5 * f1 + 4 * f2 - f3) +
    2862     1602850 :                            distance3 * (3 * (f1 - f2) + f3 - f0)));
    2863             : }
    2864             : 
    2865             : /************************************************************************/
    2866             : /*                       GWKCubicComputeWeights()                       */
    2867             : /************************************************************************/
    2868             : 
    2869             : // adfCoeffs[2] = 1.0 - (adfCoeffs[0] + adfCoeffs[1] + adfCoeffs[3]);
    2870             : 
                       // Fills coeffs[0..3] with the cubic convolution weights for the
                       // fractional offset x.
                       //
                       // Expanded, the weights are:
                       //   coeffs[0] = (-x + 2x^2 -  x^3) / 2
                       //   coeffs[1] = ( 2 - 5x^2 + 3x^3) / 2
                       //   coeffs[2] = ( x + 4x^2 - 3x^3) / 2
                       //   coeffs[3] = (   -  x^2 +  x^3) / 2
                       // They sum to 1 for any x; x == 0 gives (0, 1, 0, 0) and x == 1 gives
                       // (0, 0, 1, 0). The callers in this file apply them, via CONVOL4, to
                       // four consecutive samples starting one sample before the integer
                       // position.
    2871             : template <typename T>
    2872     2267674 : static inline void GWKCubicComputeWeights(T x, T coeffs[4])
    2873             : {
                           // Common sub-expressions shared by the four polynomials below.
    2874     2267674 :     const T halfX = T(0.5) * x;
    2875     2267674 :     const T threeX = T(3.0) * x;
    2876     2267674 :     const T halfX2 = halfX * x;
    2877             : 
    2878     2267674 :     coeffs[0] = halfX * (-1 + x * (2 - x));
    2879     2267674 :     coeffs[1] = 1 + halfX2 * (-5 + threeX);
    2880     2267674 :     coeffs[2] = halfX * (1 + x * (4 - threeX));
    2881     2267674 :     coeffs[3] = halfX2 * (-1 + x);
    2882     2267674 : }
    2883             : 
                       // Dot product of the first four elements of v1 and v2. Both arguments
                       // only need to support operator[] for indices 0..3. Being a macro, each
                       // argument expression is evaluated four times, so it must not have
                       // side effects.
    2884             : // TODO(schwehr): Use an inline function.
    2885             : #define CONVOL4(v1, v2)                                                        \
    2886             :     ((v1)[0] * (v2)[0] + (v1)[1] * (v2)[1] + (v1)[2] * (v2)[2] +               \
    2887             :      (v1)[3] * (v2)[3])
    2888             : 
                       // Everything between "#if 0" and the matching "#endif" is excluded
                       // from compilation. It is an alternative formulation of the cubic
                       // weights in which the common factor 0.5 * x is kept out of the
                       // coefficients (returned through dfHalfX) and re-applied once in
                       // CONVOL4_Optim2MAX. adfCoeffs[2] and adfCoeffs[3] are derived from
                       // adfCoeffs[1] and adfCoeffs[0]; the commented-out assignments show the
                       // direct formulas they are equal to.
    2889             : #if 0
    2890             : // Optimal (in theory...) for max 2 convolutions: 14 multiplications
    2891             : // instead of 17.
    2892             : // TODO(schwehr): Use an inline function.
    2893             : #define GWKCubicComputeWeights_Optim2MAX(dfX_, adfCoeffs, dfHalfX)             \
    2894             :     {                                                                          \
    2895             :         const double dfX = dfX_;                                               \
    2896             :         dfHalfX = 0.5 * dfX;                                                   \
    2897             :         const double dfThreeX = 3.0 * dfX;                                     \
    2898             :         const double dfXMinus1 = dfX - 1;                                      \
    2899             :                                                                                \
    2900             :         adfCoeffs[0] = -1 + dfX * (2 - dfX);                                   \
    2901             :         adfCoeffs[1] = dfX * (-5 + dfThreeX);                                  \
    2902             :         /*adfCoeffs[2] = 1 + dfX * (4 - dfThreeX);*/                           \
    2903             :         adfCoeffs[2] = -dfXMinus1 - adfCoeffs[1];                              \
    2904             :         /*adfCoeffs[3] = dfX * (-1 + dfX); */                                  \
    2905             :         adfCoeffs[3] = dfXMinus1 - adfCoeffs[0];                               \
    2906             :     }
    2907             : 
                       // Companion of the macro above: v[1] plus dfHalfX times the dot
                       // product of the unscaled coefficients with v[0..3].
    2908             : // TODO(schwehr): Use an inline function.
    2909             : #define CONVOL4_Optim2MAX(adfCoeffs, v, dfHalfX)                               \
    2910             :     ((v)[1] + (dfHalfX) * ((adfCoeffs)[0] * (v)[0] + (adfCoeffs)[1] * (v)[1] + \
    2911             :                            (adfCoeffs)[2] * (v)[2] + (adfCoeffs)[3] * (v)[3]))
    2912             : #endif
    2913             : 
                       // Bicubic resampling of one source position over a 4x4 neighbourhood,
                       // producing density, real and imaginary outputs.
                       //
                       // poWK        warp kernel; nSrcXSize and nSrcYSize are read here.
                       // iBand       band index forwarded to the helper functions.
                       // dfSrcX/Y    source position; 0.5 is subtracted before taking the
                       //             integer part (presumably pixel centres sit at
                       //             half-integer coordinates -- TODO confirm).
                       // pdfDensity, pdfReal, pdfImag
                       //             outputs, written on the cubic path; on the fallback
                       //             path they are handed to GWKBilinearResample4Sample().
                       //
                       // Returns true on the cubic path. When the 4x4 window does not fit
                       // inside the source, a row cannot be read, or any of the 16 densities
                       // is below SRC_DENSITY_THRESHOLD, the result of
                       // GWKBilinearResample4Sample() is returned instead.
    2914      299879 : static bool GWKCubicResample4Sample(const GDALWarpKernel *poWK, int iBand,
    2915             :                                     double dfSrcX, double dfSrcY,
    2916             :                                     double *pdfDensity, double *pdfReal,
    2917             :                                     double *pdfImag)
    2918             : 
    2919             : {
                           // static_cast<int> truncates toward zero (it is not floor()).
    2920      299879 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    2921      299879 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
                           // Linear index of (iSrcX, iSrcY); the row term is widened to
                           // GPtrDiff_t before the multiplication.
    2922      299879 :     GPtrDiff_t iSrcOffset =
    2923      299879 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
                           // Fractional offsets fed to GWKCubicComputeWeights().
    2924      299879 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    2925      299879 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    2926      299879 :     double adfDensity[4] = {};
    2927      299879 :     double adfReal[4] = {};
    2928      299879 :     double adfImag[4] = {};
    2929             : 
    2930             :     // Get the bilinear interpolation at the image borders.
                           // The window spans columns iSrcX-1..iSrcX+2 and rows
                           // iSrcY-1..iSrcY+2; all of them must be inside the source.
    2931      299879 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    2932      284412 :         iSrcY + 2 >= poWK->nSrcYSize)
    2933       24136 :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    2934       24136 :                                           pdfDensity, pdfReal, pdfImag);
    2935             : 
                           // One horizontally-interpolated value per window row.
    2936      275743 :     double adfValueDens[4] = {};
    2937      275743 :     double adfValueReal[4] = {};
    2938      275743 :     double adfValueImag[4] = {};
    2939             : 
    2940      275743 :     double adfCoeffsX[4] = {};
    2941      275743 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
    2942             : 
                           // Horizontal pass over the four rows iSrcY-1 .. iSrcY+2.
    2943     1232410 :     for (GPtrDiff_t i = -1; i < 3; i++)
    2944             :     {
                               // Each row is read starting at column iSrcX-1. Entries 0..3 of
                               // the arrays are used afterwards, so GWKGetPixelRow() (defined
                               // elsewhere) is expected to fill four samples; the literal 2
                               // is presumably a half-count -- TODO confirm.
    2945     1003120 :         if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
    2946      991507 :                             2, adfDensity, adfReal, adfImag) ||
    2947      991507 :             adfDensity[0] < SRC_DENSITY_THRESHOLD ||
    2948      973867 :             adfDensity[1] < SRC_DENSITY_THRESHOLD ||
    2949     2960190 :             adfDensity[2] < SRC_DENSITY_THRESHOLD ||
    2950      965566 :             adfDensity[3] < SRC_DENSITY_THRESHOLD)
    2951             :         {
    2952       46449 :             return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    2953       46449 :                                               pdfDensity, pdfReal, pdfImag);
    2954             :         }
    2955             : 
                               // i runs from -1, hence the i + 1 index.
    2956      956668 :         adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
    2957      956668 :         adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
    2958      956668 :         adfValueImag[i + 1] = CONVOL4(adfCoeffsX, adfImag);
    2959             :     }
    2960             : 
    2961             :     /* -------------------------------------------------------------------- */
    2962             :     /*      For now, if we have any pixels missing in the kernel area,      */
    2963             :     /*      we fallback on using bilinear interpolation.  Ideally we        */
    2964             :     /*      should do "weight adjustment" of our results similarly to       */
    2965             :     /*      what is done for the cubic spline and lanc. interpolators.      */
    2966             :     /* -------------------------------------------------------------------- */
    2967             : 
                           // Vertical pass: combine the four row results.
    2968      229294 :     double adfCoeffsY[4] = {};
    2969      229294 :     GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
    2970             : 
    2971      229294 :     *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
    2972      229294 :     *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
    2973      229294 :     *pdfImag = CONVOL4(adfCoeffsY, adfValueImag);
    2974             : 
    2975      229294 :     return true;
    2976             : }
    2977             : 
    2978             : #ifdef USE_SSE2
    2979             : 
    2980             : /************************************************************************/
    2981             : /*                           XMMLoad4Values()                           */
    2982             : /*                                                                      */
    2983             : /*  Load 4 packed byte or uint16, cast them to float and put them in a  */
    2984             : /*  m128 register.                                                      */
    2985             : /************************************************************************/
    2986             : 
                       // GByte overload: reads exactly 4 bytes from ptr and returns them as
                       // four floats.
    2987      949092 : static CPL_INLINE __m128 XMMLoad4Values(const GByte *ptr)
    2988             : {
                           // Copy the 4 bytes into an integer with memcpy rather than through
                           // a pointer cast, then place it in the low 32 bits of a register.
    2989             :     unsigned int i;
    2990      949092 :     memcpy(&i, ptr, 4);
    2991     1898180 :     __m128i xmm_i = _mm_cvtsi32_si128(i);
    2992             :     // Zero extend 4 packed unsigned 8-bit integers in a to packed
    2993             :     // 32-bit integers.
    2994             : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
    2995             :     xmm_i = _mm_cvtepu8_epi32(xmm_i);
    2996             : #else
                           // Without the single-instruction form, widen in two steps by
                           // interleaving with zero: 8 -> 16 bits, then 16 -> 32 bits.
    2997     1898180 :     xmm_i = _mm_unpacklo_epi8(xmm_i, _mm_setzero_si128());
    2998     1898180 :     xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
    2999             : #endif
    3000     1898180 :     return _mm_cvtepi32_ps(xmm_i);
    3001             : }
    3002             : 
                       // GUInt16 overload: reads exactly 8 bytes (four 16-bit values) from
                       // ptr and returns them as four floats.
    3003        5292 : static CPL_INLINE __m128 XMMLoad4Values(const GUInt16 *ptr)
    3004             : {
                           // Copy the 8 bytes into a 64-bit integer with memcpy, then place
                           // it in the low 64 bits of a register.
    3005             :     GUInt64 i;
    3006        5292 :     memcpy(&i, ptr, 8);
    3007       10584 :     __m128i xmm_i = _mm_cvtsi64_si128(i);
    3008             :     // Zero extend 4 packed unsigned 16-bit integers in a to packed
    3009             :     // 32-bit integers.
    3010             : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
    3011             :     xmm_i = _mm_cvtepu16_epi32(xmm_i);
    3012             : #else
                           // Fallback: interleave with zero to widen 16 -> 32 bits.
    3013       10584 :     xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
    3014             : #endif
    3015       10584 :     return _mm_cvtepi32_ps(xmm_i);
    3016             : }
    3017             : 
    3018             : /************************************************************************/
    3019             : /*                           XMMHorizontalAdd()                         */
    3020             : /*                                                                      */
    3021             : /*  Return the sum of the 4 floating points of the register.            */
    3022             : /************************************************************************/
    3023             : 
                       // Two implementations with the same signature; the preprocessor picks
                       // one. Both return v0 + v1 + v2 + v3 as a scalar float, but they pair
                       // the lanes differently: (v1+v0)+(v3+v2) in the first variant and
                       // (v2+v0)+(v3+v1) in the second, as the trailing comments show. The
                       // trailing comments list lanes from highest (v3) to lowest (v0).
    3024             : #if defined(__SSE3__) || defined(USE_NEON_OPTIMIZATIONS)
    3025             : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
    3026             : {
    3027             :     __m128 shuf = _mm_movehdup_ps(v);   // (v3   , v3   , v1   , v1)
    3028             :     __m128 sums = _mm_add_ps(v, shuf);  // (v3+v3, v3+v2, v1+v1, v1+v0)
    3029             :     shuf = _mm_movehl_ps(shuf, sums);   // (v3   , v3   , v3+v3, v3+v2)
    3030             :     sums = _mm_add_ss(sums, shuf);      // (v1+v0)+(v3+v2)
    3031             :     return _mm_cvtss_f32(sums);
    3032             : }
    3033             : #else
                       // Variant used when neither __SSE3__ nor USE_NEON_OPTIMIZATIONS is
                       // defined; it avoids _mm_movehdup_ps.
    3034      238596 : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
    3035             : {
    3036      238596 :     __m128 shuf = _mm_movehl_ps(v, v);     // (v3   , v2   , v3   , v2)
    3037      238596 :     __m128 sums = _mm_add_ps(v, shuf);     // (v3+v3, v2+v2, v3+v1, v2+v0)
    3038      238596 :     shuf = _mm_shuffle_ps(sums, sums, 1);  // (v2+v0, v2+v0, v2+v0, v3+v1)
    3039      238596 :     sums = _mm_add_ss(sums, shuf);         // (v2+v0)+(v3+v1)
    3040      238596 :     return _mm_cvtss_f32(sums);
    3041             : }
    3042             : #endif
    3043             : 
    3044             : #endif  // define USE_SSE2
    3045             : 
    3046             : /************************************************************************/
    3047             : /*            GWKCubicResampleSrcMaskIsDensity4SampleRealT()            */
    3048             : /************************************************************************/
    3049             : 
    3050             : // Note: if USE_SSE_CUBIC_IMPL, only instantiate that for Byte and UInt16,
    3051             : // because there are a few assumptions about those types.
    3052             : // We do not define USE_SSE_CUBIC_IMPL since in practice, it gives zero
    3053             : // perf benefit.
    3054             : 
                       // Bicubic resampling of one source position for a band whose buffer is
                       // read as an array of T, using poWK->pafUnifiedSrcDensity directly as
                       // the per-pixel density.
                       //
                       // poWK        warp kernel; nSrcXSize, nSrcYSize, pafUnifiedSrcDensity
                       //             and papabySrcImage[iBand] are read here.
                       // dfSrcX/Y    source position; 0.5 is subtracted before truncation.
                       // pdfDensity, pdfReal
                       //             outputs, written on the cubic path; on the fallback
                       //             path they are handed to GWKBilinearResample4Sample().
                       //
                       // Returns true on the cubic path. When the 4x4 window does not fit
                       // inside the source, or any density in the window is below
                       // SRC_DENSITY_THRESHOLD, the result of GWKBilinearResample4Sample() is
                       // returned instead, with a scratch array receiving the imaginary
                       // output.
    3055             : template <class T>
    3056         361 : static CPL_INLINE bool GWKCubicResampleSrcMaskIsDensity4SampleRealT(
    3057             :     const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
    3058             :     double *pdfDensity, double *pdfReal)
    3059             : {
                           // static_cast<int> truncates toward zero (it is not floor()).
    3060         361 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3061         361 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3062         361 :     const GPtrDiff_t iSrcOffset =
    3063         361 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3064             : 
    3065             :     // Get the bilinear interpolation at the image borders.
    3066         361 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3067         361 :         iSrcY + 2 >= poWK->nSrcYSize)
    3068             :     {
    3069           0 :         double adfImagIgnored[4] = {};
    3070           0 :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3071           0 :                                           pdfDensity, pdfReal, adfImagIgnored);
    3072             :     }
    3073             : 
                           // Optional single-precision SIMD path. It is compiled only when
                           // both USE_SSE_CUBIC_IMPL and USE_SSE2 are defined.
    3074             : #if defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
    3075             :     const float fDeltaX = static_cast<float>(dfSrcX) - 0.5f - iSrcX;
    3076             :     const float fDeltaY = static_cast<float>(dfSrcY) - 0.5f - iSrcY;
    3077             : 
    3078             :     // TODO(schwehr): Explain the magic numbers.
                           // The buffer holds three groups of 4 floats (coefficients, row
                           // densities, row values, see the three pointers below) plus one
                           // extra element, presumably slack for alignment.
                           // NOTE(review): the adjustment below adds (address & 0xf) *elements*
                           // to a float pointer, not bytes. That yields a 16-byte-aligned
                           // pointer, as _mm_load_ps requires, only when afTemp is already
                           // aligned, and a non-zero adjustment can index past the 13-element
                           // array. Verify before enabling USE_SSE_CUBIC_IMPL.
    3079             :     float afTemp[4 + 4 + 4 + 1];
    3080             :     float *pafAligned =
    3081             :         reinterpret_cast<float *>(afTemp + ((size_t)afTemp & 0xf));
    3082             :     float *pafCoeffs = pafAligned;
    3083             :     float *pafDensity = pafAligned + 4;
    3084             :     float *pafValue = pafAligned + 8;
    3085             : 
                           // Same weight formulas as GWKCubicComputeWeights(), in float.
    3086             :     const float fHalfDeltaX = 0.5f * fDeltaX;
    3087             :     const float fThreeDeltaX = 3.0f * fDeltaX;
    3088             :     const float fHalfDeltaX2 = fHalfDeltaX * fDeltaX;
    3089             : 
    3090             :     pafCoeffs[0] = fHalfDeltaX * (-1 + fDeltaX * (2 - fDeltaX));
    3091             :     pafCoeffs[1] = 1 + fHalfDeltaX2 * (-5 + fThreeDeltaX);
    3092             :     pafCoeffs[2] = fHalfDeltaX * (1 + fDeltaX * (4 - fThreeDeltaX));
    3093             :     pafCoeffs[3] = fHalfDeltaX2 * (-1 + fDeltaX);
    3094             :     __m128 xmmCoeffs = _mm_load_ps(pafCoeffs);
    3095             :     const __m128 xmmThreshold = _mm_load1_ps(&SRC_DENSITY_THRESHOLD);
    3096             : 
                           // Accumulates, over the four rows, a per-lane mask of densities
                           // that compared below the threshold.
    3097             :     __m128 xmmMaskLowDensity = _mm_setzero_ps();
    3098             :     for (GPtrDiff_t i = -1, iOffset = iSrcOffset - poWK->nSrcXSize - 1; i < 3;
    3099             :          i++, iOffset += poWK->nSrcXSize)
    3100             :     {
    3101             :         const __m128 xmmDensity =
    3102             :             _mm_loadu_ps(poWK->pafUnifiedSrcDensity + iOffset);
    3103             :         xmmMaskLowDensity = _mm_or_ps(xmmMaskLowDensity,
    3104             :                                       _mm_cmplt_ps(xmmDensity, xmmThreshold));
    3105             :         pafDensity[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
    3106             : 
    3107             :         const __m128 xmmValues =
    3108             :             XMMLoad4Values(((T *)poWK->papabySrcImage[iBand]) + iOffset);
    3109             :         pafValue[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValues));
    3110             :     }
                           // Any set lane means at least one of the 16 densities was too low.
    3111             :     if (_mm_movemask_ps(xmmMaskLowDensity))
    3112             :     {
    3113             :         double adfImagIgnored[4] = {};
    3114             :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3115             :                                           pdfDensity, pdfReal, adfImagIgnored);
    3116             :     }
    3117             : 
                           // Vertical weights; pafCoeffs is reused for them.
    3118             :     const float fHalfDeltaY = 0.5f * fDeltaY;
    3119             :     const float fThreeDeltaY = 3.0f * fDeltaY;
    3120             :     const float fHalfDeltaY2 = fHalfDeltaY * fDeltaY;
    3121             : 
    3122             :     pafCoeffs[0] = fHalfDeltaY * (-1 + fDeltaY * (2 - fDeltaY));
    3123             :     pafCoeffs[1] = 1 + fHalfDeltaY2 * (-5 + fThreeDeltaY);
    3124             :     pafCoeffs[2] = fHalfDeltaY * (1 + fDeltaY * (4 - fThreeDeltaY));
    3125             :     pafCoeffs[3] = fHalfDeltaY2 * (-1 + fDeltaY);
    3126             : 
    3127             :     xmmCoeffs = _mm_load_ps(pafCoeffs);
    3128             : 
    3129             :     const __m128 xmmDensity = _mm_load_ps(pafDensity);
    3130             :     const __m128 xmmValue = _mm_load_ps(pafValue);
    3131             :     *pdfDensity = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
    3132             :     *pdfReal = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValue));
    3133             : 
    3134             :     // We did all above computations on float32 whereas the general case is
    3135             :     // float64. Not sure if one is fundamentally more correct than the other
    3136             :     // one, but we want our optimization to give the same result as the
    3137             :     // general case as much as possible, so if the resulting value is
    3138             :     // close to some_int_value + 0.5, redo the computation with the general
    3139             :     // case.
    3140             :     // Note: If other types than Byte or UInt16, will need changes.
                           // Otherwise execution falls through to the double-precision code
                           // below, which overwrites *pdfDensity and *pdfReal.
    3141             :     if (fabs(*pdfReal - static_cast<int>(*pdfReal) - 0.5) > .007)
    3142             :         return true;
    3143             : 
    3144             : #endif  // defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
    3145             : 
                           // General double-precision path.
    3146         361 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3147         361 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3148             : 
                           // One horizontally-interpolated value per window row.
    3149         361 :     double adfValueDens[4] = {};
    3150         361 :     double adfValueReal[4] = {};
    3151             : 
    3152         361 :     double adfCoeffsX[4] = {};
    3153         361 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
    3154             : 
    3155         361 :     double adfCoeffsY[4] = {};
    3156         361 :     GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
    3157             : 
    3158        1433 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3159             :     {
                               // First sample of this row: column iSrcX-1 of row iSrcY+i.
    3160        1177 :         const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
                               // The density test is compiled only when the SIMD path is
                               // absent; when present, that path has already rejected
                               // low-density windows before reaching this loop.
    3161             : #if !(defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2))
    3162        1177 :         if (poWK->pafUnifiedSrcDensity[iOffset + 0] < SRC_DENSITY_THRESHOLD ||
    3163        1089 :             poWK->pafUnifiedSrcDensity[iOffset + 1] < SRC_DENSITY_THRESHOLD ||
    3164        1089 :             poWK->pafUnifiedSrcDensity[iOffset + 2] < SRC_DENSITY_THRESHOLD ||
    3165        1089 :             poWK->pafUnifiedSrcDensity[iOffset + 3] < SRC_DENSITY_THRESHOLD)
    3166             :         {
    3167         105 :             double adfImagIgnored[4] = {};
    3168         105 :             return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3169             :                                               pdfDensity, pdfReal,
    3170         105 :                                               adfImagIgnored);
    3171             :         }
    3172             : #endif
    3173             : 
                               // CONVOL4 indexes the pointers directly, so the density floats
                               // and the T samples are read in place without a row copy.
    3174        1072 :         adfValueDens[i + 1] =
    3175        1072 :             CONVOL4(adfCoeffsX, poWK->pafUnifiedSrcDensity + iOffset);
    3176             : 
    3177        1072 :         adfValueReal[i + 1] = CONVOL4(
    3178             :             adfCoeffsX,
    3179             :             reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
    3180             :     }
    3181             : 
                           // Vertical pass: combine the four row results.
    3182         256 :     *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
    3183         256 :     *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
    3184             : 
    3185         256 :     return true;
    3186             : }
    3187             : 
    3188             : /************************************************************************/
    3189             : /*              GWKCubicResampleSrcMaskIsDensity4SampleReal()             */
    3190             : /*     Bi-cubic when source has and only has pafUnifiedSrcDensity.      */
    3191             : /************************************************************************/
    3192             : 
                       // Non-template bicubic resampler producing density and real outputs.
                       // Rows are fetched through GWKGetPixelRow() (defined elsewhere) into
                       // double arrays instead of reading the band buffer directly.
                       //
                       // pdfDensity, pdfReal
                       //             outputs, written on the cubic path; on the fallback
                       //             path they are handed to GWKBilinearResample4Sample().
                       //
                       // Returns true on the cubic path. When the 4x4 window does not fit
                       // inside the source, a row cannot be read, or any of the 16 densities
                       // is below SRC_DENSITY_THRESHOLD, the result of
                       // GWKBilinearResample4Sample() is returned instead.
    3193           0 : static bool GWKCubicResampleSrcMaskIsDensity4SampleReal(
    3194             :     const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
    3195             :     double *pdfDensity, double *pdfReal)
    3196             : 
    3197             : {
                           // static_cast<int> truncates toward zero (it is not floor()).
    3198           0 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3199           0 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3200           0 :     const GPtrDiff_t iSrcOffset =
    3201           0 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3202           0 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3203           0 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3204             : 
    3205             :     // Get the bilinear interpolation at the image borders.
    3206           0 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3207           0 :         iSrcY + 2 >= poWK->nSrcYSize)
    3208             :     {
    3209           0 :         double adfImagIgnored[4] = {};
    3210           0 :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3211           0 :                                           pdfDensity, pdfReal, adfImagIgnored);
    3212             :     }
    3213             : 
    3214           0 :     double adfCoeffsX[4] = {};
    3215           0 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
    3216             : 
    3217           0 :     double adfCoeffsY[4] = {};
    3218           0 :     GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
    3219             : 
                           // adfValue*: one horizontally-interpolated value per window row.
                           // adfDensity, adfReal, adfImagIgnored: per-row scratch buffers; the
                           // imaginary samples are fetched but never used in the result.
    3220           0 :     double adfValueDens[4] = {};
    3221           0 :     double adfValueReal[4] = {};
    3222           0 :     double adfDensity[4] = {};
    3223           0 :     double adfReal[4] = {};
    3224           0 :     double adfImagIgnored[4] = {};
    3225             : 
                           // Horizontal pass over the four rows iSrcY-1 .. iSrcY+2.
    3226           0 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3227             :     {
                               // Each row is read starting at column iSrcX-1; entries 0..3 of
                               // the arrays are used afterwards (the literal 2 is presumably
                               // a half-count -- TODO confirm).
    3228           0 :         if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
    3229           0 :                             2, adfDensity, adfReal, adfImagIgnored) ||
    3230           0 :             adfDensity[0] < SRC_DENSITY_THRESHOLD ||
    3231           0 :             adfDensity[1] < SRC_DENSITY_THRESHOLD ||
    3232           0 :             adfDensity[2] < SRC_DENSITY_THRESHOLD ||
    3233           0 :             adfDensity[3] < SRC_DENSITY_THRESHOLD)
    3234             :         {
    3235           0 :             return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3236             :                                               pdfDensity, pdfReal,
    3237           0 :                                               adfImagIgnored);
    3238             :         }
    3239             : 
                               // i runs from -1, hence the i + 1 index.
    3240           0 :         adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
    3241           0 :         adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
    3242             :     }
    3243             : 
                           // Vertical pass: combine the four row results.
    3244           0 :     *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
    3245           0 :     *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
    3246             : 
    3247           0 :     return true;
    3248             : }
    3249             : 
                       // Bicubic resampling of one source position for a band whose buffer is
                       // read as an array of T. No density or validity information is
                       // consulted.
                       //
                       // poWK        warp kernel; nSrcXSize, nSrcYSize and
                       //             papabySrcImage[iBand] are read here.
                       // dfSrcX/Y    source position; 0.5 is subtracted before truncation.
                       // pValue      output; on the cubic path it receives
                       //             GWKClampValueT<T>() of the interpolated value.
                       //
                       // Returns true on the cubic path. When the 4x4 window does not fit
                       // inside the source, the result of
                       // GWKBilinearResampleNoMasks4SampleT() is returned instead.
    3250             : template <class T>
    3251     1906603 : static bool GWKCubicResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
    3252             :                                             int iBand, double dfSrcX,
    3253             :                                             double dfSrcY, T *pValue)
    3254             : 
    3255             : {
                           // static_cast<int> truncates toward zero (it is not floor()).
    3256     1906603 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3257     1906603 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3258     1906603 :     const GPtrDiff_t iSrcOffset =
    3259     1906603 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3260     1906603 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3261     1906603 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
                           // Powers of the vertical offset, passed to CubicConvolution() below.
    3262     1906603 :     const double dfDeltaY2 = dfDeltaY * dfDeltaY;
    3263     1906603 :     const double dfDeltaY3 = dfDeltaY2 * dfDeltaY;
    3264             : 
    3265             :     // Get the bilinear interpolation at the image borders.
    3266     1906603 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3267     1662527 :         iSrcY + 2 >= poWK->nSrcYSize)
    3268      303751 :         return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    3269      303751 :                                                   pValue);
    3270             : 
                           // Horizontal weights only; the vertical direction goes through
                           // CubicConvolution() instead of a second weight vector.
    3271     1602852 :     double adfCoeffs[4] = {};
    3272     1602852 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffs);
    3273             : 
                           // One horizontally-interpolated value per window row.
    3274     1602852 :     double adfValue[4] = {};
    3275             : 
    3276     8014250 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3277             :     {
                               // First sample of this row: column iSrcX-1 of row iSrcY+i.
    3278     6411406 :         const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
    3279             : 
    3280     6411406 :         adfValue[i + 1] = CONVOL4(
    3281             :             adfCoeffs,
    3282             :             reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
    3283             :     }
    3284             : 
                           // Vertical pass over the four row results.
    3285             :     const double dfValue =
    3286     1602852 :         CubicConvolution(dfDeltaY, dfDeltaY2, dfDeltaY3, adfValue[0],
    3287             :                          adfValue[1], adfValue[2], adfValue[3]);
    3288             : 
                           // GWKClampValueT is defined elsewhere; presumably it brings the
                           // double into T's representable range -- TODO confirm.
    3289     1602852 :     *pValue = GWKClampValueT<T>(dfValue);
    3290             : 
    3291     1602852 :     return true;
    3292             : }
    3293             : 
    3294             : /************************************************************************/
    3295             : /*                          GWKLanczosSinc()                            */
    3296             : /************************************************************************/
    3297             : 
    3298             : /*
    3299             :  * Lanczos windowed sinc interpolation kernel with radius r (the two
                       :  * implementations below hard-code r = 3 through their "/ 3" division).
    3300             :  *        /
    3301             :  *        | sinc(x) * sinc(x/r), if |x| < r
    3302             :  * L(x) = | 1, if x = 0                     ,
    3303             :  *        | 0, otherwise
    3304             :  *        \
    3305             :  *
    3306             :  * where sinc(x) = sin(PI * x) / (PI * x).
    3307             :  */
    3308             : 
    3309        1056 : static double GWKLanczosSinc(double dfX)
    3310             : {
                           // Returns sinc(dfX) * sinc(dfX / 3), with sinc(x) = sin(PI x) / (PI x),
                           // for a single abscissa. The window radius is fixed to 3 by the "/ 3"
                           // below. No |dfX| < 3 cut-off is applied in this function.
                           // NOTE(review): presumably callers only pass abscissas inside the
                           // kernel radius - confirm.

                           // Limit value at the origin; also avoids the 0/0 division below.
    3311        1056 :     if (dfX == 0.0)
    3312           0 :         return 1.0;
    3313             : 
    3314        1056 :     const double dfPIX = M_PI * dfX;
    3315        1056 :     const double dfPIXoverR = dfPIX / 3;
                           // Denominator (PI x) * (PI x / 3) of the product of the two sincs.
    3316        1056 :     const double dfPIX2overR = dfPIX * dfPIXoverR;
    3317             :     // Given that sin(3x) = 3 sin(x) - 4 sin^3 (x)
    3318             :     // we can compute sin(dfPIX) from sin(dfPIXoverR)
                           // so that only one call to sin() is needed.
    3319        1056 :     const double dfSinPIXoverR = sin(dfPIXoverR);
    3320        1056 :     const double dfSinPIXoverRSquared = dfSinPIXoverR * dfSinPIXoverR;
                           // sin(PI x) * sin(PI x / 3) = (3 - 4 s^2) * s^2, with s = sin(PI x / 3).
    3321        1056 :     const double dfSinPIXMulSinPIXoverR =
    3322        1056 :         (3 - 4 * dfSinPIXoverRSquared) * dfSinPIXoverRSquared;
    3323        1056 :     return dfSinPIXMulSinPIXoverR / dfPIX2overR;
    3324             : }
    3325             : 
    3326      106242 : static double GWKLanczosSinc4Values(double *padfValues)
    3327             : {
                           // Batch form of the radius-3 Lanczos kernel: on input padfValues[0..3]
                           // hold four abscissas; each is overwritten in place with
                           // sinc(x) * sinc(x / 3). Returns the sum of the four resulting weights.
                           // As in the scalar form, no |x| < 3 cut-off is applied here.
    3328      531210 :     for (int i = 0; i < 4; i++)
    3329             :     {
    3330      424968 :         if (padfValues[i] == 0.0)
    3331             :         {
                                   // Limit value at the origin; avoids the 0/0 division.
    3332           0 :             padfValues[i] = 1.0;
    3333             :         }
    3334             :         else
    3335             :         {
    3336      424968 :             const double dfPIX = M_PI * padfValues[i];
    3337      424968 :             const double dfPIXoverR = dfPIX / 3;
    3338      424968 :             const double dfPIX2overR = dfPIX * dfPIXoverR;
    3339             :             // Given that sin(3x) = 3 sin(x) - 4 sin^3 (x)
    3340             :             // we can compute sin(dfPIX) from sin(dfPIXoverR)
                                   // so that only one call to sin() is needed per value.
    3341      424968 :             const double dfSinPIXoverR = sin(dfPIXoverR);
    3342      424968 :             const double dfSinPIXoverRSquared = dfSinPIXoverR * dfSinPIXoverR;
                                   // sin(PI x) * sin(PI x / 3) = (3 - 4 s^2) * s^2.
    3343      424968 :             const double dfSinPIXMulSinPIXoverR =
    3344      424968 :                 (3 - 4 * dfSinPIXoverRSquared) * dfSinPIXoverRSquared;
    3345      424968 :             padfValues[i] = dfSinPIXMulSinPIXoverR / dfPIX2overR;
    3346             :         }
    3347             :     }
    3348      106242 :     return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
    3349             : }
    3350             : 
    3351             : /************************************************************************/
    3352             : /*                           GWKBilinear()                              */
    3353             : /************************************************************************/
    3354             : 
    3355     6669050 : static double GWKBilinear(double dfX)
    3356             : {
                           // Triangle (linear interpolation) kernel: returns 1 - |dfX| when
                           // |dfX| <= 1 and 0 otherwise.
    3357     6669050 :     double dfAbsX = fabs(dfX);
    3358     6669050 :     if (dfAbsX <= 1.0)
    3359     6197920 :         return 1 - dfAbsX;
    3360             :     else
    3361      471127 :         return 0.0;
    3362             : }
    3363             : 
    3364      396360 : static double GWKBilinear4Values(double *padfValues)
    3365             : {
                           // Batch form of the triangle kernel: on input padfValues[0..3] hold four
                           // abscissas; each is overwritten in place with 1 - |x| when |x| <= 1,
                           // else 0. Returns the sum of the four resulting weights.
                           // The loop is written out by hand for the four entries.
    3366      396360 :     double dfAbsX0 = fabs(padfValues[0]);
    3367      396360 :     double dfAbsX1 = fabs(padfValues[1]);
    3368      396360 :     double dfAbsX2 = fabs(padfValues[2]);
    3369      396360 :     double dfAbsX3 = fabs(padfValues[3]);
    3370      396360 :     if (dfAbsX0 <= 1.0)
    3371      290431 :         padfValues[0] = 1 - dfAbsX0;
    3372             :     else
    3373      105929 :         padfValues[0] = 0.0;
    3374      396360 :     if (dfAbsX1 <= 1.0)
    3375      396360 :         padfValues[1] = 1 - dfAbsX1;
    3376             :     else
    3377           0 :         padfValues[1] = 0.0;
    3378      396360 :     if (dfAbsX2 <= 1.0)
    3379      396360 :         padfValues[2] = 1 - dfAbsX2;
    3380             :     else
    3381           0 :         padfValues[2] = 0.0;
    3382      396360 :     if (dfAbsX3 <= 1.0)
    3383      290324 :         padfValues[3] = 1 - dfAbsX3;
    3384             :     else
    3385      106036 :         padfValues[3] = 0.0;
    3386      396360 :     return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
    3387             : }
    3388             : 
    3389             : /************************************************************************/
    3390             : /*                            GWKCubic()                                */
    3391             : /************************************************************************/
    3392             : 
    3393     4335650 : static double GWKCubic(double dfX)
    3394             : {
                           // Thin wrapper forwarding to CubicKernel(), which is defined outside
                           // this section.
                           // NOTE(review): presumably wrapped so the kernel has the plain
                           // double(double) signature used by the other GWK* weight functions -
                           // confirm against the filter table.
    3395     4335650 :     return CubicKernel(dfX);
    3396             : }
    3397             : 
    3398     7053180 : static double GWKCubic4Values(double *padfValues)
    3399             : {
                           // Batch cubic kernel: on input padfValues[0..3] hold four abscissas;
                           // each is overwritten in place with the piecewise cubic
                           //   1.5|x|^3 - 2.5|x|^2 + 1              for |x| <= 1
                           //  -0.5|x|^3 + 2.5|x|^2 - 4|x| + 2       for 1 < |x| <= 2
                           //   0                                    otherwise
                           // and the sum of the four weights is returned.
                           // NOTE(review): presumably the same function as CubicKernel() used by
                           // GWKCubic() - confirm, since CubicKernel() is not visible here.
    3400     7053180 :     const double dfAbsX_0 = fabs(padfValues[0]);
    3401     7053180 :     const double dfAbsX_1 = fabs(padfValues[1]);
    3402     7053180 :     const double dfAbsX_2 = fabs(padfValues[2]);
    3403     7053180 :     const double dfAbsX_3 = fabs(padfValues[3]);
    3404     7053180 :     const double dfX2_0 = padfValues[0] * padfValues[0];
    3405     7053180 :     const double dfX2_1 = padfValues[1] * padfValues[1];
    3406     7053180 :     const double dfX2_2 = padfValues[2] * padfValues[2];
    3407     7053180 :     const double dfX2_3 = padfValues[3] * padfValues[3];
    3408             : 
                           // Each dfValN starts at 0 so that |x| > 2 yields a zero weight without
                           // needing a third branch.
    3409     7053180 :     double dfVal0 = 0.0;
    3410     7053180 :     if (dfAbsX_0 <= 1.0)
    3411     1028260 :         dfVal0 = dfX2_0 * (1.5 * dfAbsX_0 - 2.5) + 1.0;
    3412     6024920 :     else if (dfAbsX_0 <= 2.0)
    3413     4286600 :         dfVal0 = dfX2_0 * (-0.5 * dfAbsX_0 + 2.5) - 4.0 * dfAbsX_0 + 2.0;
    3414             : 
    3415     7053180 :     double dfVal1 = 0.0;
    3416     7053180 :     if (dfAbsX_1 <= 1.0)
    3417     4103920 :         dfVal1 = dfX2_1 * (1.5 * dfAbsX_1 - 2.5) + 1.0;
    3418     2949270 :     else if (dfAbsX_1 <= 2.0)
    3419     2962560 :         dfVal1 = dfX2_1 * (-0.5 * dfAbsX_1 + 2.5) - 4.0 * dfAbsX_1 + 2.0;
    3420             : 
    3421     7053180 :     double dfVal2 = 0.0;
    3422     7053180 :     if (dfAbsX_2 <= 1.0)
    3423     5916670 :         dfVal2 = dfX2_2 * (1.5 * dfAbsX_2 - 2.5) + 1.0;
    3424     1136510 :     else if (dfAbsX_2 <= 2.0)
    3425     1149260 :         dfVal2 = dfX2_2 * (-0.5 * dfAbsX_2 + 2.5) - 4.0 * dfAbsX_2 + 2.0;
    3426             : 
    3427     7053180 :     double dfVal3 = 0.0;
    3428     7053180 :     if (dfAbsX_3 <= 1.0)
    3429     3161400 :         dfVal3 = dfX2_3 * (1.5 * dfAbsX_3 - 2.5) + 1.0;
    3430     3891790 :     else if (dfAbsX_3 <= 2.0)
    3431     3636100 :         dfVal3 = dfX2_3 * (-0.5 * dfAbsX_3 + 2.5) - 4.0 * dfAbsX_3 + 2.0;
    3432             : 
                           // Write the weights back only after all four have been computed.
    3433     7053180 :     padfValues[0] = dfVal0;
    3434     7053180 :     padfValues[1] = dfVal1;
    3435     7053180 :     padfValues[2] = dfVal2;
    3436     7053180 :     padfValues[3] = dfVal3;
    3437     7053180 :     return dfVal0 + dfVal1 + dfVal2 + dfVal3;
    3438             : }
    3439             : 
    3440             : /************************************************************************/
    3441             : /*                           GWKBSpline()                               */
    3442             : /************************************************************************/
    3443             : 
    3444             : // https://www.cs.utexas.edu/~fussell/courses/cs384g-fall2013/lectures/mitchell/Mitchell.pdf
    3445             : // Equation 8 with (B,C)=(1,0)
    3446             : // 1/6 * ( 3 * |x|^3 -  6 * |x|^2 + 4) |x| < 1
    3447             : // 1/6 * ( -|x|^3 + 6 |x|^2  - 12|x| + 8) |x| >= 1 and |x| < 2
    3448             : 
    3449      138696 : static double GWKBSpline(double x)
    3450             : {
                           // Cubic B-spline weight written as a sum of truncated cubes:
                           //   (x+2)^3 - 4 (x+1)^3 + 6 x^3 - 4 (x-1)^3
                           // where each term is only included when its base is > 0, and the whole
                           // result is 0 when x + 2 <= 0.
                           // The 1/6 normalisation factor is NOT applied (see the trailing comment
                           // on the last line), so the value returned is 6 times the B-spline.
                           // NOTE(review): no (x-2)^3 term / upper cut-off is present, so for
                           // x > 2 the expression evaluates to -(x-2)^3 rather than 0; presumably
                           // callers never pass abscissas beyond 2 - confirm.
    3451      138696 :     const double xp2 = x + 2.0;
    3452      138696 :     const double xp1 = x + 1.0;
    3453      138696 :     const double xm1 = x - 1.0;
    3454             : 
    3455             :     // This will most likely be used, so we'll compute it ahead of time to
    3456             :     // avoid stalling the processor.
    3457      138696 :     const double xp2c = xp2 * xp2 * xp2;
    3458             : 
    3459             :     // Note that the test is computed only if it is needed.
    3460             :     // TODO(schwehr): Make this easier to follow.
                           // Nesting order, outermost first: xp2 > 0, xp1 > 0, x > 0, xm1 > 0.
                           // Each inner test is only reached when the outer ones succeeded.
    3461             :     return xp2 > 0.0
    3462      277392 :                ? ((xp1 > 0.0)
    3463      138696 :                       ? ((x > 0.0)
    3464      124338 :                              ? ((xm1 > 0.0) ? -4.0 * xm1 * xm1 * xm1 : 0.0) +
    3465       89912 :                                    6.0 * x * x * x
    3466             :                              : 0.0) +
    3467      124338 :                             -4.0 * xp1 * xp1 * xp1
    3468             :                       : 0.0) +
    3469             :                      xp2c
    3470      138696 :                : 0.0;  // * 0.166666666666666666666
    3471             : }
    3472             : 
    3473     2220360 : static double GWKBSpline4Values(double *padfValues)
    3474             : {
                           // Batch form of the cubic B-spline weight: on input padfValues[0..3]
                           // hold four abscissas; each is overwritten in place with
                           //   (x+2)^3 - 4 (x+1)^3 + 6 x^3 - 4 (x-1)^3
                           // (each term only when its base is > 0; 0 when x + 2 <= 0), without the
                           // 1/6 normalisation factor. Returns the sum of the four weights.
                           // NOTE(review): as in the scalar form there is no upper cut-off for
                           // x > 2; presumably callers stay within the kernel support - confirm.
    3475    11101800 :     for (int i = 0; i < 4; i++)
    3476             :     {
    3477     8881440 :         const double x = padfValues[i];
    3478     8881440 :         const double xp2 = x + 2.0;
    3479     8881440 :         const double xp1 = x + 1.0;
    3480     8881440 :         const double xm1 = x - 1.0;
    3481             : 
    3482             :         // This will most likely be used, so we'll compute it ahead of time to
    3483             :         // avoid stalling the processor.
    3484     8881440 :         const double xp2c = xp2 * xp2 * xp2;
    3485             : 
    3486             :         // Note that the test is computed only if it is needed.
    3487             :         // TODO(schwehr): Make this easier to follow.
                               // Nesting order, outermost first: xp2 > 0, xp1 > 0, x > 0, xm1 > 0.
    3488     8881440 :         padfValues[i] =
    3489             :             (xp2 > 0.0)
    3490    17762900 :                 ? ((xp1 > 0.0)
    3491     8881440 :                        ? ((x > 0.0)
    3492     6660880 :                               ? ((xm1 > 0.0) ? -4.0 * xm1 * xm1 * xm1 : 0.0) +
    3493     4437750 :                                     6.0 * x * x * x
    3494             :                               : 0.0) +
    3495     6660880 :                              -4.0 * xp1 * xp1 * xp1
    3496             :                        : 0.0) +
    3497             :                       xp2c
    3498             :                 : 0.0;  // * 0.166666666666666666666
    3499             :     }
    3500     2220360 :     return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
    3501             : }
    3502             : /************************************************************************/
    3503             : /*                       GWKResampleWrkStruct                           */
    3504             : /************************************************************************/
    3505             : 
    3506             : typedef struct _GWKResampleWrkStruct GWKResampleWrkStruct;
    3507             : 
                       // Signature shared by the resampling callbacks stored in the work
                       // structure: given a source position (dfSrcX, dfSrcY) in band iBand, they
                       // write the resampled density, real and imaginary parts through the three
                       // output pointers and return a success flag.
    3508             : typedef bool (*pfnGWKResampleType)(const GDALWarpKernel *poWK, int iBand,
    3509             :                                    double dfSrcX, double dfSrcY,
    3510             :                                    double *pdfDensity, double *pdfReal,
    3511             :                                    double *pdfImag,
    3512             :                                    GWKResampleWrkStruct *psWrkStruct);
    3513             : 
                       // Scratch buffers and cached values reused across resampling calls.
                       // Allocated by GWKResampleCreateWrkStruct() and released by
                       // GWKResampleDeleteWrkStruct().
    3514             : struct _GWKResampleWrkStruct
    3515             : {
                           // Resampler selected at creation time: GWKResampleOptimizedLanczos for
                           // GRA_Lanczos, GWKResample otherwise.
    3516             :     pfnGWKResampleType pfnGWKResample;
    3517             : 
    3518             :     // Space for saved X weights.
                           // Both arrays hold (nXRadius + 1) * 2 entries.
    3519             :     double *padfWeightsX;
                           // Flags telling whether the matching padfWeightsX entry has been
                           // computed.
    3520             :     bool *pabCalcX;
    3521             : 
                           // (nYRadius + 1) * 2 entries. The iLast* / dfLast* members are
                           // initialised to -10 at creation.
                           // NOTE(review): presumably sentinels meaning "nothing cached yet" -
                           // confirm in GWKResampleOptimizedLanczos.
    3522             :     double *padfWeightsY;       // Only used by GWKResampleOptimizedLanczos.
    3523             :     int iLastSrcX;              // Only used by GWKResampleOptimizedLanczos.
    3524             :     int iLastSrcY;              // Only used by GWKResampleOptimizedLanczos.
    3525             :     double dfLastDeltaX;        // Only used by GWKResampleOptimizedLanczos.
    3526             :     double dfLastDeltaY;        // Only used by GWKResampleOptimizedLanczos.
                           // cos/sin of PI * scale and of PI / 3 * scale. They are only filled in
                           // at creation when the resampling is GRA_Lanczos and the matching scale
                           // is < 1; otherwise they keep the 0 left by the zeroing allocation.
    3527             :     double dfCosPiXScale;       // Only used by GWKResampleOptimizedLanczos.
    3528             :     double dfSinPiXScale;       // Only used by GWKResampleOptimizedLanczos.
    3529             :     double dfCosPiXScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3530             :     double dfSinPiXScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3531             :     double dfCosPiYScale;       // Only used by GWKResampleOptimizedLanczos.
    3532             :     double dfSinPiYScale;       // Only used by GWKResampleOptimizedLanczos.
    3533             :     double dfCosPiYScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3534             :     double dfSinPiYScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3535             : 
    3536             :     // Space for saving a row of pixels.
                           // padfRowDensity is nullptr when the kernel has neither a unified
                           // source density nor any source validity mask; the other two always
                           // hold (nXRadius + 1) * 2 entries.
    3537             :     double *padfRowDensity;
    3538             :     double *padfRowReal;
    3539             :     double *padfRowImag;
    3540             : };
    3541             : 
    3542             : /************************************************************************/
    3543             : /*                    GWKResampleCreateWrkStruct()                      */
    3544             : /************************************************************************/
    3545             : 
    3546             : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
    3547             :                         double dfSrcY, double *pdfDensity, double *pdfReal,
    3548             :                         double *pdfImag, GWKResampleWrkStruct *psWrkStruct);
                       // The declaration above and the one below are forward declarations: both
                       // functions are defined after GWKResampleCreateWrkStruct(), which needs
                       // their addresses to fill in pfnGWKResample.
    3549             : 
    3550             : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
    3551             :                                         double dfSrcX, double dfSrcY,
    3552             :                                         double *pdfDensity, double *pdfReal,
    3553             :                                         double *pdfImag,
    3554             :                                         GWKResampleWrkStruct *psWrkStruct);
    3555             : 
    3556         342 : static GWKResampleWrkStruct *GWKResampleCreateWrkStruct(GDALWarpKernel *poWK)
    3557             : {
                           // Allocates and initialises the scratch structure used by the
                           // resampling callbacks, sized from the kernel radii of poWK, and selects
                           // the callback matching poWK->eResample.
                           // The returned structure and its buffers are released by
                           // GWKResampleDeleteWrkStruct().

                           // Number of entries per axis: (radius + 1) * 2.
    3558         342 :     const int nXDist = (poWK->nXRadius + 1) * 2;
    3559         342 :     const int nYDist = (poWK->nYRadius + 1) * 2;
    3560             : 
                           // Zero-initialised, so every member not explicitly set below is 0 /
                           // null.
    3561             :     GWKResampleWrkStruct *psWrkStruct = static_cast<GWKResampleWrkStruct *>(
    3562         342 :         CPLCalloc(1, sizeof(GWKResampleWrkStruct)));
    3563             : 
    3564             :     // Alloc space for saved X weights.
    3565         342 :     psWrkStruct->padfWeightsX =
    3566         342 :         static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
                           // Unlike the other buffers this one is not zeroed at allocation;
                           // GWKResample() resets it with memset() before reading it.
    3567         342 :     psWrkStruct->pabCalcX =
    3568         342 :         static_cast<bool *>(CPLMalloc(nXDist * sizeof(bool)));
    3569             : 
    3570         342 :     psWrkStruct->padfWeightsY =
    3571         342 :         static_cast<double *>(CPLCalloc(nYDist, sizeof(double)));
                           // NOTE(review): -10 presumably acts as a "no previous position" sentinel
                           // for the Lanczos weight cache - confirm in GWKResampleOptimizedLanczos.
    3572         342 :     psWrkStruct->iLastSrcX = -10;
    3573         342 :     psWrkStruct->iLastSrcY = -10;
    3574         342 :     psWrkStruct->dfLastDeltaX = -10;
    3575         342 :     psWrkStruct->dfLastDeltaY = -10;
    3576             : 
    3577             :     // Alloc space for saving a row of pixels.
                           // The density row is only needed when the kernel carries a unified
                           // density or some validity mask; a null pointer tells the resamplers
                           // that density is not tracked.
    3578         342 :     if (poWK->pafUnifiedSrcDensity == nullptr &&
    3579         314 :         poWK->panUnifiedSrcValid == nullptr &&
    3580         302 :         poWK->papanBandSrcValid == nullptr)
    3581             :     {
    3582         302 :         psWrkStruct->padfRowDensity = nullptr;
    3583             :     }
    3584             :     else
    3585             :     {
    3586          40 :         psWrkStruct->padfRowDensity =
    3587          40 :             static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3588             :     }
    3589         342 :     psWrkStruct->padfRowReal =
    3590         342 :         static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3591         342 :     psWrkStruct->padfRowImag =
    3592         342 :         static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3593             : 
    3594         342 :     if (poWK->eResample == GRA_Lanczos)
    3595             :     {
    3596          63 :         psWrkStruct->pfnGWKResample = GWKResampleOptimizedLanczos;
    3597             : 
                               // Precompute the trigonometric constants once per work structure.
                               // They are only set when the scale is below 1.
    3598          63 :         if (poWK->dfXScale < 1)
    3599             :         {
    3600           4 :             psWrkStruct->dfCosPiXScaleOver3 = cos(M_PI / 3 * poWK->dfXScale);
                                   // sin derived from cos as the non-negative square root.
    3601           4 :             psWrkStruct->dfSinPiXScaleOver3 =
    3602           4 :                 sqrt(1 - psWrkStruct->dfCosPiXScaleOver3 *
    3603           4 :                              psWrkStruct->dfCosPiXScaleOver3);
    3604             :             // "Naive":
    3605             :             // const double dfCosPiXScale = cos(  M_PI * dfXScale );
    3606             :             // const double dfSinPiXScale = sin(  M_PI * dfXScale );
    3607             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
                                   // NOTE(review): the non-negative sqrt() roots rely on the angle
                                   // lying in [0, PI], i.e. on the scale being >= 0 - confirm.
    3608           4 :             psWrkStruct->dfCosPiXScale = (4 * psWrkStruct->dfCosPiXScaleOver3 *
    3609           4 :                                               psWrkStruct->dfCosPiXScaleOver3 -
    3610           4 :                                           3) *
    3611           4 :                                          psWrkStruct->dfCosPiXScaleOver3;
    3612           4 :             psWrkStruct->dfSinPiXScale = sqrt(
    3613           4 :                 1 - psWrkStruct->dfCosPiXScale * psWrkStruct->dfCosPiXScale);
    3614             :         }
    3615             : 
                               // Same computation for the Y axis.
    3616          63 :         if (poWK->dfYScale < 1)
    3617             :         {
    3618          11 :             psWrkStruct->dfCosPiYScaleOver3 = cos(M_PI / 3 * poWK->dfYScale);
    3619          11 :             psWrkStruct->dfSinPiYScaleOver3 =
    3620          11 :                 sqrt(1 - psWrkStruct->dfCosPiYScaleOver3 *
    3621          11 :                              psWrkStruct->dfCosPiYScaleOver3);
    3622             :             // "Naive":
    3623             :             // const double dfCosPiYScale = cos(  M_PI * dfYScale );
    3624             :             // const double dfSinPiYScale = sin(  M_PI * dfYScale );
    3625             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
    3626          11 :             psWrkStruct->dfCosPiYScale = (4 * psWrkStruct->dfCosPiYScaleOver3 *
    3627          11 :                                               psWrkStruct->dfCosPiYScaleOver3 -
    3628          11 :                                           3) *
    3629          11 :                                          psWrkStruct->dfCosPiYScaleOver3;
    3630          11 :             psWrkStruct->dfSinPiYScale = sqrt(
    3631          11 :                 1 - psWrkStruct->dfCosPiYScale * psWrkStruct->dfCosPiYScale);
    3632             :         }
    3633             :     }
    3634             :     else
                               // Every other resampling method goes through the generic path.
    3635         279 :         psWrkStruct->pfnGWKResample = GWKResample;
    3636             : 
    3637         342 :     return psWrkStruct;
    3638             : }
    3639             : 
    3640             : /************************************************************************/
    3641             : /*                    GWKResampleDeleteWrkStruct()                      */
    3642             : /************************************************************************/
    3643             : 
    3644         342 : static void GWKResampleDeleteWrkStruct(GWKResampleWrkStruct *psWrkStruct)
    3645             : {
                           // Releases every buffer allocated by GWKResampleCreateWrkStruct() and
                           // then the structure itself. psWrkStruct is dereferenced
                           // unconditionally, so it must not be null. padfRowDensity may
                           // legitimately be null.
                           // NOTE(review): relies on CPLFree() accepting a null pointer - confirm.
    3646         342 :     CPLFree(psWrkStruct->padfWeightsX);
    3647         342 :     CPLFree(psWrkStruct->padfWeightsY);
    3648         342 :     CPLFree(psWrkStruct->pabCalcX);
    3649         342 :     CPLFree(psWrkStruct->padfRowDensity);
    3650         342 :     CPLFree(psWrkStruct->padfRowReal);
    3651         342 :     CPLFree(psWrkStruct->padfRowImag);
    3652         342 :     CPLFree(psWrkStruct);
    3653         342 : }
    3654             : 
    3655             : /************************************************************************/
    3656             : /*                           GWKResample()                              */
    3657             : /************************************************************************/
    3658             : 
    3659      239383 : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
    3660             :                         double dfSrcY, double *pdfDensity, double *pdfReal,
    3661             :                         double *pdfImag, GWKResampleWrkStruct *psWrkStruct)
    3662             : 
    3663             : {
                           // Generic separable-kernel resampler. Computes a weighted average of
                           // the source pixels of band iBand around (dfSrcX, dfSrcY), using the
                           // weight function selected by poWK->eResample from apfGWKFilter.
                           //
                           // On success returns true and writes the resampled density, real and
                           // imaginary parts to *pdfDensity, *pdfReal and *pdfImag.
                           // Returns false, with *pdfDensity set to 0 and the other outputs left
                           // untouched, when the accumulated weight (or accumulated density, when
                           // density is tracked) is below 0.000001.
                           // psWrkStruct supplies the scratch row buffers and the X-weight cache.

    3664             :     // Save as local variables to avoid following pointers in loops.
    3665      239383 :     const int nSrcXSize = poWK->nSrcXSize;
    3666      239383 :     const int nSrcYSize = poWK->nSrcYSize;
    3667             : 
    3668      239383 :     double dfAccumulatorReal = 0.0;
    3669      239383 :     double dfAccumulatorImag = 0.0;
    3670      239383 :     double dfAccumulatorDensity = 0.0;
    3671      239383 :     double dfAccumulatorWeight = 0.0;
                           // Index of the reference pixel (iSrcX, iSrcY) and the fractional offset
                           // (dfDeltaX, dfDeltaY) of the requested position from it. The 0.5 shift
                           // is applied before flooring.
    3672      239383 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    3673      239383 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    3674      239383 :     const GPtrDiff_t iSrcOffset =
    3675      239383 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    3676      239383 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3677      239383 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3678             : 
    3679      239383 :     const double dfXScale = poWK->dfXScale;
    3680      239383 :     const double dfYScale = poWK->dfYScale;
    3681             : 
                           // Same size as used when the work buffers were allocated.
    3682      239383 :     const int nXDist = (poWK->nXRadius + 1) * 2;
    3683             : 
    3684             :     // Space for saved X weights.
    3685      239383 :     double *padfWeightsX = psWrkStruct->padfWeightsX;
    3686      239383 :     bool *pabCalcX = psWrkStruct->pabCalcX;
    3687             : 
    3688             :     // Space for saving a row of pixels.
    3689      239383 :     double *padfRowDensity = psWrkStruct->padfRowDensity;
    3690      239383 :     double *padfRowReal = psWrkStruct->padfRowReal;
    3691      239383 :     double *padfRowImag = psWrkStruct->padfRowImag;
    3692             : 
    3693             :     // Mark as needing calculation (don't calculate the weights yet,
    3694             :     // because a mask may render it unnecessary).
    3695      239383 :     memset(pabCalcX, false, nXDist * sizeof(bool));
    3696             : 
    3697      239383 :     FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample];
    3698      239383 :     CPLAssert(pfnGetWeight);
    3699             : 
    3700             :     // Skip sampling over edge of image.
                           // The kernel rows run from j to jMax and the columns from iMin to iMax,
                           // both relative to the reference pixel; each range is clamped so that
                           // it stays inside the source image.
    3701      239383 :     int j = poWK->nFiltInitY;
    3702      239383 :     int jMax = poWK->nYRadius;
    3703      239383 :     if (iSrcY + j < 0)
    3704         566 :         j = -iSrcY;
    3705      239383 :     if (iSrcY + jMax >= nSrcYSize)
    3706         662 :         jMax = nSrcYSize - iSrcY - 1;
    3707             : 
    3708      239383 :     int iMin = poWK->nFiltInitX;
    3709      239383 :     int iMax = poWK->nXRadius;
    3710      239383 :     if (iSrcX + iMin < 0)
    3711         566 :         iMin = -iSrcX;
    3712      239383 :     if (iSrcX + iMax >= nSrcXSize)
    3713         659 :         iMax = nSrcXSize - iSrcX - 1;
    3714             : 
    3715      239383 :     const int bXScaleBelow1 = (dfXScale < 1.0);
    3716      239383 :     const int bYScaleBelow1 = (dfYScale < 1.0);
    3717             : 
                           // Starts one row above the first kernel row because the loop advances
                           // the offset by one row before using it.
    3718      239383 :     GPtrDiff_t iRowOffset =
    3719      239383 :         iSrcOffset + static_cast<GPtrDiff_t>(j - 1) * nSrcXSize + iMin;
    3720             : 
    3721             :     // Loop over pixel rows in the kernel.
    3722     1445930 :     for (; j <= jMax; ++j)
    3723             :     {
    3724     1206540 :         iRowOffset += nSrcXSize;
    3725             : 
    3726             :         // Get pixel values.
    3727             :         // We can potentially read extra elements after the "normal" end of the
    3728             :         // source arrays, but the contract of papabySrcImage[iBand],
    3729             :         // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
    3730             :         // is to have WARP_EXTRA_ELTS reserved at their end.
                               // NOTE(review): the length argument (iMax - iMin + 2) / 2 is
                               // presumably the row length counted in pairs of pixels, rounded up -
                               // confirm against GWKGetPixelRow(). A false return skips the row.
    3731     1206540 :         if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
    3732             :                             padfRowDensity, padfRowReal, padfRowImag))
    3733          72 :             continue;
    3734             : 
    3735             :         // Calculate the Y weight.
                               // The distance is multiplied by the scale only when it is < 1.
    3736             :         double dfWeight1 = (bYScaleBelow1)
    3737     1206470 :                                ? pfnGetWeight((j - dfDeltaY) * dfYScale)
    3738        1600 :                                : pfnGetWeight(j - dfDeltaY);
    3739             : 
    3740             :         // Iterate over pixels in row.
                               // Per-row sums; they are scaled by the row's Y weight after the
                               // inner loop.
    3741     1206470 :         double dfAccumulatorRealLocal = 0.0;
    3742     1206470 :         double dfAccumulatorImagLocal = 0.0;
    3743     1206470 :         double dfAccumulatorDensityLocal = 0.0;
    3744     1206470 :         double dfAccumulatorWeightLocal = 0.0;
    3745             : 
    3746     7317420 :         for (int i = iMin; i <= iMax; ++i)
    3747             :         {
    3748             :             // Skip sampling if pixel has zero density.
    3749     6110940 :             if (padfRowDensity != nullptr &&
    3750       77277 :                 padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD)
    3751         546 :                 continue;
    3752             : 
    3753     6110400 :             double dfWeight2 = 0.0;
    3754             : 
    3755             :             // Make or use a cached set of weights for this row.
                                   // The X weight depends only on the column, so it is computed the
                                   // first time a column is actually sampled and reused for the
                                   // following rows of this call.
    3756     6110400 :             if (pabCalcX[i - iMin])
    3757             :             {
    3758             :                 // Use saved weight value instead of recomputing it.
    3759     4903920 :                 dfWeight2 = padfWeightsX[i - iMin];
    3760             :             }
    3761             :             else
    3762             :             {
    3763             :                 // Calculate & save the X weight.
    3764     1206480 :                 padfWeightsX[i - iMin] = dfWeight2 =
    3765     1206480 :                     (bXScaleBelow1) ? pfnGetWeight((i - dfDeltaX) * dfXScale)
    3766        1600 :                                     : pfnGetWeight(i - dfDeltaX);
    3767             : 
    3768     1206480 :                 pabCalcX[i - iMin] = true;
    3769             :             }
    3770             : 
    3771             :             // Accumulate!
    3772     6110400 :             dfAccumulatorRealLocal += padfRowReal[i - iMin] * dfWeight2;
    3773     6110400 :             dfAccumulatorImagLocal += padfRowImag[i - iMin] * dfWeight2;
    3774     6110400 :             if (padfRowDensity != nullptr)
    3775       76731 :                 dfAccumulatorDensityLocal +=
    3776       76731 :                     padfRowDensity[i - iMin] * dfWeight2;
    3777     6110400 :             dfAccumulatorWeightLocal += dfWeight2;
    3778             :         }
    3779             : 
    3780     1206470 :         dfAccumulatorReal += dfAccumulatorRealLocal * dfWeight1;
    3781     1206470 :         dfAccumulatorImag += dfAccumulatorImagLocal * dfWeight1;
    3782     1206470 :         dfAccumulatorDensity += dfAccumulatorDensityLocal * dfWeight1;
    3783     1206470 :         dfAccumulatorWeight += dfAccumulatorWeightLocal * dfWeight1;
    3784             :     }
    3785             : 
                           // Nothing usable was sampled: report zero density and fail. *pdfReal
                           // and *pdfImag are not written on this path.
    3786      239383 :     if (dfAccumulatorWeight < 0.000001 ||
    3787        1887 :         (padfRowDensity != nullptr && dfAccumulatorDensity < 0.000001))
    3788             :     {
    3789           0 :         *pdfDensity = 0.0;
    3790           0 :         return false;
    3791             :     }
    3792             : 
    3793             :     // Calculate the output taking into account weighting.
                           // Normalise by the total weight unless it is already within 0.00001 of
                           // 1, in which case the division is skipped.
    3794      239383 :     if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
    3795             :     {
    3796      239380 :         *pdfReal = dfAccumulatorReal / dfAccumulatorWeight;
    3797      239380 :         *pdfImag = dfAccumulatorImag / dfAccumulatorWeight;
    3798      239380 :         if (padfRowDensity != nullptr)
    3799        1884 :             *pdfDensity = dfAccumulatorDensity / dfAccumulatorWeight;
    3800             :         else
                                   // Density is not tracked: report full density.
    3801      237496 :             *pdfDensity = 1.0;
    3802             :     }
    3803             :     else
    3804             :     {
    3805           3 :         *pdfReal = dfAccumulatorReal;
    3806           3 :         *pdfImag = dfAccumulatorImag;
    3807           3 :         if (padfRowDensity != nullptr)
    3808           3 :             *pdfDensity = dfAccumulatorDensity;
    3809             :         else
    3810           0 :             *pdfDensity = 1.0;
    3811             :     }
    3812             : 
    3813      239383 :     return true;
    3814             : }
    3815             : 
    3816             : /************************************************************************/
    3817             : /*                      GWKResampleOptimizedLanczos()                   */
    3818             : /************************************************************************/
    3819             : 
                     : // Resample one source location with a separable Lanczos kernel of
                     : // radius 3.
                     : //
                     : // The X and Y weights are stored in psWrkStruct and are only recomputed
                     : // when (iSrcX, dfDeltaX), respectively (iSrcY, dfDeltaY), differ from the
                     : // values cached in psWrkStruct by a previous call. When poWK->dfXScale or
                     : // poWK->dfYScale is < 1 the kernel argument is multiplied by that scale.
                     : //
                     : // poWK        : kernel description (source size, scales, filter extent,
                     : //               working data type, masks and source buffers).
                     : // iBand       : index of the band to sample in poWK->papabySrcImage.
                     : // dfSrcX/Y    : source position; the kernel is centred on
                     : //               floor(dfSrc - 0.5).
                     : // pdfDensity  : output density (0.0 on failure).
                     : // pdfReal     : output real part.
                     : // pdfImag     : output imaginary part. Not written by the Byte fast
                     : //               path below.
                     : // psWrkStruct : scratch weights / row buffers and cached state.
                     : //
                     : // Returns false, with *pdfDensity set to 0, when the accumulated weight
                     : // is below 1e-6 or, when density information is used, when the
                     : // accumulated density is below 1e-6 or fewer than
                     : // (rows * columns) / 2 kernel pixels pass SRC_DENSITY_THRESHOLD.
                     : // Returns true otherwise.
    3820      617144 : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
    3821             :                                         double dfSrcX, double dfSrcY,
    3822             :                                         double *pdfDensity, double *pdfReal,
    3823             :                                         double *pdfImag,
    3824             :                                         GWKResampleWrkStruct *psWrkStruct)
    3825             : 
    3826             : {
    3827             :     // Save as local variables to avoid following pointers in loops.
    3828      617144 :     const int nSrcXSize = poWK->nSrcXSize;
    3829      617144 :     const int nSrcYSize = poWK->nSrcYSize;
    3830             : 
    3831      617144 :     double dfAccumulatorReal = 0.0;
    3832      617144 :     double dfAccumulatorImag = 0.0;
    3833      617144 :     double dfAccumulatorDensity = 0.0;
    3834      617144 :     double dfAccumulatorWeight = 0.0;
    3835      617144 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    3836      617144 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    3837      617144 :     const GPtrDiff_t iSrcOffset =
    3838      617144 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    3839      617144 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3840      617144 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3841             : 
    3842      617144 :     const double dfXScale = poWK->dfXScale;
    3843      617144 :     const double dfYScale = poWK->dfYScale;
    3844             : 
    3845             :     // Space for saved X and Y weights, shifted so that they can be indexed
                     :     // directly with kernel offsets starting at nFiltInitX / nFiltInitY.
    3846      617144 :     double *const padfWeightsXShifted =
    3847      617144 :         psWrkStruct->padfWeightsX - poWK->nFiltInitX;
    3848      617144 :     double *const padfWeightsYShifted =
    3849      617144 :         psWrkStruct->padfWeightsY - poWK->nFiltInitY;
    3850             : 
    3851             :     // Space for saving a row of pixels.
    3852      617144 :     double *const padfRowDensity = psWrkStruct->padfRowDensity;
    3853      617144 :     double *const padfRowReal = psWrkStruct->padfRowReal;
    3854      617144 :     double *const padfRowImag = psWrkStruct->padfRowImag;
    3855             : 
    3856             :     // Skip sampling over edge of image.
    3857      617144 :     int jMin = poWK->nFiltInitY;
    3858      617144 :     int jMax = poWK->nYRadius;
    3859      617144 :     if (iSrcY + jMin < 0)
    3860       16522 :         jMin = -iSrcY;
    3861      617144 :     if (iSrcY + jMax >= nSrcYSize)
    3862        5782 :         jMax = nSrcYSize - iSrcY - 1;
    3863             : 
    3864      617144 :     int iMin = poWK->nFiltInitX;
    3865      617144 :     int iMax = poWK->nXRadius;
    3866      617144 :     if (iSrcX + iMin < 0)
    3867       15797 :         iMin = -iSrcX;
    3868      617144 :     if (iSrcX + iMax >= nSrcXSize)
    3869        4657 :         iMax = nSrcXSize - iSrcX - 1;
    3870             : 
    3871      617144 :     if (dfXScale < 1.0)
    3872             :     {
                     :         // Restrict [iMin, iMax] to the kernel support |x| <= 3.
    3873      403041 :         while ((iMin - dfDeltaX) * dfXScale < -3.0)
    3874      200179 :             iMin++;
    3875      202862 :         while ((iMax - dfDeltaX) * dfXScale > 3.0)
    3876           0 :             iMax--;
    3877             : 
    3878             :         // clang-format off
    3879             :         /*
    3880             :         Naive version:
    3881             :         for (int i = iMin; i <= iMax; ++i)
    3882             :         {
    3883             :             padfWeightsXShifted[i] =
    3884             :                 GWKLanczosSinc((i - dfDeltaX) * dfXScale);
    3885             :         }
    3886             : 
    3887             :         but given that:
    3888             : 
    3889             :         GWKLanczosSinc(x):
    3890             :             if (dfX == 0.0)
    3891             :                 return 1.0;
    3892             : 
    3893             :             const double dfPIX = M_PI * dfX;
    3894             :             const double dfPIXoverR = dfPIX / 3;
    3895             :             const double dfPIX2overR = dfPIX * dfPIXoverR;
    3896             :             return sin(dfPIX) * sin(dfPIXoverR) / dfPIX2overR;
    3897             : 
    3898             :         and
    3899             :             sin (a + b) = sin a cos b + cos a sin b.
    3900             :             cos (a + b) = cos a cos b - sin a sin b.
    3901             : 
    3902             :         we can skip any sin() computation within the loop
    3903             :         */
    3904             :         // clang-format on
    3905             : 
    3906      202862 :         if (iSrcX != psWrkStruct->iLastSrcX ||
    3907      131072 :             dfDeltaX != psWrkStruct->dfLastDeltaX)
    3908             :         {
    3909       71790 :             double dfX = (iMin - dfDeltaX) * dfXScale;
    3910             : 
    3911       71790 :             double dfPIXover3 = M_PI / 3 * dfX;
    3912       71790 :             double dfCosOver3 = cos(dfPIXover3);
    3913       71790 :             double dfSinOver3 = sin(dfPIXover3);
    3914             : 
    3915             :             // "Naive":
    3916             :             // double dfSin = sin( M_PI * dfX );
    3917             :             // double dfCos = cos( M_PI * dfX );
    3918             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
    3919       71790 :             double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
    3920       71790 :             double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
    3921             : 
    3922       71790 :             const double dfCosPiXScaleOver3 = psWrkStruct->dfCosPiXScaleOver3;
    3923       71790 :             const double dfSinPiXScaleOver3 = psWrkStruct->dfSinPiXScaleOver3;
    3924       71790 :             const double dfCosPiXScale = psWrkStruct->dfCosPiXScale;
    3925       71790 :             const double dfSinPiXScale = psWrkStruct->dfSinPiXScale;
                     :             // sin(PI x) * sin(PI x / 3) / (PI x * PI x / 3)
                     :             //   = 3 / PI^2 * sin(PI x) * sin(PI x / 3) / x^2
                     :             // Using 3 / PI^2 keeps the weights continuous with the 1.0
                     :             // returned for dfX == 0.
    3926       71790 :             constexpr double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
    3927       71790 :             padfWeightsXShifted[iMin] =
    3928       71790 :                 dfX == 0 ? 1.0 : dfInvPI2Over3 * dfSin * dfSinOver3 / (dfX * dfX);
    3929     1636480 :             for (int i = iMin + 1; i <= iMax; ++i)
    3930             :             {
    3931     1564690 :                 dfX += dfXScale;
    3932     1564690 :                 const double dfNewSin =
    3933     1564690 :                     dfSin * dfCosPiXScale + dfCos * dfSinPiXScale;
    3934     1564690 :                 const double dfNewSinOver3 = dfSinOver3 * dfCosPiXScaleOver3 +
    3935     1564690 :                                              dfCosOver3 * dfSinPiXScaleOver3;
    3936     1564690 :                 padfWeightsXShifted[i] =
    3937             :                     dfX == 0
    3938     1564690 :                         ? 1.0
    3939     1564690 :                         : dfInvPI2Over3 * dfNewSin * dfNewSinOver3 / (dfX * dfX);
    3940     1564690 :                 const double dfNewCos =
    3941     1564690 :                     dfCos * dfCosPiXScale - dfSin * dfSinPiXScale;
    3942     1564690 :                 const double dfNewCosOver3 = dfCosOver3 * dfCosPiXScaleOver3 -
    3943     1564690 :                                              dfSinOver3 * dfSinPiXScaleOver3;
    3944     1564690 :                 dfSin = dfNewSin;
    3945     1564690 :                 dfCos = dfNewCos;
    3946     1564690 :                 dfSinOver3 = dfNewSinOver3;
    3947     1564690 :                 dfCosOver3 = dfNewCosOver3;
    3948             :             }
    3949             : 
    3950       71790 :             psWrkStruct->iLastSrcX = iSrcX;
    3951       71790 :             psWrkStruct->dfLastDeltaX = dfDeltaX;
    3952             :         }
    3953             :     }
    3954             :     else
    3955             :     {
    3956      757542 :         while (iMin - dfDeltaX < -3.0)
    3957      343260 :             iMin++;
    3958      414282 :         while (iMax - dfDeltaX > 3.0)
    3959           0 :             iMax--;
    3960             : 
    3961      414282 :         if (iSrcX != psWrkStruct->iLastSrcX ||
    3962      209580 :             dfDeltaX != psWrkStruct->dfLastDeltaX)
    3963             :         {
    3964             :             // Optimisation of GWKLanczosSinc(i - dfDeltaX) based on the
    3965             :             // following trigonometric formulas.
    3966             : 
    3967             :             // TODO(schwehr): Move this somewhere where it can be rendered as
    3968             :             // LaTeX.
    3969             :             // clang-format off
    3970             :             // sin(M_PI * (dfBase + k)) = sin(M_PI * dfBase) * cos(M_PI * k) +
    3971             :             //                            cos(M_PI * dfBase) * sin(M_PI * k)
    3972             :             // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k) + dfCosPIBase * sin(M_PI * k)
    3973             :             // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k)
    3974             :             // sin(M_PI * (dfBase + k)) = dfSinPIBase * (((k % 2) == 0) ? 1 : -1)
    3975             : 
    3976             :             // sin(M_PI / dfR * (dfBase + k)) = sin(M_PI / dfR * dfBase) * cos(M_PI / dfR * k) +
    3977             :             //                                  cos(M_PI / dfR * dfBase) * sin(M_PI / dfR * k)
    3978             :             // sin(M_PI / dfR * (dfBase + k)) = dfSinPIBaseOverR * cos(M_PI / dfR * k) + dfCosPIBaseOverR * sin(M_PI / dfR * k)
    3979             :             // clang-format on
    3980             : 
    3981      414282 :             const double dfSinPIDeltaXOver3 = sin((-M_PI / 3.0) * dfDeltaX);
    3982      414282 :             const double dfSin2PIDeltaXOver3 =
    3983             :                 dfSinPIDeltaXOver3 * dfSinPIDeltaXOver3;
    3984             :             // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaX < PI/2.
    3985      414282 :             const double dfCosPIDeltaXOver3 = sqrt(1.0 - dfSin2PIDeltaXOver3);
    3986      414282 :             const double dfSinPIDeltaX =
    3987      414282 :                 (3.0 - 4 * dfSin2PIDeltaXOver3) * dfSinPIDeltaXOver3;
    3988      414282 :             const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
    3989      414282 :             const double dfInvPI2Over3xSinPIDeltaX =
    3990             :                 dfInvPI2Over3 * dfSinPIDeltaX;
    3991      414282 :             const double dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 =
    3992      414282 :                 -0.5 * dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3;
    3993      414282 :             const double dfSinPIOver3 = 0.8660254037844386;
    3994      414282 :             const double dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3 =
    3995      414282 :                 dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaX * dfCosPIDeltaXOver3;
                     :             // Numerators of the weight, selected below by (i + 3) % 3.
    3996             :             const double padfCst[] = {
    3997      414282 :                 dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3,
    3998      414282 :                 dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 -
    3999             :                     dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3,
    4000      414282 :                 dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 +
    4001      414282 :                     dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3};
    4002             : 
    4003     2936860 :             for (int i = iMin; i <= iMax; ++i)
    4004             :             {
    4005     2522570 :                 const double dfX = i - dfDeltaX;
    4006     2522570 :                 if (dfX == 0.0)
    4007       58282 :                     padfWeightsXShifted[i] = 1.0;
    4008             :                 else
    4009     2464290 :                     padfWeightsXShifted[i] = padfCst[(i + 3) % 3] / (dfX * dfX);
    4010             : #if DEBUG_VERBOSE
    4011             :                     // TODO(schwehr): AlmostEqual.
    4012             :                     // CPLAssert(fabs(padfWeightsX[i-poWK->nFiltInitX] -
    4013             :                     //               GWKLanczosSinc(dfX, 3.0)) < 1e-10);
    4014             : #endif
    4015             :             }
    4016             : 
    4017      414282 :             psWrkStruct->iLastSrcX = iSrcX;
    4018      414282 :             psWrkStruct->dfLastDeltaX = dfDeltaX;
    4019             :         }
    4020             :     }
    4021             : 
    4022      617144 :     if (dfYScale < 1.0)
    4023             :     {
                     :         // Restrict [jMin, jMax] to the kernel support |y| <= 3.
    4024      403116 :         while ((jMin - dfDeltaY) * dfYScale < -3.0)
    4025      200254 :             jMin++;
    4026      202862 :         while ((jMax - dfDeltaY) * dfYScale > 3.0)
    4027           0 :             jMax--;
    4028             : 
    4029             :         // clang-format off
    4030             :         /*
    4031             :         Naive version:
    4032             :         for (int j = jMin; j <= jMax; ++j)
    4033             :         {
    4034             :             padfWeightsYShifted[j] =
    4035             :                 GWKLanczosSinc((j - dfDeltaY) * dfYScale);
    4036             :         }
    4037             :         */
    4038             :         // clang-format on
    4039             : 
    4040      202862 :         if (iSrcY != psWrkStruct->iLastSrcY ||
    4041      202479 :             dfDeltaY != psWrkStruct->dfLastDeltaY)
    4042             :         {
    4043         383 :             double dfY = (jMin - dfDeltaY) * dfYScale;
    4044             : 
    4045         383 :             double dfPIYover3 = M_PI / 3 * dfY;
    4046         383 :             double dfCosOver3 = cos(dfPIYover3);
    4047         383 :             double dfSinOver3 = sin(dfPIYover3);
    4048             : 
    4049             :             // "Naive":
    4050             :             // double dfSin = sin( M_PI * dfY );
    4051             :             // double dfCos = cos( M_PI * dfY );
    4052             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
    4053         383 :             double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
    4054         383 :             double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
    4055             : 
    4056         383 :             const double dfCosPiYScaleOver3 = psWrkStruct->dfCosPiYScaleOver3;
    4057         383 :             const double dfSinPiYScaleOver3 = psWrkStruct->dfSinPiYScaleOver3;
    4058         383 :             const double dfCosPiYScale = psWrkStruct->dfCosPiYScale;
    4059         383 :             const double dfSinPiYScale = psWrkStruct->dfSinPiYScale;
                     :             // sin(PI y) * sin(PI y / 3) / (PI y * PI y / 3)
                     :             //   = 3 / PI^2 * sin(PI y) * sin(PI y / 3) / y^2
                     :             // Using 3 / PI^2 keeps the weights continuous with the 1.0
                     :             // returned for dfY == 0.
    4060         383 :             constexpr double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
    4061         383 :             padfWeightsYShifted[jMin] =
    4062         383 :                 dfY == 0 ? 1.0 : dfInvPI2Over3 * dfSin * dfSinOver3 / (dfY * dfY);
    4063        7318 :             for (int j = jMin + 1; j <= jMax; ++j)
    4064             :             {
    4065        6935 :                 dfY += dfYScale;
    4066        6935 :                 const double dfNewSin =
    4067        6935 :                     dfSin * dfCosPiYScale + dfCos * dfSinPiYScale;
    4068        6935 :                 const double dfNewSinOver3 = dfSinOver3 * dfCosPiYScaleOver3 +
    4069        6935 :                                              dfCosOver3 * dfSinPiYScaleOver3;
    4070        6935 :                 padfWeightsYShifted[j] =
    4071             :                     dfY == 0
    4072        6935 :                         ? 1.0
    4073        6935 :                         : dfInvPI2Over3 * dfNewSin * dfNewSinOver3 / (dfY * dfY);
    4074        6935 :                 const double dfNewCos =
    4075        6935 :                     dfCos * dfCosPiYScale - dfSin * dfSinPiYScale;
    4076        6935 :                 const double dfNewCosOver3 = dfCosOver3 * dfCosPiYScaleOver3 -
    4077        6935 :                                              dfSinOver3 * dfSinPiYScaleOver3;
    4078        6935 :                 dfSin = dfNewSin;
    4079        6935 :                 dfCos = dfNewCos;
    4080        6935 :                 dfSinOver3 = dfNewSinOver3;
    4081        6935 :                 dfCosOver3 = dfNewCosOver3;
    4082             :             }
    4083             : 
    4084         383 :             psWrkStruct->iLastSrcY = iSrcY;
    4085         383 :             psWrkStruct->dfLastDeltaY = dfDeltaY;
    4086             :         }
    4087             :     }
    4088             :     else
    4089             :     {
    4090      684742 :         while (jMin - dfDeltaY < -3.0)
    4091      270460 :             jMin++;
    4092      414282 :         while (jMax - dfDeltaY > 3.0)
    4093           0 :             jMax--;
    4094             : 
    4095      414282 :         if (iSrcY != psWrkStruct->iLastSrcY ||
    4096      413663 :             dfDeltaY != psWrkStruct->dfLastDeltaY)
    4097             :         {
    4098        1132 :             const double dfSinPIDeltaYOver3 = sin((-M_PI / 3.0) * dfDeltaY);
    4099        1132 :             const double dfSin2PIDeltaYOver3 =
    4100             :                 dfSinPIDeltaYOver3 * dfSinPIDeltaYOver3;
    4101             :             // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaY < PI/2.
    4102        1132 :             const double dfCosPIDeltaYOver3 = sqrt(1.0 - dfSin2PIDeltaYOver3);
    4103        1132 :             const double dfSinPIDeltaY =
    4104        1132 :                 (3.0 - 4.0 * dfSin2PIDeltaYOver3) * dfSinPIDeltaYOver3;
    4105        1132 :             const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
    4106        1132 :             const double dfInvPI2Over3xSinPIDeltaY =
    4107             :                 dfInvPI2Over3 * dfSinPIDeltaY;
    4108        1132 :             const double dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 =
    4109        1132 :                 -0.5 * dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3;
    4110        1132 :             const double dfSinPIOver3 = 0.8660254037844386;
    4111        1132 :             const double dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3 =
    4112        1132 :                 dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaY * dfCosPIDeltaYOver3;
                     :             // Numerators of the weight, selected below by (j + 3) % 3.
    4113             :             const double padfCst[] = {
    4114        1132 :                 dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3,
    4115        1132 :                 dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 -
    4116             :                     dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3,
    4117        1132 :                 dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 +
    4118        1132 :                     dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3};
    4119             : 
    4120        7917 :             for (int j = jMin; j <= jMax; ++j)
    4121             :             {
    4122        6785 :                 const double dfY = j - dfDeltaY;
    4123        6785 :                 if (dfY == 0.0)
    4124         460 :                     padfWeightsYShifted[j] = 1.0;
    4125             :                 else
    4126        6325 :                     padfWeightsYShifted[j] = padfCst[(j + 3) % 3] / (dfY * dfY);
    4127             : #if DEBUG_VERBOSE
    4128             :                     // TODO(schwehr): AlmostEqual.
    4129             :                     // CPLAssert(fabs(padfWeightsYShifted[j] -
    4130             :                     //               GWKLanczosSinc(dfY, 3.0)) < 1e-10);
    4131             : #endif
    4132             :             }
    4133             : 
    4134        1132 :             psWrkStruct->iLastSrcY = iSrcY;
    4135        1132 :             psWrkStruct->dfLastDeltaY = dfDeltaY;
    4136             :         }
    4137             :     }
    4138             : 
    4139             :     // If we have no density information, we can simply compute the
    4140             :     // accumulated weight.
    4141      617144 :     if (padfRowDensity == nullptr)
    4142             :     {
    4143      617144 :         double dfRowAccWeight = 0.0;
    4144     7903490 :         for (int i = iMin; i <= iMax; ++i)
    4145             :         {
    4146     7286350 :             dfRowAccWeight += padfWeightsXShifted[i];
    4147             :         }
    4148      617144 :         double dfColAccWeight = 0.0;
    4149     7958040 :         for (int j = jMin; j <= jMax; ++j)
    4150             :         {
    4151     7340900 :             dfColAccWeight += padfWeightsYShifted[j];
    4152             :         }
    4153      617144 :         dfAccumulatorWeight = dfRowAccWeight * dfColAccWeight;
    4154             :     }
    4155             : 
    4156             :     // Loop over pixel rows in the kernel.
    4157             : 
    4158      617144 :     if (poWK->eWorkingDataType == GDT_Byte && !poWK->panUnifiedSrcValid &&
    4159      616524 :         !poWK->papanBandSrcValid && !poWK->pafUnifiedSrcDensity &&
    4160             :         !padfRowDensity)
    4161             :     {
    4162             :         // Optimization for Byte case without any masking/alpha
    4163             : 
    4164      616524 :         if (dfAccumulatorWeight < 0.000001)
    4165             :         {
    4166           0 :             *pdfDensity = 0.0;
    4167           0 :             return false;
    4168             :         }
    4169             : 
    4170      616524 :         const GByte *pSrc =
    4171      616524 :             reinterpret_cast<const GByte *>(poWK->papabySrcImage[iBand]);
    4172      616524 :         pSrc += iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
    4173             : 
    4174             : #if defined(USE_SSE2)
    4175      616524 :         if (iMax - iMin + 1 == 6)
    4176             :         {
    4177             :             // This is just an optimized version of the general case in
    4178             :             // the else clause.
    4179             : 
    4180      346854 :             pSrc += iMin;
    4181      346854 :             int j = jMin;
    4182             :             const auto fourXWeights =
    4183      346854 :                 XMMReg4Double::Load4Val(padfWeightsXShifted + iMin);
    4184             : 
    4185             :             // Process 2 lines at the same time.
    4186     1375860 :             for (; j < jMax; j += 2)
    4187             :             {
    4188             :                 const XMMReg4Double v_acc =
    4189     1029000 :                     XMMReg4Double::Load4Val(pSrc) * fourXWeights;
    4190             :                 const XMMReg4Double v_acc2 =
    4191     1029000 :                     XMMReg4Double::Load4Val(pSrc + nSrcXSize) * fourXWeights;
    4192     1029000 :                 const double dfRowAcc = v_acc.GetHorizSum();
    4193     1029000 :                 const double dfRowAccEnd =
    4194     1029000 :                     pSrc[4] * padfWeightsXShifted[iMin + 4] +
    4195     1029000 :                     pSrc[5] * padfWeightsXShifted[iMin + 5];
    4196     1029000 :                 dfAccumulatorReal +=
    4197     1029000 :                     (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
    4198     1029000 :                 const double dfRowAcc2 = v_acc2.GetHorizSum();
    4199     1029000 :                 const double dfRowAcc2End =
    4200     1029000 :                     pSrc[nSrcXSize + 4] * padfWeightsXShifted[iMin + 4] +
    4201     1029000 :                     pSrc[nSrcXSize + 5] * padfWeightsXShifted[iMin + 5];
    4202     1029000 :                 dfAccumulatorReal +=
    4203     1029000 :                     (dfRowAcc2 + dfRowAcc2End) * padfWeightsYShifted[j + 1];
    4204     1029000 :                 pSrc += 2 * nSrcXSize;
    4205             :             }
    4206      346854 :             if (j == jMax)
    4207             :             {
    4208             :                 // Process last line if there's an odd number of them.
    4209             : 
    4210             :                 const XMMReg4Double v_acc =
    4211       86045 :                     XMMReg4Double::Load4Val(pSrc) * fourXWeights;
    4212       86045 :                 const double dfRowAcc = v_acc.GetHorizSum();
    4213       86045 :                 const double dfRowAccEnd =
    4214       86045 :                     pSrc[4] * padfWeightsXShifted[iMin + 4] +
    4215       86045 :                     pSrc[5] * padfWeightsXShifted[iMin + 5];
    4216       86045 :                 dfAccumulatorReal +=
    4217       86045 :                     (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
    4218             :             }
    4219             :         }
    4220             :         else
    4221             : #endif
    4222             :         {
    4223     5463580 :             for (int j = jMin; j <= jMax; ++j)
    4224             :             {
    4225     5193900 :                 int i = iMin;
    4226     5193900 :                 double dfRowAcc1 = 0.0;
    4227     5193900 :                 double dfRowAcc2 = 0.0;
    4228             :                 // A bit of loop unrolling
    4229    62750600 :                 for (; i < iMax; i += 2)
    4230             :                 {
    4231    57556700 :                     dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
    4232    57556700 :                     dfRowAcc2 += pSrc[i + 1] * padfWeightsXShifted[i + 1];
    4233             :                 }
    4234     5193900 :                 if (i == iMax)
    4235             :                 {
    4236             :                     // Process last column if there's an odd number of them.
    4237      426183 :                     dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
    4238             :                 }
    4239             : 
    4240     5193900 :                 dfAccumulatorReal +=
    4241     5193900 :                     (dfRowAcc1 + dfRowAcc2) * padfWeightsYShifted[j];
    4242     5193900 :                 pSrc += nSrcXSize;
    4243             :             }
    4244             :         }
    4245             : 
    4246             :         // Calculate the output taking into account weighting.
                     :         // Note: *pdfImag is left untouched on this path.
    4247      616524 :         if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
    4248             :         {
    4249      569230 :             const double dfInvAcc = 1.0 / dfAccumulatorWeight;
    4250      569230 :             *pdfReal = dfAccumulatorReal * dfInvAcc;
    4251      569230 :             *pdfDensity = 1.0;
    4252             :         }
    4253             :         else
    4254             :         {
    4255       47294 :             *pdfReal = dfAccumulatorReal;
    4256       47294 :             *pdfDensity = 1.0;
    4257             :         }
    4258             : 
    4259      616524 :         return true;
    4260             :     }
    4261             : 
                     :     // Start one row before jMin: the loop below advances iRowOffset by
                     :     // nSrcXSize before reading each row.
    4262         620 :     GPtrDiff_t iRowOffset =
    4263         620 :         iSrcOffset + static_cast<GPtrDiff_t>(jMin - 1) * nSrcXSize + iMin;
    4264             : 
    4265         620 :     int nCountValid = 0;
    4266         620 :     const bool bIsNonComplex = !GDALDataTypeIsComplex(poWK->eWorkingDataType);
    4267             : 
    4268        3560 :     for (int j = jMin; j <= jMax; ++j)
    4269             :     {
    4270        2940 :         iRowOffset += nSrcXSize;
    4271             : 
    4272             :         // Get pixel values.
    4273             :         // We can potentially read extra elements after the "normal" end of the
    4274             :         // source arrays, but the contract of papabySrcImage[iBand],
    4275             :         // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
    4276             :         // is to have WARP_EXTRA_ELTS reserved at their end.
    4277        2940 :         if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
    4278             :                             padfRowDensity, padfRowReal, padfRowImag))
    4279           0 :             continue;
    4280             : 
    4281        2940 :         const double dfWeight1 = padfWeightsYShifted[j];
    4282             : 
    4283             :         // Iterate over pixels in row.
    4284        2940 :         if (padfRowDensity != nullptr)
    4285             :         {
    4286           0 :             for (int i = iMin; i <= iMax; ++i)
    4287             :             {
    4288             :                 // Skip sampling if pixel has zero density.
    4289           0 :                 if (padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD)
    4290           0 :                     continue;
    4291             : 
    4292           0 :                 nCountValid++;
    4293             : 
    4294             :                 //  Use a cached set of weights for this row.
    4295           0 :                 const double dfWeight2 = dfWeight1 * padfWeightsXShifted[i];
    4296             : 
    4297             :                 // Accumulate!
    4298           0 :                 dfAccumulatorReal += padfRowReal[i - iMin] * dfWeight2;
    4299           0 :                 dfAccumulatorImag += padfRowImag[i - iMin] * dfWeight2;
    4300           0 :                 dfAccumulatorDensity += padfRowDensity[i - iMin] * dfWeight2;
    4301           0 :                 dfAccumulatorWeight += dfWeight2;
    4302             :             }
    4303             :         }
    4304        2940 :         else if (bIsNonComplex)
    4305             :         {
    4306        1764 :             double dfRowAccReal = 0.0;
    4307       10560 :             for (int i = iMin; i <= iMax; ++i)
    4308             :             {
    4309        8796 :                 const double dfWeight2 = padfWeightsXShifted[i];
    4310             : 
    4311             :                 // Accumulate!
    4312        8796 :                 dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
    4313             :             }
    4314             : 
    4315        1764 :             dfAccumulatorReal += dfRowAccReal * dfWeight1;
    4316             :         }
    4317             :         else
    4318             :         {
    4319        1176 :             double dfRowAccReal = 0.0;
    4320        1176 :             double dfRowAccImag = 0.0;
    4321        7040 :             for (int i = iMin; i <= iMax; ++i)
    4322             :             {
    4323        5864 :                 const double dfWeight2 = padfWeightsXShifted[i];
    4324             : 
    4325             :                 // Accumulate!
    4326        5864 :                 dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
    4327        5864 :                 dfRowAccImag += padfRowImag[i - iMin] * dfWeight2;
    4328             :             }
    4329             : 
    4330        1176 :             dfAccumulatorReal += dfRowAccReal * dfWeight1;
    4331        1176 :             dfAccumulatorImag += dfRowAccImag * dfWeight1;
    4332             :         }
    4333             :     }
    4334             : 
                     :     // Reject the sample when there is (almost) no weight or, with density
                     :     // information, no density or too few contributing pixels.
    4335         620 :     if (dfAccumulatorWeight < 0.000001 ||
    4336           0 :         (padfRowDensity != nullptr &&
    4337           0 :          (dfAccumulatorDensity < 0.000001 ||
    4338           0 :           nCountValid < (jMax - jMin + 1) * (iMax - iMin + 1) / 2)))
    4339             :     {
    4340           0 :         *pdfDensity = 0.0;
    4341           0 :         return false;
    4342             :     }
    4343             : 
    4344             :     // Calculate the output taking into account weighting.
    4345         620 :     if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
    4346             :     {
    4347           0 :         const double dfInvAcc = 1.0 / dfAccumulatorWeight;
    4348           0 :         *pdfReal = dfAccumulatorReal * dfInvAcc;
    4349           0 :         *pdfImag = dfAccumulatorImag * dfInvAcc;
    4350           0 :         if (padfRowDensity != nullptr)
    4351           0 :             *pdfDensity = dfAccumulatorDensity * dfInvAcc;
    4352             :         else
    4353           0 :             *pdfDensity = 1.0;
    4354             :     }
    4355             :     else
    4356             :     {
    4357         620 :         *pdfReal = dfAccumulatorReal;
    4358         620 :         *pdfImag = dfAccumulatorImag;
    4359         620 :         if (padfRowDensity != nullptr)
    4360           0 :             *pdfDensity = dfAccumulatorDensity;
    4361             :         else
    4362         620 :             *pdfDensity = 1.0;
    4363             :     }
    4364             : 
    4365         620 :     return true;
    4366             : }
    4367             : 
    4368             : /************************************************************************/
    4369             : /*                        GWKComputeWeights()                           */
    4370             : /************************************************************************/
    4371             : 
    4372     3746130 : static void GWKComputeWeights(GDALResampleAlg eResample, int iMin, int iMax,
    4373             :                               double dfDeltaX, double dfXScale, int jMin,
    4374             :                               int jMax, double dfDeltaY, double dfYScale,
    4375             :                               double *padfWeightsHorizontal,
    4376             :                               double *padfWeightsVertical, double &dfInvWeights)
    4377             : {
    4378             : 
    4379     3746130 :     const FilterFuncType pfnGetWeight = apfGWKFilter[eResample];
    4380     3746130 :     CPLAssert(pfnGetWeight);
    4381     3746130 :     const FilterFunc4ValuesType pfnGetWeight4Values =
    4382     3746130 :         apfGWKFilter4Values[eResample];
    4383     3746130 :     CPLAssert(pfnGetWeight4Values);
    4384             : 
    4385     3746130 :     int i = iMin;  // Used after for.
    4386     3746130 :     int iC = 0;    // Used after for.
    4387             :     // Not zero, but as close as possible to it, to avoid potential division by
    4388             :     // zero at end of function
    4389     3746130 :     double dfAccumulatorWeightHorizontal = std::numeric_limits<double>::min();
    4390     8314040 :     for (; i + 2 < iMax; i += 4, iC += 4)
    4391             :     {
    4392     4566400 :         padfWeightsHorizontal[iC] = (i - dfDeltaX) * dfXScale;
    4393     4566400 :         padfWeightsHorizontal[iC + 1] = padfWeightsHorizontal[iC] + dfXScale;
    4394     4566400 :         padfWeightsHorizontal[iC + 2] =
    4395     4566400 :             padfWeightsHorizontal[iC + 1] + dfXScale;
    4396     4566400 :         padfWeightsHorizontal[iC + 3] =
    4397     4566400 :             padfWeightsHorizontal[iC + 2] + dfXScale;
    4398     4567910 :         dfAccumulatorWeightHorizontal +=
    4399     4566400 :             pfnGetWeight4Values(padfWeightsHorizontal + iC);
    4400             :     }
    4401     3953690 :     for (; i <= iMax; ++i, ++iC)
    4402             :     {
    4403      220112 :         const double dfWeight = pfnGetWeight((i - dfDeltaX) * dfXScale);
    4404      206053 :         padfWeightsHorizontal[iC] = dfWeight;
    4405      206053 :         dfAccumulatorWeightHorizontal += dfWeight;
    4406             :     }
    4407             : 
    4408     3733580 :     int j = jMin;  // Used after for.
    4409     3733580 :     int jC = 0;    // Used after for.
    4410             :     // Not zero, but as close as possible to it, to avoid potential division by
    4411             :     // zero at end of function
    4412     3733580 :     double dfAccumulatorWeightVertical = std::numeric_limits<double>::min();
    4413     7880320 :     for (; j + 2 < jMax; j += 4, jC += 4)
    4414             :     {
    4415     4143760 :         padfWeightsVertical[jC] = (j - dfDeltaY) * dfYScale;
    4416     4143760 :         padfWeightsVertical[jC + 1] = padfWeightsVertical[jC] + dfYScale;
    4417     4143760 :         padfWeightsVertical[jC + 2] = padfWeightsVertical[jC + 1] + dfYScale;
    4418     4143760 :         padfWeightsVertical[jC + 3] = padfWeightsVertical[jC + 2] + dfYScale;
    4419     4146740 :         dfAccumulatorWeightVertical +=
    4420     4143760 :             pfnGetWeight4Values(padfWeightsVertical + jC);
    4421             :     }
    4422     8238600 :     for (; j <= jMax; ++j, ++jC)
    4423             :     {
    4424     4496340 :         const double dfWeight = pfnGetWeight((j - dfDeltaY) * dfYScale);
    4425     4502030 :         padfWeightsVertical[jC] = dfWeight;
    4426     4502030 :         dfAccumulatorWeightVertical += dfWeight;
    4427             :     }
    4428             : 
    4429     3742250 :     dfInvWeights =
    4430     3742250 :         1. / (dfAccumulatorWeightHorizontal * dfAccumulatorWeightVertical);
    4431     3742250 : }
    4432             : 
    4433             : /************************************************************************/
    4434             : /*                        GWKResampleNoMasksT()                         */
    4435             : /************************************************************************/
    4436             : 
    4437             : template <class T>
    4438             : static bool
    4439             : GWKResampleNoMasksT(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
    4440             :                     double dfSrcY, T *pValue, double *padfWeightsHorizontal,
    4441             :                     double *padfWeightsVertical, double &dfInvWeights)
    4442             : 
    4443             : {
    4444             :     // Commonly used; save locally.
    4445             :     const int nSrcXSize = poWK->nSrcXSize;
    4446             :     const int nSrcYSize = poWK->nSrcYSize;
    4447             : 
    4448             :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    4449             :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    4450             :     const GPtrDiff_t iSrcOffset =
    4451             :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    4452             : 
    4453             :     const int nXRadius = poWK->nXRadius;
    4454             :     const int nYRadius = poWK->nYRadius;
    4455             : 
    4456             :     // Politely refuse to process invalid coordinates or obscenely small image.
    4457             :     if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
    4458             :         nYRadius > nSrcYSize)
    4459             :         return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    4460             :                                                   pValue);
    4461             : 
    4462             :     T *pSrcBand = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
    4463             :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    4464             :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    4465             : 
    4466             :     const double dfXScale = std::min(poWK->dfXScale, 1.0);
    4467             :     const double dfYScale = std::min(poWK->dfYScale, 1.0);
    4468             : 
    4469             :     int iMin = 1 - nXRadius;
    4470             :     if (iSrcX + iMin < 0)
    4471             :         iMin = -iSrcX;
    4472             :     int iMax = nXRadius;
    4473             :     if (iSrcX + iMax >= nSrcXSize - 1)
    4474             :         iMax = nSrcXSize - 1 - iSrcX;
    4475             : 
    4476             :     int jMin = 1 - nYRadius;
    4477             :     if (iSrcY + jMin < 0)
    4478             :         jMin = -iSrcY;
    4479             :     int jMax = nYRadius;
    4480             :     if (iSrcY + jMax >= nSrcYSize - 1)
    4481             :         jMax = nSrcYSize - 1 - iSrcY;
    4482             : 
    4483             :     if (iBand == 0)
    4484             :     {
    4485             :         GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
    4486             :                           jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
    4487             :                           padfWeightsVertical, dfInvWeights);
    4488             :     }
    4489             : 
    4490             :     // Loop over all rows in the kernel.
    4491             :     double dfAccumulator = 0.0;
    4492             :     for (int jC = 0, j = jMin; j <= jMax; ++j, ++jC)
    4493             :     {
    4494             :         const GPtrDiff_t iSampJ =
    4495             :             iSrcOffset + static_cast<GPtrDiff_t>(j) * nSrcXSize;
    4496             : 
    4497             :         // Loop over all pixels in the row.
    4498             :         double dfAccumulatorLocal = 0.0;
    4499             :         double dfAccumulatorLocal2 = 0.0;
    4500             :         int iC = 0;
    4501             :         int i = iMin;
    4502             :         // Process by chunk of 4 cols.
    4503             :         for (; i + 2 < iMax; i += 4, iC += 4)
    4504             :         {
    4505             :             // Retrieve the pixel & accumulate.
    4506             :             dfAccumulatorLocal +=
    4507             :                 pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
    4508             :             dfAccumulatorLocal +=
    4509             :                 pSrcBand[i + 1 + iSampJ] * padfWeightsHorizontal[iC + 1];
    4510             :             dfAccumulatorLocal2 +=
    4511             :                 pSrcBand[i + 2 + iSampJ] * padfWeightsHorizontal[iC + 2];
    4512             :             dfAccumulatorLocal2 +=
    4513             :                 pSrcBand[i + 3 + iSampJ] * padfWeightsHorizontal[iC + 3];
    4514             :         }
    4515             :         dfAccumulatorLocal += dfAccumulatorLocal2;
    4516             :         if (i < iMax)
    4517             :         {
    4518             :             dfAccumulatorLocal +=
    4519             :                 pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
    4520             :             dfAccumulatorLocal +=
    4521             :                 pSrcBand[i + 1 + iSampJ] * padfWeightsHorizontal[iC + 1];
    4522             :             i += 2;
    4523             :             iC += 2;
    4524             :         }
    4525             :         if (i == iMax)
    4526             :         {
    4527             :             dfAccumulatorLocal +=
    4528             :                 pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
    4529             :         }
    4530             : 
    4531             :         dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
    4532             :     }
    4533             : 
    4534             :     *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
    4535             : 
    4536             :     return true;
    4537             : }
    4538             : 
    4539             : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
    4540             : /* Could possibly be used too on 32bit, but we would need to check at runtime */
    4541             : #if defined(USE_SSE2)
    4542             : 
    4543             : /************************************************************************/
    4544             : /*                    GWKResampleNoMasks_SSE2_T()                       */
    4545             : /************************************************************************/
    4546             : 
    4547             : template <class T>
    4548     9092963 : static bool GWKResampleNoMasks_SSE2_T(const GDALWarpKernel *poWK, int iBand,
    4549             :                                       double dfSrcX, double dfSrcY, T *pValue,
    4550             :                                       double *padfWeightsHorizontal,
    4551             :                                       double *padfWeightsVertical,
    4552             :                                       double &dfInvWeights)
    4553             : {
    4554             :     // Commonly used; save locally.
    4555     9092963 :     const int nSrcXSize = poWK->nSrcXSize;
    4556     9092963 :     const int nSrcYSize = poWK->nSrcYSize;
    4557             : 
    4558     9092963 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    4559     9092963 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    4560     9092963 :     const GPtrDiff_t iSrcOffset =
    4561     9092963 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    4562     9092963 :     const int nXRadius = poWK->nXRadius;
    4563     9092963 :     const int nYRadius = poWK->nYRadius;
    4564             : 
    4565             :     // Politely refuse to process invalid coordinates or obscenely small image.
    4566     9092963 :     if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
    4567             :         nYRadius > nSrcYSize)
    4568           2 :         return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    4569           3 :                                                   pValue);
    4570             : 
    4571     9169401 :     const T *pSrcBand =
    4572     9169401 :         reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
    4573             : 
    4574     9169401 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    4575     9169401 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    4576     9169401 :     const double dfXScale = std::min(poWK->dfXScale, 1.0);
    4577     9165771 :     const double dfYScale = std::min(poWK->dfYScale, 1.0);
    4578             : 
    4579     9131601 :     int iMin = 1 - nXRadius;
    4580     9131601 :     if (iSrcX + iMin < 0)
    4581       43143 :         iMin = -iSrcX;
    4582     9131601 :     int iMax = nXRadius;
    4583     9131601 :     if (iSrcX + iMax >= nSrcXSize - 1)
    4584       38106 :         iMax = nSrcXSize - 1 - iSrcX;
    4585             : 
    4586     9131601 :     int jMin = 1 - nYRadius;
    4587     9131601 :     if (iSrcY + jMin < 0)
    4588       49554 :         jMin = -iSrcY;
    4589     9131601 :     int jMax = nYRadius;
    4590     9131601 :     if (iSrcY + jMax >= nSrcYSize - 1)
    4591       36028 :         jMax = nSrcYSize - 1 - iSrcY;
    4592             : 
    4593     9131601 :     if (iBand == 0)
    4594             :     {
    4595     3744881 :         GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
    4596             :                           jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
    4597             :                           padfWeightsVertical, dfInvWeights);
    4598             :     }
    4599             : 
    4600     9112371 :     GPtrDiff_t iSampJ = iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
    4601             :     // Process by chunk of 4 rows.
    4602     9112371 :     int jC = 0;
    4603     9112371 :     int j = jMin;
    4604     9112371 :     double dfAccumulator = 0.0;
    4605    19367593 :     for (; j + 2 < jMax; j += 4, iSampJ += 4 * nSrcXSize, jC += 4)
    4606             :     {
    4607             :         // Loop over all pixels in the row.
    4608    10257712 :         int iC = 0;
    4609    10257712 :         int i = iMin;
    4610             :         // Process by chunk of 4 cols.
    4611    10257712 :         XMMReg4Double v_acc_1 = XMMReg4Double::Zero();
    4612    10219462 :         XMMReg4Double v_acc_2 = XMMReg4Double::Zero();
    4613    10219632 :         XMMReg4Double v_acc_3 = XMMReg4Double::Zero();
    4614    10209202 :         XMMReg4Double v_acc_4 = XMMReg4Double::Zero();
    4615    26722480 :         for (; i + 2 < iMax; i += 4, iC += 4)
    4616             :         {
    4617             :             // Retrieve the pixel & accumulate.
    4618    16484588 :             XMMReg4Double v_pixels_1 =
    4619    16484588 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
    4620    16506988 :             XMMReg4Double v_pixels_2 =
    4621    16506988 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + nSrcXSize);
    4622    16498388 :             XMMReg4Double v_pixels_3 =
    4623    16498388 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
    4624    16471188 :             XMMReg4Double v_pixels_4 =
    4625    16471188 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
    4626             : 
    4627    16509588 :             XMMReg4Double v_padfWeight =
    4628    16509588 :                 XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
    4629             : 
    4630    16503388 :             v_acc_1 += v_pixels_1 * v_padfWeight;
    4631    16488788 :             v_acc_2 += v_pixels_2 * v_padfWeight;
    4632    16499888 :             v_acc_3 += v_pixels_3 * v_padfWeight;
    4633    16504388 :             v_acc_4 += v_pixels_4 * v_padfWeight;
    4634             :         }
    4635             : 
    4636    10237882 :         if (i < iMax)
    4637             :         {
    4638      142910 :             XMMReg2Double v_pixels_1 =
    4639      142910 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ);
    4640      142910 :             XMMReg2Double v_pixels_2 =
    4641      142910 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + nSrcXSize);
    4642      142910 :             XMMReg2Double v_pixels_3 =
    4643      142910 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
    4644      142910 :             XMMReg2Double v_pixels_4 =
    4645      142910 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
    4646             : 
    4647      142910 :             XMMReg2Double v_padfWeight =
    4648      142910 :                 XMMReg2Double::Load2Val(padfWeightsHorizontal + iC);
    4649             : 
    4650      142910 :             v_acc_1.AddToLow(v_pixels_1 * v_padfWeight);
    4651      142910 :             v_acc_2.AddToLow(v_pixels_2 * v_padfWeight);
    4652      142910 :             v_acc_3.AddToLow(v_pixels_3 * v_padfWeight);
    4653      142910 :             v_acc_4.AddToLow(v_pixels_4 * v_padfWeight);
    4654             : 
    4655      142910 :             i += 2;
    4656      142910 :             iC += 2;
    4657             :         }
    4658             : 
    4659    10237882 :         double dfAccumulatorLocal_1 = v_acc_1.GetHorizSum();
    4660    10240892 :         double dfAccumulatorLocal_2 = v_acc_2.GetHorizSum();
    4661    10243262 :         double dfAccumulatorLocal_3 = v_acc_3.GetHorizSum();
    4662    10253112 :         double dfAccumulatorLocal_4 = v_acc_4.GetHorizSum();
    4663             : 
    4664    10255172 :         if (i == iMax)
    4665             :         {
    4666       49195 :             dfAccumulatorLocal_1 += static_cast<double>(pSrcBand[i + iSampJ]) *
    4667       49195 :                                     padfWeightsHorizontal[iC];
    4668       49195 :             dfAccumulatorLocal_2 +=
    4669       49195 :                 static_cast<double>(pSrcBand[i + iSampJ + nSrcXSize]) *
    4670       49195 :                 padfWeightsHorizontal[iC];
    4671       49195 :             dfAccumulatorLocal_3 +=
    4672       49195 :                 static_cast<double>(pSrcBand[i + iSampJ + 2 * nSrcXSize]) *
    4673       49195 :                 padfWeightsHorizontal[iC];
    4674       49195 :             dfAccumulatorLocal_4 +=
    4675       49195 :                 static_cast<double>(pSrcBand[i + iSampJ + 3 * nSrcXSize]) *
    4676       49195 :                 padfWeightsHorizontal[iC];
    4677             :         }
    4678             : 
    4679    10255172 :         dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal_1;
    4680    10255172 :         dfAccumulator += padfWeightsVertical[jC + 1] * dfAccumulatorLocal_2;
    4681    10255172 :         dfAccumulator += padfWeightsVertical[jC + 2] * dfAccumulatorLocal_3;
    4682    10255172 :         dfAccumulator += padfWeightsVertical[jC + 3] * dfAccumulatorLocal_4;
    4683             :     }
    4684    22104241 :     for (; j <= jMax; ++j, iSampJ += nSrcXSize, ++jC)
    4685             :     {
    4686             :         // Loop over all pixels in the row.
    4687    12930740 :         int iC = 0;
    4688    12930740 :         int i = iMin;
    4689             :         // Process by chunk of 4 cols.
    4690    12930740 :         XMMReg4Double v_acc = XMMReg4Double::Zero();
    4691    26195463 :         for (; i + 2 < iMax; i += 4, iC += 4)
    4692             :         {
    4693             :             // Retrieve the pixel & accumulate.
    4694    13086923 :             XMMReg4Double v_pixels =
    4695    13086923 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
    4696    13098023 :             XMMReg4Double v_padfWeight =
    4697    13098023 :                 XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
    4698             : 
    4699    13053223 :             v_acc += v_pixels * v_padfWeight;
    4700             :         }
    4701             : 
    4702    13108540 :         double dfAccumulatorLocal = v_acc.GetHorizSum();
    4703             : 
    4704    12994340 :         if (i < iMax)
    4705             :         {
    4706      173964 :             dfAccumulatorLocal +=
    4707      173964 :                 pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
    4708      173964 :             dfAccumulatorLocal +=
    4709      173964 :                 pSrcBand[i + 1 + iSampJ] * padfWeightsHorizontal[iC + 1];
    4710      173964 :             i += 2;
    4711      173964 :             iC += 2;
    4712             :         }
    4713    12994340 :         if (i == iMax)
    4714             :         {
    4715       33020 :             dfAccumulatorLocal += static_cast<double>(pSrcBand[i + iSampJ]) *
    4716       33020 :                                   padfWeightsHorizontal[iC];
    4717             :         }
    4718             : 
    4719    12994340 :         dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
    4720             :     }
    4721             : 
    4722     9173531 :     *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
    4723             : 
    4724     9143211 :     return true;
    4725             : }
    4726             : 
    4727             : /************************************************************************/
    4728             : /*                     GWKResampleNoMasksT<GByte>()                     */
    4729             : /************************************************************************/
    4730             : 
    4731             : template <>
    4732     8578220 : bool GWKResampleNoMasksT<GByte>(const GDALWarpKernel *poWK, int iBand,
    4733             :                                 double dfSrcX, double dfSrcY, GByte *pValue,
    4734             :                                 double *padfWeightsHorizontal,
    4735             :                                 double *padfWeightsVertical,
    4736             :                                 double &dfInvWeights)
    4737             : {
    4738     8578220 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    4739             :                                      padfWeightsHorizontal, padfWeightsVertical,
    4740     8578280 :                                      dfInvWeights);
    4741             : }
    4742             : 
    4743             : /************************************************************************/
    4744             : /*                     GWKResampleNoMasksT<GInt16>()                    */
    4745             : /************************************************************************/
    4746             : 
    4747             : template <>
    4748      252563 : bool GWKResampleNoMasksT<GInt16>(const GDALWarpKernel *poWK, int iBand,
    4749             :                                  double dfSrcX, double dfSrcY, GInt16 *pValue,
    4750             :                                  double *padfWeightsHorizontal,
    4751             :                                  double *padfWeightsVertical,
    4752             :                                  double &dfInvWeights)
    4753             : {
    4754      252563 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    4755             :                                      padfWeightsHorizontal, padfWeightsVertical,
    4756      252563 :                                      dfInvWeights);
    4757             : }
    4758             : 
    4759             : /************************************************************************/
    4760             : /*                     GWKResampleNoMasksT<GUInt16>()                   */
    4761             : /************************************************************************/
    4762             : 
    4763             : template <>
    4764      343440 : bool GWKResampleNoMasksT<GUInt16>(const GDALWarpKernel *poWK, int iBand,
    4765             :                                   double dfSrcX, double dfSrcY, GUInt16 *pValue,
    4766             :                                   double *padfWeightsHorizontal,
    4767             :                                   double *padfWeightsVertical,
    4768             :                                   double &dfInvWeights)
    4769             : {
    4770      343440 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    4771             :                                      padfWeightsHorizontal, padfWeightsVertical,
    4772      343440 :                                      dfInvWeights);
    4773             : }
    4774             : 
    4775             : /************************************************************************/
    4776             : /*                     GWKResampleNoMasksT<float>()                     */
    4777             : /************************************************************************/
    4778             : 
    4779             : template <>
    4780        2500 : bool GWKResampleNoMasksT<float>(const GDALWarpKernel *poWK, int iBand,
    4781             :                                 double dfSrcX, double dfSrcY, float *pValue,
    4782             :                                 double *padfWeightsHorizontal,
    4783             :                                 double *padfWeightsVertical,
    4784             :                                 double &dfInvWeights)
    4785             : {
    4786        2500 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    4787             :                                      padfWeightsHorizontal, padfWeightsVertical,
    4788        2500 :                                      dfInvWeights);
    4789             : }
    4790             : 
    4791             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    4792             : 
    4793             : /************************************************************************/
    4794             : /*                     GWKResampleNoMasksT<double>()                    */
    4795             : /************************************************************************/
    4796             : 
    4797             : template <>
    4798             : bool GWKResampleNoMasksT<double>(const GDALWarpKernel *poWK, int iBand,
    4799             :                                  double dfSrcX, double dfSrcY, double *pValue,
    4800             :                                  double *padfWeightsHorizontal,
    4801             :                                  double *padfWeightsVertical,
    4802             :                                  double &dfInvWeights)
    4803             : {
    4804             :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    4805             :                                      padfWeightsHorizontal, padfWeightsVertical,
    4806             :                                      dfInvWeights);
    4807             : }
    4808             : 
    4809             : #endif /* INSTANTIATE_FLOAT64_SSE2_IMPL */
    4810             : 
    4811             : #endif /* defined(USE_SSE2) */
    4812             : 
    4813             : /************************************************************************/
    4814             : /*                     GWKRoundSourceCoordinates()                      */
    4815             : /************************************************************************/
    4816             : 
    4817        1000 : static void GWKRoundSourceCoordinates(
    4818             :     int nDstXSize, double *padfX, double *padfY, double *padfZ, int *pabSuccess,
    4819             :     double dfSrcCoordPrecision, double dfErrorThreshold,
    4820             :     GDALTransformerFunc pfnTransformer, void *pTransformerArg, double dfDstXOff,
    4821             :     double dfDstY)
    4822             : {
    4823        1000 :     double dfPct = 0.8;
    4824        1000 :     if (dfErrorThreshold > 0 && dfSrcCoordPrecision / dfErrorThreshold >= 10.0)
    4825             :     {
    4826        1000 :         dfPct = 1.0 - 2 * 1.0 / (dfSrcCoordPrecision / dfErrorThreshold);
    4827             :     }
    4828        1000 :     const double dfExactTransformThreshold = 0.5 * dfPct * dfSrcCoordPrecision;
    4829             : 
    4830      501000 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    4831             :     {
    4832      500000 :         const double dfXBefore = padfX[iDstX];
    4833      500000 :         const double dfYBefore = padfY[iDstX];
    4834      500000 :         padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
    4835             :                        dfSrcCoordPrecision;
    4836      500000 :         padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
    4837             :                        dfSrcCoordPrecision;
    4838             : 
    4839             :         // If we are in an uncertainty zone, go to non-approximated
    4840             :         // transformation.
    4841             :         // Due to the 80% of half-precision threshold, dfSrcCoordPrecision must
    4842             :         // be at least 10 times greater than the approximation error.
    4843      500000 :         if (fabs(dfXBefore - padfX[iDstX]) > dfExactTransformThreshold ||
    4844      399914 :             fabs(dfYBefore - padfY[iDstX]) > dfExactTransformThreshold)
    4845             :         {
    4846      180090 :             padfX[iDstX] = iDstX + dfDstXOff;
    4847      180090 :             padfY[iDstX] = dfDstY;
    4848      180090 :             padfZ[iDstX] = 0.0;
    4849      180090 :             pfnTransformer(pTransformerArg, TRUE, 1, padfX + iDstX,
    4850      180090 :                            padfY + iDstX, padfZ + iDstX, pabSuccess + iDstX);
    4851      180090 :             padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
    4852             :                            dfSrcCoordPrecision;
    4853      180090 :             padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
    4854             :                            dfSrcCoordPrecision;
    4855             :         }
    4856             :     }
    4857        1000 : }
    4858             : 
    4859             : /************************************************************************/
    4860             : /*                           GWKOpenCLCase()                            */
    4861             : /*                                                                      */
    4862             : /*      This is identical to GWKGeneralCase(), but functions via        */
    4863             : /*      OpenCL. This means we have vector optimization (SSE) and/or     */
    4864             : /*      GPU optimization depending on our prefs. The code itself is     */
    4865             : /*      general and not optimized, but by defining constants we can     */
    4866             : /*      make some pretty darn good code on the fly.                     */
    4867             : /************************************************************************/
    4868             : 
    4869             : #if defined(HAVE_OPENCL)
    4870           0 : static CPLErr GWKOpenCLCase(GDALWarpKernel *poWK)
    4871             : {
    4872           0 :     const int nDstXSize = poWK->nDstXSize;
    4873           0 :     const int nDstYSize = poWK->nDstYSize;
    4874           0 :     const int nSrcXSize = poWK->nSrcXSize;
    4875           0 :     const int nSrcYSize = poWK->nSrcYSize;
    4876           0 :     const int nDstXOff = poWK->nDstXOff;
    4877           0 :     const int nDstYOff = poWK->nDstYOff;
    4878           0 :     const int nSrcXOff = poWK->nSrcXOff;
    4879           0 :     const int nSrcYOff = poWK->nSrcYOff;
    4880           0 :     bool bUseImag = false;
    4881             : 
    4882             :     cl_channel_type imageFormat;
    4883           0 :     switch (poWK->eWorkingDataType)
    4884             :     {
    4885           0 :         case GDT_Byte:
    4886           0 :             imageFormat = CL_UNORM_INT8;
    4887           0 :             break;
    4888           0 :         case GDT_UInt16:
    4889           0 :             imageFormat = CL_UNORM_INT16;
    4890           0 :             break;
    4891           0 :         case GDT_CInt16:
    4892           0 :             bUseImag = true;
    4893             :             [[fallthrough]];
    4894           0 :         case GDT_Int16:
    4895           0 :             imageFormat = CL_SNORM_INT16;
    4896           0 :             break;
    4897           0 :         case GDT_CFloat32:
    4898           0 :             bUseImag = true;
    4899             :             [[fallthrough]];
    4900           0 :         case GDT_Float32:
    4901           0 :             imageFormat = CL_FLOAT;
    4902           0 :             break;
    4903           0 :         default:
    4904             :             // No support for higher precision formats.
    4905           0 :             CPLDebug("OpenCL", "Unsupported resampling OpenCL data type %d.",
    4906           0 :                      static_cast<int>(poWK->eWorkingDataType));
    4907           0 :             return CE_Warning;
    4908             :     }
    4909             : 
    4910             :     OCLResampAlg resampAlg;
    4911           0 :     switch (poWK->eResample)
    4912             :     {
    4913           0 :         case GRA_Bilinear:
    4914           0 :             resampAlg = OCL_Bilinear;
    4915           0 :             break;
    4916           0 :         case GRA_Cubic:
    4917           0 :             resampAlg = OCL_Cubic;
    4918           0 :             break;
    4919           0 :         case GRA_CubicSpline:
    4920           0 :             resampAlg = OCL_CubicSpline;
    4921           0 :             break;
    4922           0 :         case GRA_Lanczos:
    4923           0 :             resampAlg = OCL_Lanczos;
    4924           0 :             break;
    4925           0 :         default:
    4926             :             // No support for higher precision formats.
    4927           0 :             CPLDebug("OpenCL",
    4928             :                      "Unsupported resampling OpenCL resampling alg %d.",
    4929           0 :                      static_cast<int>(poWK->eResample));
    4930           0 :             return CE_Warning;
    4931             :     }
    4932             : 
    4933           0 :     struct oclWarper *warper = nullptr;
    4934             :     cl_int err;
    4935           0 :     CPLErr eErr = CE_None;
    4936             : 
    4937             :     // TODO(schwehr): Fix indenting.
    4938             :     try
    4939             :     {
    4940             : 
    4941             :         // Using a factor of 2 or 4 seems to have much less rounding error
    4942             :         // than 3 on the GPU.
    4943             :         // Then the rounding error can cause strange artifacts under the
    4944             :         // right conditions.
    4945           0 :         warper = GDALWarpKernelOpenCL_createEnv(
    4946             :             nSrcXSize, nSrcYSize, nDstXSize, nDstYSize, imageFormat,
    4947           0 :             poWK->nBands, 4, bUseImag, poWK->papanBandSrcValid != nullptr,
    4948             :             poWK->pafDstDensity, poWK->padfDstNoDataReal, resampAlg, &err);
    4949             : 
    4950           0 :         if (err != CL_SUCCESS || warper == nullptr)
    4951             :         {
    4952           0 :             eErr = CE_Warning;
    4953           0 :             if (warper != nullptr)
    4954           0 :                 throw eErr;
    4955           0 :             return eErr;
    4956             :         }
    4957             : 
    4958           0 :         CPLDebug("GDAL",
    4959             :                  "GDALWarpKernel()::GWKOpenCLCase() "
    4960             :                  "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d",
    4961             :                  nSrcXOff, nSrcYOff, nSrcXSize, nSrcYSize, nDstXOff, nDstYOff,
    4962             :                  nDstXSize, nDstYSize);
    4963             : 
    4964           0 :         if (!poWK->pfnProgress(poWK->dfProgressBase, "", poWK->pProgress))
    4965             :         {
    4966           0 :             CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
    4967           0 :             eErr = CE_Failure;
    4968           0 :             throw eErr;
    4969             :         }
    4970             : 
    4971             :         /* ====================================================================
    4972             :          */
    4973             :         /*      Loop over bands. */
    4974             :         /* ====================================================================
    4975             :          */
    4976           0 :         for (int iBand = 0; iBand < poWK->nBands; iBand++)
    4977             :         {
    4978           0 :             if (poWK->papanBandSrcValid != nullptr &&
    4979           0 :                 poWK->papanBandSrcValid[iBand] != nullptr)
    4980             :             {
    4981           0 :                 GDALWarpKernelOpenCL_setSrcValid(
    4982             :                     warper,
    4983           0 :                     reinterpret_cast<int *>(poWK->papanBandSrcValid[iBand]),
    4984             :                     iBand);
    4985           0 :                 if (err != CL_SUCCESS)
    4986             :                 {
    4987           0 :                     CPLError(
    4988             :                         CE_Failure, CPLE_AppDefined,
    4989             :                         "OpenCL routines reported failure (%d) on line %d.",
    4990             :                         static_cast<int>(err), __LINE__);
    4991           0 :                     eErr = CE_Failure;
    4992           0 :                     throw eErr;
    4993             :                 }
    4994             :             }
    4995             : 
    4996           0 :             err = GDALWarpKernelOpenCL_setSrcImg(
    4997           0 :                 warper, poWK->papabySrcImage[iBand], iBand);
    4998           0 :             if (err != CL_SUCCESS)
    4999             :             {
    5000           0 :                 CPLError(CE_Failure, CPLE_AppDefined,
    5001             :                          "OpenCL routines reported failure (%d) on line %d.",
    5002             :                          static_cast<int>(err), __LINE__);
    5003           0 :                 eErr = CE_Failure;
    5004           0 :                 throw eErr;
    5005             :             }
    5006             : 
    5007           0 :             err = GDALWarpKernelOpenCL_setDstImg(
    5008           0 :                 warper, poWK->papabyDstImage[iBand], iBand);
    5009           0 :             if (err != CL_SUCCESS)
    5010             :             {
    5011           0 :                 CPLError(CE_Failure, CPLE_AppDefined,
    5012             :                          "OpenCL routines reported failure (%d) on line %d.",
    5013             :                          static_cast<int>(err), __LINE__);
    5014           0 :                 eErr = CE_Failure;
    5015           0 :                 throw eErr;
    5016             :             }
    5017             :         }
    5018             : 
    5019             :         /* --------------------------------------------------------------------
    5020             :          */
    5021             :         /*      Allocate x,y,z coordinate arrays for transformation ... one */
    5022             :         /*      scanlines worth of positions. */
    5023             :         /* --------------------------------------------------------------------
    5024             :          */
    5025             : 
    5026             :         // For x, 2 *, because we cache the precomputed values at the end.
    5027             :         double *padfX =
    5028           0 :             static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    5029             :         double *padfY =
    5030           0 :             static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5031             :         double *padfZ =
    5032           0 :             static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5033             :         int *pabSuccess =
    5034           0 :             static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    5035           0 :         const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    5036           0 :             poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    5037           0 :         const double dfErrorThreshold = CPLAtof(CSLFetchNameValueDef(
    5038           0 :             poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    5039             : 
    5040             :         // Precompute values.
    5041           0 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5042           0 :             padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    5043             : 
    5044             :         /* ====================================================================
    5045             :          */
    5046             :         /*      Loop over output lines. */
    5047             :         /* ====================================================================
    5048             :          */
    5049           0 :         for (int iDstY = 0; iDstY < nDstYSize && eErr == CE_None; ++iDstY)
    5050             :         {
    5051             :             /* ----------------------------------------------------------------
    5052             :              */
    5053             :             /*      Setup points to transform to source image space. */
    5054             :             /* ----------------------------------------------------------------
    5055             :              */
    5056           0 :             memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    5057           0 :             const double dfYConst = iDstY + 0.5 + poWK->nDstYOff;
    5058           0 :             for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5059           0 :                 padfY[iDstX] = dfYConst;
    5060           0 :             memset(padfZ, 0, sizeof(double) * nDstXSize);
    5061             : 
    5062             :             /* ----------------------------------------------------------------
    5063             :              */
    5064             :             /*      Transform the points from destination pixel/line
    5065             :              * coordinates*/
    5066             :             /*      to source pixel/line coordinates. */
    5067             :             /* ----------------------------------------------------------------
    5068             :              */
    5069           0 :             poWK->pfnTransformer(poWK->pTransformerArg, TRUE, nDstXSize, padfX,
    5070             :                                  padfY, padfZ, pabSuccess);
    5071           0 :             if (dfSrcCoordPrecision > 0.0)
    5072             :             {
    5073           0 :                 GWKRoundSourceCoordinates(
    5074             :                     nDstXSize, padfX, padfY, padfZ, pabSuccess,
    5075             :                     dfSrcCoordPrecision, dfErrorThreshold, poWK->pfnTransformer,
    5076             :                     poWK->pTransformerArg, 0.5 + nDstXOff,
    5077           0 :                     iDstY + 0.5 + nDstYOff);
    5078             :             }
    5079             : 
    5080           0 :             err = GDALWarpKernelOpenCL_setCoordRow(
    5081             :                 warper, padfX, padfY, nSrcXOff, nSrcYOff, pabSuccess, iDstY);
    5082           0 :             if (err != CL_SUCCESS)
    5083             :             {
    5084           0 :                 CPLError(CE_Failure, CPLE_AppDefined,
    5085             :                          "OpenCL routines reported failure (%d) on line %d.",
    5086             :                          static_cast<int>(err), __LINE__);
    5087           0 :                 eErr = CE_Failure;
    5088           0 :                 break;
    5089             :             }
    5090             : 
    5091             :             // Update the valid & density masks because we don't do so in the
    5092             :             // kernel.
    5093           0 :             for (int iDstX = 0; iDstX < nDstXSize && eErr == CE_None; iDstX++)
    5094             :             {
    5095           0 :                 const double dfX = padfX[iDstX];
    5096           0 :                 const double dfY = padfY[iDstX];
    5097           0 :                 const GPtrDiff_t iDstOffset =
    5098           0 :                     iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    5099             : 
    5100             :                 // See GWKGeneralCase() for appropriate commenting.
    5101           0 :                 if (!pabSuccess[iDstX] || dfX < nSrcXOff || dfY < nSrcYOff)
    5102           0 :                     continue;
    5103             : 
    5104           0 :                 int iSrcX = static_cast<int>(dfX) - nSrcXOff;
    5105           0 :                 int iSrcY = static_cast<int>(dfY) - nSrcYOff;
    5106             : 
    5107           0 :                 if (iSrcX < 0 || iSrcX >= nSrcXSize || iSrcY < 0 ||
    5108             :                     iSrcY >= nSrcYSize)
    5109           0 :                     continue;
    5110             : 
    5111           0 :                 GPtrDiff_t iSrcOffset =
    5112           0 :                     iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    5113           0 :                 double dfDensity = 1.0;
    5114             : 
    5115           0 :                 if (poWK->pafUnifiedSrcDensity != nullptr && iSrcX >= 0 &&
    5116           0 :                     iSrcY >= 0 && iSrcX < nSrcXSize && iSrcY < nSrcYSize)
    5117           0 :                     dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    5118             : 
    5119           0 :                 GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    5120             : 
    5121             :                 // Because this is on the bit-wise level, it can't be done well
    5122             :                 // in OpenCL.
    5123           0 :                 if (poWK->panDstValid != nullptr)
    5124           0 :                     poWK->panDstValid[iDstOffset >> 5] |=
    5125           0 :                         0x01 << (iDstOffset & 0x1f);
    5126             :             }
    5127             :         }
    5128             : 
    5129           0 :         CPLFree(padfX);
    5130           0 :         CPLFree(padfY);
    5131           0 :         CPLFree(padfZ);
    5132           0 :         CPLFree(pabSuccess);
    5133             : 
    5134           0 :         if (eErr != CE_None)
    5135           0 :             throw eErr;
    5136             : 
    5137           0 :         err = GDALWarpKernelOpenCL_runResamp(
    5138             :             warper, poWK->pafUnifiedSrcDensity, poWK->panUnifiedSrcValid,
    5139             :             poWK->pafDstDensity, poWK->panDstValid, poWK->dfXScale,
    5140             :             poWK->dfYScale, poWK->dfXFilter, poWK->dfYFilter, poWK->nXRadius,
    5141             :             poWK->nYRadius, poWK->nFiltInitX, poWK->nFiltInitY);
    5142             : 
    5143           0 :         if (err != CL_SUCCESS)
    5144             :         {
    5145           0 :             CPLError(CE_Failure, CPLE_AppDefined,
    5146             :                      "OpenCL routines reported failure (%d) on line %d.",
    5147             :                      static_cast<int>(err), __LINE__);
    5148           0 :             eErr = CE_Failure;
    5149           0 :             throw eErr;
    5150             :         }
    5151             : 
    5152             :         /* ====================================================================
    5153             :          */
    5154             :         /*      Loop over output lines. */
    5155             :         /* ====================================================================
    5156             :          */
    5157           0 :         for (int iDstY = 0; iDstY < nDstYSize && eErr == CE_None; iDstY++)
    5158             :         {
    5159           0 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    5160             :             {
    5161           0 :                 void *rowReal = nullptr;
    5162           0 :                 void *rowImag = nullptr;
    5163           0 :                 GByte *pabyDst = poWK->papabyDstImage[iBand];
    5164             : 
    5165           0 :                 err = GDALWarpKernelOpenCL_getRow(warper, &rowReal, &rowImag,
    5166             :                                                   iDstY, iBand);
    5167           0 :                 if (err != CL_SUCCESS)
    5168             :                 {
    5169           0 :                     CPLError(
    5170             :                         CE_Failure, CPLE_AppDefined,
    5171             :                         "OpenCL routines reported failure (%d) on line %d.",
    5172             :                         static_cast<int>(err), __LINE__);
    5173           0 :                     eErr = CE_Failure;
    5174           0 :                     throw eErr;
    5175             :                 }
    5176             : 
    5177             :                 // Copy the data from the warper to GDAL's memory.
    5178           0 :                 switch (poWK->eWorkingDataType)
    5179             :                 {
    5180           0 :                     case GDT_Byte:
    5181           0 :                         memcpy(&(pabyDst[iDstY * nDstXSize]), rowReal,
    5182             :                                sizeof(GByte) * nDstXSize);
    5183           0 :                         break;
    5184           0 :                     case GDT_Int16:
    5185           0 :                         memcpy(&(reinterpret_cast<GInt16 *>(
    5186           0 :                                    pabyDst)[iDstY * nDstXSize]),
    5187           0 :                                rowReal, sizeof(GInt16) * nDstXSize);
    5188           0 :                         break;
    5189           0 :                     case GDT_UInt16:
    5190           0 :                         memcpy(&(reinterpret_cast<GUInt16 *>(
    5191           0 :                                    pabyDst)[iDstY * nDstXSize]),
    5192           0 :                                rowReal, sizeof(GUInt16) * nDstXSize);
    5193           0 :                         break;
    5194           0 :                     case GDT_Float32:
    5195           0 :                         memcpy(&(reinterpret_cast<float *>(
    5196           0 :                                    pabyDst)[iDstY * nDstXSize]),
    5197           0 :                                rowReal, sizeof(float) * nDstXSize);
    5198           0 :                         break;
    5199           0 :                     case GDT_CInt16:
    5200             :                     {
    5201           0 :                         GInt16 *pabyDstI16 = &(reinterpret_cast<GInt16 *>(
    5202           0 :                             pabyDst)[iDstY * nDstXSize]);
    5203           0 :                         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5204             :                         {
    5205           0 :                             pabyDstI16[iDstX * 2] =
    5206           0 :                                 static_cast<GInt16 *>(rowReal)[iDstX];
    5207           0 :                             pabyDstI16[iDstX * 2 + 1] =
    5208           0 :                                 static_cast<GInt16 *>(rowImag)[iDstX];
    5209             :                         }
    5210             :                     }
    5211           0 :                     break;
    5212           0 :                     case GDT_CFloat32:
    5213             :                     {
    5214           0 :                         float *pabyDstF32 = &(reinterpret_cast<float *>(
    5215           0 :                             pabyDst)[iDstY * nDstXSize]);
    5216           0 :                         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5217             :                         {
    5218           0 :                             pabyDstF32[iDstX * 2] =
    5219           0 :                                 static_cast<float *>(rowReal)[iDstX];
    5220           0 :                             pabyDstF32[iDstX * 2 + 1] =
    5221           0 :                                 static_cast<float *>(rowImag)[iDstX];
    5222             :                         }
    5223             :                     }
    5224           0 :                     break;
    5225           0 :                     default:
    5226             :                         // No support for higher precision formats.
    5227           0 :                         CPLError(CE_Failure, CPLE_AppDefined,
    5228             :                                  "Unsupported resampling OpenCL data type %d.",
    5229           0 :                                  static_cast<int>(poWK->eWorkingDataType));
    5230           0 :                         eErr = CE_Failure;
    5231           0 :                         throw eErr;
    5232             :                 }
    5233             :             }
    5234             :         }
    5235             :     }
    5236           0 :     catch (const CPLErr &)
    5237             :     {
    5238             :     }
    5239             : 
    5240           0 :     if ((err = GDALWarpKernelOpenCL_deleteEnv(warper)) != CL_SUCCESS)
    5241             :     {
    5242           0 :         CPLError(CE_Failure, CPLE_AppDefined,
    5243             :                  "OpenCL routines reported failure (%d) on line %d.",
    5244             :                  static_cast<int>(err), __LINE__);
    5245           0 :         return CE_Failure;
    5246             :     }
    5247             : 
    5248           0 :     return eErr;
    5249             : }
    5250             : #endif /* defined(HAVE_OPENCL) */
    5251             : 
    5252             : /************************************************************************/
    5253             : /*                     GWKCheckAndComputeSrcOffsets()                   */
    5254             : /************************************************************************/
    5255             : static CPL_INLINE bool
    5256   109622000 : GWKCheckAndComputeSrcOffsets(GWKJobStruct *psJob, int *_pabSuccess, int _iDstX,
    5257             :                              int _iDstY, double *_padfX, double *_padfY,
    5258             :                              int _nSrcXSize, int _nSrcYSize,
    5259             :                              GPtrDiff_t &iSrcOffset)
    5260             : {
    5261   109622000 :     const GDALWarpKernel *_poWK = psJob->poWK;
    5262   109748000 :     for (int iTry = 0; iTry < 2; ++iTry)
    5263             :     {
    5264   109660000 :         if (iTry == 1)
    5265             :         {
    5266             :             // If the source coordinate is slightly outside of the source raster
    5267             :             // retry to transform it alone, so that the exact coordinate
    5268             :             // transformer is used.
    5269             : 
    5270      125879 :             _padfX[_iDstX] = _iDstX + 0.5 + _poWK->nDstXOff;
    5271      125879 :             _padfY[_iDstX] = _iDstY + 0.5 + _poWK->nDstYOff;
    5272      125879 :             double dfZ = 0;
    5273      125879 :             _poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1,
    5274      125879 :                                   _padfX + _iDstX, _padfY + _iDstX, &dfZ,
    5275      125879 :                                   _pabSuccess + _iDstX);
    5276             :         }
    5277   109660000 :         if (!_pabSuccess[_iDstX])
    5278     3593220 :             return false;
    5279             : 
    5280             :         // If this happens this is likely the symptom of a bug somewhere.
    5281   106066000 :         if (std::isnan(_padfX[_iDstX]) || std::isnan(_padfY[_iDstX]))
    5282             :         {
    5283             :             static bool bNanCoordFound = false;
    5284           0 :             if (!bNanCoordFound)
    5285             :             {
    5286           0 :                 CPLDebug("WARP",
    5287             :                          "GWKCheckAndComputeSrcOffsets(): "
    5288             :                          "NaN coordinate found on point %d.",
    5289             :                          _iDstX);
    5290           0 :                 bNanCoordFound = true;
    5291             :             }
    5292           0 :             return false;
    5293             :         }
    5294             : 
    5295             :         /* --------------------------------------------------------------------
    5296             :          */
    5297             :         /*      Figure out what pixel we want in our source raster, and skip */
    5298             :         /*      further processing if it is well off the source image. */
    5299             :         /* --------------------------------------------------------------------
    5300             :          */
    5301             :         /* We test against the value before casting to avoid the */
    5302             :         /* problem of asymmetric truncation effects around zero.  That is */
    5303             :         /* -0.5 will be 0 when cast to an int. */
    5304   106131000 :         if (_padfX[_iDstX] < _poWK->nSrcXOff)
    5305             :         {
    5306             :             // If the source coordinate is slightly outside of the source raster
    5307             :             // retry to transform it alone, so that the exact coordinate
    5308             :             // transformer is used.
    5309     4137540 :             if (iTry == 0 && _padfX[_iDstX] > _poWK->nSrcXOff - 1)
    5310       21497 :                 continue;
    5311     4116040 :             return false;
    5312             :         }
    5313             : 
    5314   101993000 :         if (_padfY[_iDstX] < _poWK->nSrcYOff)
    5315             :         {
    5316             :             // If the source coordinate is slightly outside of the source raster
    5317             :             // retry to transform it alone, so that the exact coordinate
    5318             :             // transformer is used.
    5319     4793040 :             if (iTry == 0 && _padfY[_iDstX] > _poWK->nSrcYOff - 1)
    5320       38555 :                 continue;
    5321     4754490 :             return false;
    5322             :         }
    5323             : 
    5324             :         // Check for potential overflow when casting from float to int, (if
    5325             :         // operating outside natural projection area, padfX/Y can be a very huge
    5326             :         // positive number before doing the actual conversion), as such cast is
    5327             :         // undefined behavior that can trigger exception with some compilers
    5328             :         // (see #6753)
    5329    97200200 :         if (_padfX[_iDstX] + 1e-10 > _nSrcXSize + _poWK->nSrcXOff)
    5330             :         {
    5331             :             // If the source coordinate is slightly outside of the source raster
    5332             :             // retry to transform it alone, so that the exact coordinate
    5333             :             // transformer is used.
    5334     3499480 :             if (iTry == 0 && _padfX[_iDstX] < _nSrcXSize + _poWK->nSrcXOff + 1)
    5335       33291 :                 continue;
    5336     3466190 :             return false;
    5337             :         }
    5338    93700800 :         if (_padfY[_iDstX] + 1e-10 > _nSrcYSize + _poWK->nSrcYOff)
    5339             :         {
    5340             :             // If the source coordinate is slightly outside of the source raster
    5341             :             // retry to transform it alone, so that the exact coordinate
    5342             :             // transformer is used.
    5343     3731490 :             if (iTry == 0 && _padfY[_iDstX] < _nSrcYSize + _poWK->nSrcYOff + 1)
    5344       32536 :                 continue;
    5345     3698950 :             return false;
    5346             :         }
    5347             : 
    5348    89969300 :         break;
    5349             :     }
    5350             : 
    5351    90057700 :     int iSrcX = static_cast<int>(_padfX[_iDstX] + 1.0e-10) - _poWK->nSrcXOff;
    5352    90057700 :     int iSrcY = static_cast<int>(_padfY[_iDstX] + 1.0e-10) - _poWK->nSrcYOff;
    5353    90057700 :     if (iSrcX == _nSrcXSize)
    5354           0 :         iSrcX--;
    5355    90057700 :     if (iSrcY == _nSrcYSize)
    5356           0 :         iSrcY--;
    5357             : 
    5358             :     // Those checks should normally be OK given the previous ones.
    5359    90057700 :     CPLAssert(iSrcX >= 0);
    5360    90057700 :     CPLAssert(iSrcY >= 0);
    5361    90057700 :     CPLAssert(iSrcX < _nSrcXSize);
    5362    90057700 :     CPLAssert(iSrcY < _nSrcYSize);
    5363             : 
    5364    90057700 :     iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * _nSrcXSize;
    5365             : 
    5366    90057700 :     return true;
    5367             : }
    5368             : 
    5369             : /************************************************************************/
    5370             : /*                   GWKOneSourceCornerFailsToReproject()               */
    5371             : /************************************************************************/
    5372             : 
    5373         720 : static bool GWKOneSourceCornerFailsToReproject(GWKJobStruct *psJob)
    5374             : {
    5375         720 :     GDALWarpKernel *poWK = psJob->poWK;
    5376        2150 :     for (int iY = 0; iY <= 1; ++iY)
    5377             :     {
    5378        4296 :         for (int iX = 0; iX <= 1; ++iX)
    5379             :         {
    5380        2866 :             double dfXTmp = poWK->nSrcXOff + iX * poWK->nSrcXSize;
    5381        2866 :             double dfYTmp = poWK->nSrcYOff + iY * poWK->nSrcYSize;
    5382        2866 :             double dfZTmp = 0;
    5383        2866 :             int nSuccess = FALSE;
    5384        2866 :             poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp,
    5385             :                                  &dfYTmp, &dfZTmp, &nSuccess);
    5386        2866 :             if (!nSuccess)
    5387           6 :                 return true;
    5388             :         }
    5389             :     }
    5390         714 :     return false;
    5391             : }
    5392             : 
    5393             : /************************************************************************/
    5394             : /*                       GWKAdjustSrcOffsetOnEdge()                     */
    5395             : /************************************************************************/
    5396             : 
    5397        9714 : static bool GWKAdjustSrcOffsetOnEdge(GWKJobStruct *psJob,
    5398             :                                      GPtrDiff_t &iSrcOffset)
    5399             : {
    5400        9714 :     GDALWarpKernel *poWK = psJob->poWK;
    5401        9714 :     const int nSrcXSize = poWK->nSrcXSize;
    5402        9714 :     const int nSrcYSize = poWK->nSrcYSize;
    5403             : 
    5404             :     // Check if the computed source position slightly altered
    5405             :     // fails to reproject. If so, then we are at the edge of
    5406             :     // the validity area, and it is worth checking neighbour
    5407             :     // source pixels for validity.
    5408        9714 :     int nSuccess = FALSE;
    5409             :     {
    5410        9714 :         double dfXTmp =
    5411        9714 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5412        9714 :         double dfYTmp =
    5413        9714 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5414        9714 :         double dfZTmp = 0;
    5415        9714 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5416             :                              &dfZTmp, &nSuccess);
    5417             :     }
    5418        9714 :     if (nSuccess)
    5419             :     {
    5420        6996 :         double dfXTmp =
    5421        6996 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5422        6996 :         double dfYTmp =
    5423        6996 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
    5424        6996 :         double dfZTmp = 0;
    5425        6996 :         nSuccess = FALSE;
    5426        6996 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5427             :                              &dfZTmp, &nSuccess);
    5428             :     }
    5429        9714 :     if (nSuccess)
    5430             :     {
    5431        5624 :         double dfXTmp =
    5432        5624 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
    5433        5624 :         double dfYTmp =
    5434        5624 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5435        5624 :         double dfZTmp = 0;
    5436        5624 :         nSuccess = FALSE;
    5437        5624 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5438             :                              &dfZTmp, &nSuccess);
    5439             :     }
    5440             : 
    5441       14166 :     if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
    5442        4452 :         CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + 1))
    5443             :     {
    5444        1860 :         iSrcOffset++;
    5445        1860 :         return true;
    5446             :     }
    5447       10290 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
    5448        2436 :              CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + nSrcXSize))
    5449             :     {
    5450        1334 :         iSrcOffset += nSrcXSize;
    5451        1334 :         return true;
    5452             :     }
    5453        7838 :     else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
    5454        1318 :              CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - 1))
    5455             :     {
    5456         956 :         iSrcOffset--;
    5457         956 :         return true;
    5458             :     }
    5459        5924 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
    5460         360 :              CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - nSrcXSize))
    5461             :     {
    5462         340 :         iSrcOffset -= nSrcXSize;
    5463         340 :         return true;
    5464             :     }
    5465             : 
    5466        5224 :     return false;
    5467             : }
    5468             : 
    5469             : /************************************************************************/
    5470             : /*                 GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity()          */
    5471             : /************************************************************************/
    5472             : 
    5473           0 : static bool GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(GWKJobStruct *psJob,
    5474             :                                                       GPtrDiff_t &iSrcOffset)
    5475             : {
    5476           0 :     GDALWarpKernel *poWK = psJob->poWK;
    5477           0 :     const int nSrcXSize = poWK->nSrcXSize;
    5478           0 :     const int nSrcYSize = poWK->nSrcYSize;
    5479             : 
    5480             :     // Check if the computed source position slightly altered
    5481             :     // fails to reproject. If so, then we are at the edge of
    5482             :     // the validity area, and it is worth checking neighbour
    5483             :     // source pixels for validity.
    5484           0 :     int nSuccess = FALSE;
    5485             :     {
    5486           0 :         double dfXTmp =
    5487           0 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5488           0 :         double dfYTmp =
    5489           0 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5490           0 :         double dfZTmp = 0;
    5491           0 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5492             :                              &dfZTmp, &nSuccess);
    5493             :     }
    5494           0 :     if (nSuccess)
    5495             :     {
    5496           0 :         double dfXTmp =
    5497           0 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5498           0 :         double dfYTmp =
    5499           0 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
    5500           0 :         double dfZTmp = 0;
    5501           0 :         nSuccess = FALSE;
    5502           0 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5503             :                              &dfZTmp, &nSuccess);
    5504             :     }
    5505           0 :     if (nSuccess)
    5506             :     {
    5507           0 :         double dfXTmp =
    5508           0 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
    5509           0 :         double dfYTmp =
    5510           0 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5511           0 :         double dfZTmp = 0;
    5512           0 :         nSuccess = FALSE;
    5513           0 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5514             :                              &dfZTmp, &nSuccess);
    5515             :     }
    5516             : 
    5517           0 :     if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
    5518           0 :         poWK->pafUnifiedSrcDensity[iSrcOffset + 1] >= SRC_DENSITY_THRESHOLD)
    5519             :     {
    5520           0 :         iSrcOffset++;
    5521           0 :         return true;
    5522             :     }
    5523           0 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
    5524           0 :              poWK->pafUnifiedSrcDensity[iSrcOffset + nSrcXSize] >=
    5525             :                  SRC_DENSITY_THRESHOLD)
    5526             :     {
    5527           0 :         iSrcOffset += nSrcXSize;
    5528           0 :         return true;
    5529             :     }
    5530           0 :     else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
    5531           0 :              poWK->pafUnifiedSrcDensity[iSrcOffset - 1] >=
    5532             :                  SRC_DENSITY_THRESHOLD)
    5533             :     {
    5534           0 :         iSrcOffset--;
    5535           0 :         return true;
    5536             :     }
    5537           0 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
    5538           0 :              poWK->pafUnifiedSrcDensity[iSrcOffset - nSrcXSize] >=
    5539             :                  SRC_DENSITY_THRESHOLD)
    5540             :     {
    5541           0 :         iSrcOffset -= nSrcXSize;
    5542           0 :         return true;
    5543             :     }
    5544             : 
    5545           0 :     return false;
    5546             : }
    5547             : 
    5548             : /************************************************************************/
    5549             : /*                           GWKGeneralCase()                           */
    5550             : /*                                                                      */
    5551             : /*      This is the most general case.  It attempts to handle all       */
    5552             : /*      possible features with relatively little concern for            */
    5553             : /*      efficiency.                                                     */
    5554             : /************************************************************************/
    5555             : 
    5556         243 : static void GWKGeneralCaseThread(void *pData)
    5557             : {
    5558         243 :     GWKJobStruct *psJob = reinterpret_cast<GWKJobStruct *>(pData);
    5559         243 :     GDALWarpKernel *poWK = psJob->poWK;
    5560         243 :     const int iYMin = psJob->iYMin;
    5561         243 :     const int iYMax = psJob->iYMax;
    5562             :     const double dfMultFactorVerticalShiftPipeline =
    5563         243 :         poWK->bApplyVerticalShift
    5564         243 :             ? CPLAtof(CSLFetchNameValueDef(
    5565           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    5566             :                   "1.0"))
    5567         243 :             : 0.0;
    5568             : 
    5569         243 :     int nDstXSize = poWK->nDstXSize;
    5570         243 :     int nSrcXSize = poWK->nSrcXSize;
    5571         243 :     int nSrcYSize = poWK->nSrcYSize;
    5572             : 
    5573             :     /* -------------------------------------------------------------------- */
    5574             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    5575             :     /*      scanlines worth of positions.                                   */
    5576             :     /* -------------------------------------------------------------------- */
    5577             :     // For x, 2 *, because we cache the precomputed values at the end.
    5578             :     double *padfX =
    5579         243 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    5580             :     double *padfY =
    5581         243 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5582             :     double *padfZ =
    5583         243 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5584         243 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    5585             : 
    5586         243 :     const bool bUse4SamplesFormula =
    5587         243 :         poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
    5588             : 
    5589         243 :     GWKResampleWrkStruct *psWrkStruct = nullptr;
    5590         243 :     if (poWK->eResample != GRA_NearestNeighbour)
    5591             :     {
    5592         224 :         psWrkStruct = GWKResampleCreateWrkStruct(poWK);
    5593             :     }
    5594         243 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    5595         243 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    5596         243 :     const double dfErrorThreshold = CPLAtof(
    5597         243 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    5598             : 
    5599             :     const bool bOneSourceCornerFailsToReproject =
    5600         243 :         GWKOneSourceCornerFailsToReproject(psJob);
    5601             : 
    5602             :     // Precompute values.
    5603        6513 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5604        6270 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    5605             : 
    5606             :     /* ==================================================================== */
    5607             :     /*      Loop over output lines.                                         */
    5608             :     /* ==================================================================== */
    5609        6513 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    5610             :     {
    5611             :         /* --------------------------------------------------------------------
    5612             :          */
    5613             :         /*      Setup points to transform to source image space. */
    5614             :         /* --------------------------------------------------------------------
    5615             :          */
    5616        6270 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    5617        6270 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    5618      242830 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5619      236560 :             padfY[iDstX] = dfY;
    5620        6270 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    5621             : 
    5622             :         /* --------------------------------------------------------------------
    5623             :          */
    5624             :         /*      Transform the points from destination pixel/line coordinates */
    5625             :         /*      to source pixel/line coordinates. */
    5626             :         /* --------------------------------------------------------------------
    5627             :          */
    5628        6270 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    5629             :                              padfY, padfZ, pabSuccess);
    5630        6270 :         if (dfSrcCoordPrecision > 0.0)
    5631             :         {
    5632           0 :             GWKRoundSourceCoordinates(
    5633             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    5634             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    5635           0 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    5636             :         }
    5637             : 
    5638             :         /* ====================================================================
    5639             :          */
    5640             :         /*      Loop over pixels in output scanline. */
    5641             :         /* ====================================================================
    5642             :          */
    5643      242830 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5644             :         {
    5645      236560 :             GPtrDiff_t iSrcOffset = 0;
    5646      236560 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    5647             :                                               padfX, padfY, nSrcXSize,
    5648             :                                               nSrcYSize, iSrcOffset))
    5649           0 :                 continue;
    5650             : 
    5651             :             /* --------------------------------------------------------------------
    5652             :              */
    5653             :             /*      Do not try to apply transparent/invalid source pixels to the
    5654             :              */
    5655             :             /*      destination.  This currently ignores the multi-pixel input
    5656             :              */
    5657             :             /*      of bilinear and cubic resamples. */
    5658             :             /* --------------------------------------------------------------------
    5659             :              */
    5660      236560 :             double dfDensity = 1.0;
    5661             : 
    5662      236560 :             if (poWK->pafUnifiedSrcDensity != nullptr)
    5663             :             {
    5664        1200 :                 dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    5665        1200 :                 if (dfDensity < SRC_DENSITY_THRESHOLD)
    5666             :                 {
    5667           0 :                     if (!bOneSourceCornerFailsToReproject)
    5668             :                     {
    5669           0 :                         continue;
    5670             :                     }
    5671           0 :                     else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
    5672             :                                  psJob, iSrcOffset))
    5673             :                     {
    5674           0 :                         dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    5675             :                     }
    5676             :                     else
    5677             :                     {
    5678           0 :                         continue;
    5679             :                     }
    5680             :                 }
    5681             :             }
    5682             : 
    5683      236560 :             if (poWK->panUnifiedSrcValid != nullptr &&
    5684           0 :                 !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    5685             :             {
    5686           0 :                 if (!bOneSourceCornerFailsToReproject)
    5687             :                 {
    5688           0 :                     continue;
    5689             :                 }
    5690           0 :                 else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
    5691             :                 {
    5692           0 :                     continue;
    5693             :                 }
    5694             :             }
    5695             : 
    5696             :             /* ====================================================================
    5697             :              */
    5698             :             /*      Loop processing each band. */
    5699             :             /* ====================================================================
    5700             :              */
    5701      236560 :             bool bHasFoundDensity = false;
    5702             : 
    5703      236560 :             const GPtrDiff_t iDstOffset =
    5704      236560 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    5705      473120 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    5706             :             {
    5707      236560 :                 double dfBandDensity = 0.0;
    5708      236560 :                 double dfValueReal = 0.0;
    5709      236560 :                 double dfValueImag = 0.0;
    5710             : 
    5711             :                 /* --------------------------------------------------------------------
    5712             :                  */
    5713             :                 /*      Collect the source value. */
    5714             :                 /* --------------------------------------------------------------------
    5715             :                  */
    5716      236560 :                 if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
    5717             :                     nSrcYSize == 1)
    5718             :                 {
    5719             :                     // FALSE is returned if dfBandDensity == 0, which is
    5720             :                     // checked below.
    5721         568 :                     CPL_IGNORE_RET_VAL(GWKGetPixelValue(
    5722             :                         poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal,
    5723             :                         &dfValueImag));
    5724             :                 }
    5725      235992 :                 else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
    5726             :                 {
    5727         648 :                     GWKBilinearResample4Sample(
    5728         648 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5729         648 :                         padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5730             :                         &dfValueReal, &dfValueImag);
    5731             :                 }
    5732      235344 :                 else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
    5733             :                 {
    5734         248 :                     GWKCubicResample4Sample(
    5735         248 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5736         248 :                         padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5737             :                         &dfValueReal, &dfValueImag);
    5738             :                 }
    5739             :                 else
    5740             : #ifdef DEBUG
    5741             :                     // Only useful for clang static analyzer.
    5742      235096 :                     if (psWrkStruct != nullptr)
    5743             : #endif
    5744             :                     {
    5745      235096 :                         psWrkStruct->pfnGWKResample(
    5746      235096 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5747      235096 :                             padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5748             :                             &dfValueReal, &dfValueImag, psWrkStruct);
    5749             :                     }
    5750             : 
    5751             :                 // If we didn't find any valid inputs skip to next band.
    5752      236560 :                 if (dfBandDensity < BAND_DENSITY_THRESHOLD)
    5753           0 :                     continue;
    5754             : 
    5755      236560 :                 if (poWK->bApplyVerticalShift)
    5756             :                 {
    5757           0 :                     if (!std::isfinite(padfZ[iDstX]))
    5758           0 :                         continue;
    5759             :                     // Subtract padfZ[] since the coordinate transformation is
    5760             :                     // from target to source
    5761           0 :                     dfValueReal =
    5762           0 :                         dfValueReal * poWK->dfMultFactorVerticalShift -
    5763           0 :                         padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
    5764             :                 }
    5765             : 
    5766      236560 :                 bHasFoundDensity = true;
    5767             : 
    5768             :                 /* --------------------------------------------------------------------
    5769             :                  */
    5770             :                 /*      We have a computed value from the source.  Now apply it
    5771             :                  * to      */
    5772             :                 /*      the destination pixel. */
    5773             :                 /* --------------------------------------------------------------------
    5774             :                  */
    5775      236560 :                 GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    5776             :                                  dfValueReal, dfValueImag);
    5777             :             }
    5778             : 
    5779      236560 :             if (!bHasFoundDensity)
    5780           0 :                 continue;
    5781             : 
    5782             :             /* --------------------------------------------------------------------
    5783             :              */
    5784             :             /*      Update destination density/validity masks. */
    5785             :             /* --------------------------------------------------------------------
    5786             :              */
    5787      236560 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    5788             : 
    5789      236560 :             if (poWK->panDstValid != nullptr)
    5790             :             {
    5791           0 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    5792             :             }
    5793             :         } /* Next iDstX */
    5794             : 
    5795             :         /* --------------------------------------------------------------------
    5796             :          */
    5797             :         /*      Report progress to the user, and optionally cancel out. */
    5798             :         /* --------------------------------------------------------------------
    5799             :          */
    5800        6270 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    5801           0 :             break;
    5802             :     }
    5803             : 
    5804             :     /* -------------------------------------------------------------------- */
    5805             :     /*      Cleanup and return.                                             */
    5806             :     /* -------------------------------------------------------------------- */
    5807         243 :     CPLFree(padfX);
    5808         243 :     CPLFree(padfY);
    5809         243 :     CPLFree(padfZ);
    5810         243 :     CPLFree(pabSuccess);
    5811         243 :     if (psWrkStruct)
    5812         224 :         GWKResampleDeleteWrkStruct(psWrkStruct);
    5813         243 : }
    5814             : 
    5815         243 : static CPLErr GWKGeneralCase(GDALWarpKernel *poWK)
    5816             : {
    5817         243 :     return GWKRun(poWK, "GWKGeneralCase", GWKGeneralCaseThread);
    5818             : }
    5819             : 
    5820             : /************************************************************************/
    5821             : /*                            GWKRealCase()                             */
    5822             : /*                                                                      */
    5823             : /*      General case for non-complex data types.                        */
    5824             : /************************************************************************/
    5825             : 
    5826         134 : static void GWKRealCaseThread(void *pData)
    5827             : 
    5828             : {
    5829         134 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    5830         134 :     GDALWarpKernel *poWK = psJob->poWK;
    5831         134 :     const int iYMin = psJob->iYMin;
    5832         134 :     const int iYMax = psJob->iYMax;
    5833             : 
    5834         134 :     const int nDstXSize = poWK->nDstXSize;
    5835         134 :     const int nSrcXSize = poWK->nSrcXSize;
    5836         134 :     const int nSrcYSize = poWK->nSrcYSize;
    5837             :     const double dfMultFactorVerticalShiftPipeline =
    5838         134 :         poWK->bApplyVerticalShift
    5839         134 :             ? CPLAtof(CSLFetchNameValueDef(
    5840           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    5841             :                   "1.0"))
    5842         134 :             : 0.0;
    5843             : 
    5844             :     /* -------------------------------------------------------------------- */
    5845             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    5846             :     /*      scanlines worth of positions.                                   */
    5847             :     /* -------------------------------------------------------------------- */
    5848             : 
    5849             :     // For x, 2 *, because we cache the precomputed values at the end.
    5850             :     double *padfX =
    5851         134 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    5852             :     double *padfY =
    5853         134 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5854             :     double *padfZ =
    5855         134 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5856         134 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    5857             : 
    5858         134 :     const bool bUse4SamplesFormula =
    5859         134 :         poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
    5860             : 
    5861         134 :     GWKResampleWrkStruct *psWrkStruct = nullptr;
    5862         134 :     if (poWK->eResample != GRA_NearestNeighbour)
    5863             :     {
    5864         118 :         psWrkStruct = GWKResampleCreateWrkStruct(poWK);
    5865             :     }
    5866         134 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    5867         134 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    5868         134 :     const double dfErrorThreshold = CPLAtof(
    5869         134 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    5870             : 
    5871         387 :     const bool bSrcMaskIsDensity = poWK->panUnifiedSrcValid == nullptr &&
    5872         253 :                                    poWK->papanBandSrcValid == nullptr &&
    5873         119 :                                    poWK->pafUnifiedSrcDensity != nullptr;
    5874             : 
    5875             :     const bool bOneSourceCornerFailsToReproject =
    5876         134 :         GWKOneSourceCornerFailsToReproject(psJob);
    5877             : 
    5878             :     // Precompute values.
    5879       18829 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5880       18695 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    5881             : 
    5882             :     /* ==================================================================== */
    5883             :     /*      Loop over output lines.                                         */
    5884             :     /* ==================================================================== */
    5885       21580 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    5886             :     {
    5887             :         /* --------------------------------------------------------------------
    5888             :          */
    5889             :         /*      Setup points to transform to source image space. */
    5890             :         /* --------------------------------------------------------------------
    5891             :          */
    5892       21446 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    5893       21446 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    5894    43460600 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5895    43439100 :             padfY[iDstX] = dfY;
    5896       21446 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    5897             : 
    5898             :         /* --------------------------------------------------------------------
    5899             :          */
    5900             :         /*      Transform the points from destination pixel/line coordinates */
    5901             :         /*      to source pixel/line coordinates. */
    5902             :         /* --------------------------------------------------------------------
    5903             :          */
    5904       21446 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    5905             :                              padfY, padfZ, pabSuccess);
    5906       21446 :         if (dfSrcCoordPrecision > 0.0)
    5907             :         {
    5908           0 :             GWKRoundSourceCoordinates(
    5909             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    5910             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    5911           0 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    5912             :         }
    5913             : 
    5914             :         /* ====================================================================
    5915             :          */
    5916             :         /*      Loop over pixels in output scanline. */
    5917             :         /* ====================================================================
    5918             :          */
    5919    43460600 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5920             :         {
    5921    43439100 :             GPtrDiff_t iSrcOffset = 0;
    5922    43439100 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    5923             :                                               padfX, padfY, nSrcXSize,
    5924             :                                               nSrcYSize, iSrcOffset))
    5925    42846800 :                 continue;
    5926             : 
    5927             :             /* --------------------------------------------------------------------
    5928             :              */
    5929             :             /*      Do not try to apply transparent/invalid source pixels to the
    5930             :              */
    5931             :             /*      destination.  This currently ignores the multi-pixel input
    5932             :              */
    5933             :             /*      of bilinear and cubic resamples. */
    5934             :             /* --------------------------------------------------------------------
    5935             :              */
    5936    31382600 :             double dfDensity = 1.0;
    5937             : 
    5938    31382600 :             if (poWK->pafUnifiedSrcDensity != nullptr)
    5939             :             {
    5940     1262880 :                 dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    5941     1262880 :                 if (dfDensity < SRC_DENSITY_THRESHOLD)
    5942             :                 {
    5943     1261590 :                     if (!bOneSourceCornerFailsToReproject)
    5944             :                     {
    5945     1261590 :                         continue;
    5946             :                     }
    5947           0 :                     else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
    5948             :                                  psJob, iSrcOffset))
    5949             :                     {
    5950           0 :                         dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    5951             :                     }
    5952             :                     else
    5953             :                     {
    5954           0 :                         continue;
    5955             :                     }
    5956             :                 }
    5957             :             }
    5958             : 
    5959    59749600 :             if (poWK->panUnifiedSrcValid != nullptr &&
    5960    29628600 :                 !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    5961             :             {
    5962    29531000 :                 if (!bOneSourceCornerFailsToReproject)
    5963             :                 {
    5964    29528700 :                     continue;
    5965             :                 }
    5966        2229 :                 else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
    5967             :                 {
    5968           0 :                     continue;
    5969             :                 }
    5970             :             }
    5971             : 
    5972             :             /* ====================================================================
    5973             :              */
    5974             :             /*      Loop processing each band. */
    5975             :             /* ====================================================================
    5976             :              */
    5977      592300 :             bool bHasFoundDensity = false;
    5978             : 
    5979      592300 :             const GPtrDiff_t iDstOffset =
    5980      592300 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    5981     1516060 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    5982             :             {
    5983      923761 :                 double dfBandDensity = 0.0;
    5984      923761 :                 double dfValueReal = 0.0;
    5985             : 
    5986             :                 /* --------------------------------------------------------------------
    5987             :                  */
    5988             :                 /*      Collect the source value. */
    5989             :                 /* --------------------------------------------------------------------
    5990             :                  */
    5991      923761 :                 if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
    5992             :                     nSrcYSize == 1)
    5993             :                 {
    5994             :                     // FALSE is returned if dfBandDensity == 0, which is
    5995             :                     // checked below.
    5996        1012 :                     CPL_IGNORE_RET_VAL(GWKGetPixelValueReal(
    5997             :                         poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal));
    5998             :                 }
    5999      922749 :                 else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
    6000             :                 {
    6001        1326 :                     double dfValueImagIgnored = 0.0;
    6002        1326 :                     GWKBilinearResample4Sample(
    6003        1326 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6004        1326 :                         padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    6005        1326 :                         &dfValueReal, &dfValueImagIgnored);
    6006             :                 }
    6007      921423 :                 else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
    6008             :                 {
    6009      299992 :                     if (bSrcMaskIsDensity)
    6010             :                     {
    6011         361 :                         if (poWK->eWorkingDataType == GDT_Byte)
    6012             :                         {
    6013         361 :                             GWKCubicResampleSrcMaskIsDensity4SampleRealT<GByte>(
    6014         361 :                                 poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6015         361 :                                 padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    6016             :                                 &dfValueReal);
    6017             :                         }
    6018           0 :                         else if (poWK->eWorkingDataType == GDT_UInt16)
    6019             :                         {
    6020             :                             GWKCubicResampleSrcMaskIsDensity4SampleRealT<
    6021           0 :                                 GUInt16>(poWK, iBand,
    6022           0 :                                          padfX[iDstX] - poWK->nSrcXOff,
    6023           0 :                                          padfY[iDstX] - poWK->nSrcYOff,
    6024             :                                          &dfBandDensity, &dfValueReal);
    6025             :                         }
    6026             :                         else
    6027             :                         {
    6028           0 :                             GWKCubicResampleSrcMaskIsDensity4SampleReal(
    6029           0 :                                 poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6030           0 :                                 padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    6031             :                                 &dfValueReal);
    6032             :                         }
    6033             :                     }
    6034             :                     else
    6035             :                     {
    6036      299631 :                         double dfValueImagIgnored = 0.0;
    6037      299631 :                         GWKCubicResample4Sample(
    6038      299631 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6039      299631 :                             padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    6040             :                             &dfValueReal, &dfValueImagIgnored);
    6041      299992 :                     }
    6042             :                 }
    6043             :                 else
    6044             : #ifdef DEBUG
    6045             :                     // Only useful for clang static analyzer.
    6046      621431 :                     if (psWrkStruct != nullptr)
    6047             : #endif
    6048             :                     {
    6049      621431 :                         double dfValueImagIgnored = 0.0;
    6050      621431 :                         psWrkStruct->pfnGWKResample(
    6051      621431 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6052      621431 :                             padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    6053             :                             &dfValueReal, &dfValueImagIgnored, psWrkStruct);
    6054             :                     }
    6055             : 
    6056             :                 // If we didn't find any valid inputs skip to next band.
    6057      923761 :                 if (dfBandDensity < BAND_DENSITY_THRESHOLD)
    6058           0 :                     continue;
    6059             : 
    6060      923761 :                 if (poWK->bApplyVerticalShift)
    6061             :                 {
    6062           0 :                     if (!std::isfinite(padfZ[iDstX]))
    6063           0 :                         continue;
    6064             :                     // Subtract padfZ[] since the coordinate transformation is
    6065             :                     // from target to source
    6066           0 :                     dfValueReal =
    6067           0 :                         dfValueReal * poWK->dfMultFactorVerticalShift -
    6068           0 :                         padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
    6069             :                 }
    6070             : 
    6071      923761 :                 bHasFoundDensity = true;
    6072             : 
    6073             :                 /* --------------------------------------------------------------------
    6074             :                  */
    6075             :                 /*      We have a computed value from the source.  Now apply it
    6076             :                  * to      */
    6077             :                 /*      the destination pixel. */
    6078             :                 /* --------------------------------------------------------------------
    6079             :                  */
    6080      923761 :                 GWKSetPixelValueReal(poWK, iBand, iDstOffset, dfBandDensity,
    6081             :                                      dfValueReal);
    6082             :             }
    6083             : 
    6084      592300 :             if (!bHasFoundDensity)
    6085           0 :                 continue;
    6086             : 
    6087             :             /* --------------------------------------------------------------------
    6088             :              */
    6089             :             /*      Update destination density/validity masks. */
    6090             :             /* --------------------------------------------------------------------
    6091             :              */
    6092      592300 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    6093             : 
    6094      592300 :             if (poWK->panDstValid != nullptr)
    6095             :             {
    6096      101460 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    6097             :             }
    6098             :         }  // Next iDstX.
    6099             : 
    6100             :         /* --------------------------------------------------------------------
    6101             :          */
    6102             :         /*      Report progress to the user, and optionally cancel out. */
    6103             :         /* --------------------------------------------------------------------
    6104             :          */
    6105       21446 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    6106           0 :             break;
    6107             :     }
    6108             : 
    6109             :     /* -------------------------------------------------------------------- */
    6110             :     /*      Cleanup and return.                                             */
    6111             :     /* -------------------------------------------------------------------- */
    6112         134 :     CPLFree(padfX);
    6113         134 :     CPLFree(padfY);
    6114         134 :     CPLFree(padfZ);
    6115         134 :     CPLFree(pabSuccess);
    6116         134 :     if (psWrkStruct)
    6117         118 :         GWKResampleDeleteWrkStruct(psWrkStruct);
    6118         134 : }
    6119             : 
    6120         134 : static CPLErr GWKRealCase(GDALWarpKernel *poWK)
    6121             : {
    6122         134 :     return GWKRun(poWK, "GWKRealCase", GWKRealCaseThread);
    6123             : }
    6124             : 
    6125             : /************************************************************************/
    6126             : /*                 GWKCubicResampleNoMasks4MultiBandT()                 */
    6127             : /************************************************************************/
    6128             : 
    6129             : /* We restrict to 64-bit processors because they are guaranteed to have SSE2 */
    6130             : /* and enough SSE registers */
    6131             : #if defined(USE_SSE2)
    6132             : 
    6133      238596 : static inline float Convolute4x4(const __m128 row0, const __m128 row1,
    6134             :                                  const __m128 row2, const __m128 row3,
    6135             :                                  const __m128 weightsXY0,
    6136             :                                  const __m128 weightsXY1,
    6137             :                                  const __m128 weightsXY2,
    6138             :                                  const __m128 weightsXY3)
    6139             : {
    6140     1670170 :     return XMMHorizontalAdd(_mm_add_ps(
    6141             :         _mm_add_ps(_mm_mul_ps(row0, weightsXY0), _mm_mul_ps(row1, weightsXY1)),
    6142             :         _mm_add_ps(_mm_mul_ps(row2, weightsXY2),
    6143      238596 :                    _mm_mul_ps(row3, weightsXY3))));
    6144             : }
    6145             : 
    6146             : template <class T>
    6147       81323 : static void GWKCubicResampleNoMasks4MultiBandT(const GDALWarpKernel *poWK,
    6148             :                                                double dfSrcX, double dfSrcY,
    6149             :                                                const GPtrDiff_t iDstOffset)
    6150             : {
    6151       81323 :     const double dfSrcXShifted = dfSrcX - 0.5;
    6152       81323 :     const int iSrcX = static_cast<int>(dfSrcXShifted);
    6153       81323 :     const double dfSrcYShifted = dfSrcY - 0.5;
    6154       81323 :     const int iSrcY = static_cast<int>(dfSrcYShifted);
    6155       81323 :     const GPtrDiff_t iSrcOffset =
    6156       81323 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    6157             : 
    6158             :     // Get the bilinear interpolation at the image borders.
    6159       81323 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    6160       80326 :         iSrcY + 2 >= poWK->nSrcYSize)
    6161             :     {
    6162        7164 :         for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6163             :         {
    6164             :             T value;
    6165        5373 :             GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    6166             :                                                &value);
    6167        5373 :             reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
    6168             :                 value;
    6169        1791 :         }
    6170             :     }
    6171             :     else
    6172             :     {
    6173       79532 :         const float fDeltaX = static_cast<float>(dfSrcXShifted) - iSrcX;
    6174       79532 :         const float fDeltaY = static_cast<float>(dfSrcYShifted) - iSrcY;
    6175             : 
    6176             :         float afCoeffsX[4];
    6177             :         float afCoeffsY[4];
    6178       79532 :         GWKCubicComputeWeights(fDeltaX, afCoeffsX);
    6179       79532 :         GWKCubicComputeWeights(fDeltaY, afCoeffsY);
    6180       79532 :         const auto weightsX = _mm_loadu_ps(afCoeffsX);
    6181             :         const auto weightsXY0 =
    6182      159064 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[0]), weightsX);
    6183             :         const auto weightsXY1 =
    6184      159064 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[1]), weightsX);
    6185             :         const auto weightsXY2 =
    6186      159064 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[2]), weightsX);
    6187             :         const auto weightsXY3 =
    6188       79532 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[3]), weightsX);
    6189             : 
    6190       79532 :         const GPtrDiff_t iOffset = iSrcOffset - poWK->nSrcXSize - 1;
    6191             : 
    6192       79532 :         int iBand = 0;
    6193             :         // Process 2 bands at a time
    6194      159064 :         for (; iBand + 1 < poWK->nBands; iBand += 2)
    6195             :         {
    6196       79532 :             const T *CPL_RESTRICT pBand0 =
    6197       79532 :                 reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
    6198       79532 :             const auto row0_0 = XMMLoad4Values(pBand0 + iOffset);
    6199             :             const auto row1_0 =
    6200       79532 :                 XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
    6201             :             const auto row2_0 =
    6202       79532 :                 XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
    6203             :             const auto row3_0 =
    6204       79532 :                 XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
    6205             : 
    6206       79532 :             const T *CPL_RESTRICT pBand1 =
    6207       79532 :                 reinterpret_cast<const T *>(poWK->papabySrcImage[iBand + 1]);
    6208       79532 :             const auto row0_1 = XMMLoad4Values(pBand1 + iOffset);
    6209             :             const auto row1_1 =
    6210       79532 :                 XMMLoad4Values(pBand1 + iOffset + poWK->nSrcXSize);
    6211             :             const auto row2_1 =
    6212       79532 :                 XMMLoad4Values(pBand1 + iOffset + 2 * poWK->nSrcXSize);
    6213             :             const auto row3_1 =
    6214       79532 :                 XMMLoad4Values(pBand1 + iOffset + 3 * poWK->nSrcXSize);
    6215             : 
    6216             :             const float fValue_0 =
    6217       79532 :                 Convolute4x4(row0_0, row1_0, row2_0, row3_0, weightsXY0,
    6218             :                              weightsXY1, weightsXY2, weightsXY3);
    6219             : 
    6220             :             const float fValue_1 =
    6221       79532 :                 Convolute4x4(row0_1, row1_1, row2_1, row3_1, weightsXY0,
    6222             :                              weightsXY1, weightsXY2, weightsXY3);
    6223             : 
    6224       79532 :             T *CPL_RESTRICT pDstBand0 =
    6225       79532 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
    6226       79532 :             pDstBand0[iDstOffset] = GWKClampValueT<T>(fValue_0);
    6227             : 
    6228       79532 :             T *CPL_RESTRICT pDstBand1 =
    6229       79532 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand + 1]);
    6230       79532 :             pDstBand1[iDstOffset] = GWKClampValueT<T>(fValue_1);
    6231             :         }
    6232       79532 :         if (iBand < poWK->nBands)
    6233             :         {
    6234       79532 :             const T *CPL_RESTRICT pBand0 =
    6235       79532 :                 reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
    6236       79532 :             const auto row0 = XMMLoad4Values(pBand0 + iOffset);
    6237             :             const auto row1 =
    6238       79532 :                 XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
    6239             :             const auto row2 =
    6240       79532 :                 XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
    6241             :             const auto row3 =
    6242       79532 :                 XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
    6243             : 
    6244             :             const float fValue =
    6245       79532 :                 Convolute4x4(row0, row1, row2, row3, weightsXY0, weightsXY1,
    6246             :                              weightsXY2, weightsXY3);
    6247             : 
    6248       79532 :             T *CPL_RESTRICT pDstBand =
    6249       79532 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
    6250       79532 :             pDstBand[iDstOffset] = GWKClampValueT<T>(fValue);
    6251             :         }
    6252             :     }
    6253             : 
    6254       81323 :     if (poWK->pafDstDensity)
    6255         441 :         poWK->pafDstDensity[iDstOffset] = 1.0f;
    6256       81323 : }
    6257             : 
    6258             : #endif  // defined(USE_SSE2)
    6259             : 
    6260             : /************************************************************************/
    6261             : /*                GWKResampleNoMasksOrDstDensityOnlyThreadInternal()    */
    6262             : /************************************************************************/
    6263             : 
    6264             : template <class T, GDALResampleAlg eResample, int bUse4SamplesFormula>
    6265        1174 : static void GWKResampleNoMasksOrDstDensityOnlyThreadInternal(void *pData)
    6266             : 
    6267             : {
    6268        1174 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    6269        1174 :     GDALWarpKernel *poWK = psJob->poWK;
    6270        1174 :     const int iYMin = psJob->iYMin;
    6271        1174 :     const int iYMax = psJob->iYMax;
    6272        1156 :     const double dfMultFactorVerticalShiftPipeline =
    6273        1174 :         poWK->bApplyVerticalShift
    6274          18 :             ? CPLAtof(CSLFetchNameValueDef(
    6275          18 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    6276             :                   "1.0"))
    6277             :             : 0.0;
    6278             : 
    6279        1174 :     const int nDstXSize = poWK->nDstXSize;
    6280        1174 :     const int nSrcXSize = poWK->nSrcXSize;
    6281        1174 :     const int nSrcYSize = poWK->nSrcYSize;
    6282             : 
    6283             :     /* -------------------------------------------------------------------- */
    6284             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    6285             :     /*      scanline's worth of positions.                                  */
    6286             :     /* -------------------------------------------------------------------- */
    6287             : 
    6288             :     // For x, 2 *, because we cache the precomputed values at the end.
    6289             :     double *padfX =
    6290        1174 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    6291             :     double *padfY =
    6292        1174 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6293             :     double *padfZ =
    6294        1174 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6295        1174 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    6296             : 
    6297        1174 :     const int nXRadius = poWK->nXRadius;
    6298             :     double *padfWeightsX =
    6299        1174 :         static_cast<double *>(CPLCalloc(1 + nXRadius * 2, sizeof(double)));
    6300             :     double *padfWeightsY = static_cast<double *>(
    6301        1174 :         CPLCalloc(1 + poWK->nYRadius * 2, sizeof(double)));
    6302        1174 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    6303        1174 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    6304        1174 :     const double dfErrorThreshold = CPLAtof(
    6305        1174 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    6306             : 
    6307             :     // Precompute values.
    6308      254688 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6309      253514 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    6310             : 
    6311             :     /* ==================================================================== */
    6312             :     /*      Loop over output lines.                                         */
    6313             :     /* ==================================================================== */
    6314      129896 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    6315             :     {
    6316             :         /* --------------------------------------------------------------------
    6317             :          */
    6318             :         /*      Setup points to transform to source image space. */
    6319             :         /* --------------------------------------------------------------------
    6320             :          */
    6321      128723 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    6322      128723 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    6323    58231394 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6324    58102666 :             padfY[iDstX] = dfY;
    6325      128723 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    6326             : 
    6327             :         /* --------------------------------------------------------------------
    6328             :          */
    6329             :         /*      Transform the points from destination pixel/line coordinates */
    6330             :         /*      to source pixel/line coordinates. */
    6331             :         /* --------------------------------------------------------------------
    6332             :          */
    6333      128723 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    6334             :                              padfY, padfZ, pabSuccess);
    6335      128723 :         if (dfSrcCoordPrecision > 0.0)
    6336             :         {
    6337        1000 :             GWKRoundSourceCoordinates(
    6338             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    6339             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    6340        1000 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    6341             :         }
    6342             : 
    6343             :         /* ====================================================================
    6344             :          */
    6345             :         /*      Loop over pixels in output scanline. */
    6346             :         /* ====================================================================
    6347             :          */
    6348    58374314 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6349             :         {
    6350    58245586 :             GPtrDiff_t iSrcOffset = 0;
    6351    58245586 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    6352             :                                               padfX, padfY, nSrcXSize,
    6353             :                                               nSrcYSize, iSrcOffset))
    6354     6540838 :                 continue;
    6355             : 
    6356             :             /* ====================================================================
    6357             :              */
    6358             :             /*      Loop processing each band. */
    6359             :             /* ====================================================================
    6360             :              */
    6361    51703812 :             const GPtrDiff_t iDstOffset =
    6362    51703812 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    6363             : 
    6364             : #if defined(USE_SSE2)
    6365             :             if constexpr (bUse4SamplesFormula && eResample == GRA_Cubic &&
    6366             :                           (std::is_same<T, GByte>::value ||
    6367             :                            std::is_same<T, GUInt16>::value))
    6368             :             {
    6369      752574 :                 if (poWK->nBands > 1 && !poWK->bApplyVerticalShift)
    6370             :                 {
    6371       81323 :                     GWKCubicResampleNoMasks4MultiBandT<T>(
    6372       81323 :                         poWK, padfX[iDstX] - poWK->nSrcXOff,
    6373       81323 :                         padfY[iDstX] - poWK->nSrcYOff, iDstOffset);
    6374             : 
    6375       81323 :                     continue;
    6376             :                 }
    6377             :             }
    6378             : #endif  // defined(USE_SSE2)
    6379             : 
    6380    51622489 :             [[maybe_unused]] double dfInvWeights = 0;
    6381   144496798 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6382             :             {
    6383    92792487 :                 T value = 0;
    6384             :                 if constexpr (eResample == GRA_NearestNeighbour)
    6385             :                 {
    6386    76917549 :                     value = reinterpret_cast<T *>(
    6387    76917549 :                         poWK->papabySrcImage[iBand])[iSrcOffset];
    6388             :                 }
    6389             :                 else if constexpr (bUse4SamplesFormula)
    6390             :                 {
    6391             :                     if constexpr (eResample == GRA_Bilinear)
    6392     4806886 :                         GWKBilinearResampleNoMasks4SampleT(
    6393     4806886 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6394     4806886 :                             padfY[iDstX] - poWK->nSrcYOff, &value);
    6395             :                     else
    6396     1906603 :                         GWKCubicResampleNoMasks4SampleT(
    6397     1906603 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6398     1906603 :                             padfY[iDstX] - poWK->nSrcYOff, &value);
    6399             :                 }
    6400             :                 else
    6401             :                 {
    6402     9161449 :                     GWKResampleNoMasksT(
    6403     9161449 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6404     9161449 :                         padfY[iDstX] - poWK->nSrcYOff, &value, padfWeightsX,
    6405             :                         padfWeightsY, dfInvWeights);
    6406             :                 }
    6407             : 
    6408    92791297 :                 if (poWK->bApplyVerticalShift)
    6409             :                 {
    6410         818 :                     if (!std::isfinite(padfZ[iDstX]))
    6411           0 :                         continue;
    6412             :                     // Subtract padfZ[] since the coordinate transformation is
    6413             :                     // from target to source
    6414       86023 :                     value = GWKClampValueT<T>(
    6415         818 :                         value * poWK->dfMultFactorVerticalShift -
    6416         818 :                         padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
    6417             :                 }
    6418             : 
    6419    92874787 :                 if (poWK->pafDstDensity)
    6420    11712299 :                     poWK->pafDstDensity[iDstOffset] = 1.0f;
    6421             : 
    6422    92874787 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
    6423             :                     value;
    6424             :             }
    6425             :         }
    6426             : 
    6427             :         /* --------------------------------------------------------------------
    6428             :          */
    6429             :         /*      Report progress to the user, and optionally cancel out. */
    6430             :         /* --------------------------------------------------------------------
    6431             :          */
    6432      128723 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    6433           1 :             break;
    6434             :     }
    6435             : 
    6436             :     /* -------------------------------------------------------------------- */
    6437             :     /*      Cleanup and return.                                             */
    6438             :     /* -------------------------------------------------------------------- */
    6439        1174 :     CPLFree(padfX);
    6440        1174 :     CPLFree(padfY);
    6441        1174 :     CPLFree(padfZ);
    6442        1174 :     CPLFree(pabSuccess);
    6443        1174 :     CPLFree(padfWeightsX);
    6444        1174 :     CPLFree(padfWeightsY);
    6445        1174 : }
    6446             : 
    6447             : template <class T, GDALResampleAlg eResample>
    6448         918 : static void GWKResampleNoMasksOrDstDensityOnlyThread(void *pData)
    6449             : {
    6450         918 :     GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
    6451             :         pData);
    6452         918 : }
    6453             : 
    6454             : template <class T, GDALResampleAlg eResample>
    6455         256 : static void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread(void *pData)
    6456             : 
    6457             : {
    6458         256 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    6459         256 :     GDALWarpKernel *poWK = psJob->poWK;
    6460             :     static_assert(eResample == GRA_Bilinear || eResample == GRA_Cubic);
    6461         256 :     const bool bUse4SamplesFormula =
    6462         256 :         poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
    6463         256 :     if (bUse4SamplesFormula)
    6464         156 :         GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, TRUE>(
    6465             :             pData);
    6466             :     else
    6467         100 :         GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
    6468             :             pData);
    6469         256 : }
    6470             : 
    6471         863 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6472             : {
    6473         863 :     return GWKRun(
    6474             :         poWK, "GWKNearestNoMasksOrDstDensityOnlyByte",
    6475         863 :         GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_NearestNeighbour>);
    6476             : }
    6477             : 
    6478         126 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6479             : {
    6480         126 :     return GWKRun(
    6481             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyByte",
    6482             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte,
    6483         126 :                                                            GRA_Bilinear>);
    6484             : }
    6485             : 
    6486          72 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6487             : {
    6488          72 :     return GWKRun(
    6489             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyByte",
    6490          72 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte, GRA_Cubic>);
    6491             : }
    6492             : 
    6493           9 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
    6494             : {
    6495           9 :     return GWKRun(
    6496             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyFloat",
    6497           9 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, GRA_Cubic>);
    6498             : }
    6499             : 
    6500             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    6501             : 
    6502             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
    6503             : {
    6504             :     return GWKRun(
    6505             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyDouble",
    6506             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double, GRA_Cubic>);
    6507             : }
    6508             : #endif
    6509             : 
    6510          12 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6511             : {
    6512          12 :     return GWKRun(
    6513             :         poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyByte",
    6514          12 :         GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_CubicSpline>);
    6515             : }
    6516             : 
    6517             : /************************************************************************/
    6518             : /*                          GWKNearestByte()                            */
    6519             : /*                                                                      */
    6520             : /*      Case for 8bit input data with nearest neighbour resampling      */
    6521             : /*      using valid flags. Should be as fast as possible for this       */
    6522             : /*      particular transformation type.                                 */
    6523             : /************************************************************************/
    6524             : 
    6525         343 : template <class T> static void GWKNearestThread(void *pData)
    6526             : 
    6527             : {
    6528         343 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    6529         343 :     GDALWarpKernel *poWK = psJob->poWK;
    6530         343 :     const int iYMin = psJob->iYMin;
    6531         343 :     const int iYMax = psJob->iYMax;
    6532         343 :     const double dfMultFactorVerticalShiftPipeline =
    6533         343 :         poWK->bApplyVerticalShift
    6534           0 :             ? CPLAtof(CSLFetchNameValueDef(
    6535           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    6536             :                   "1.0"))
    6537             :             : 0.0;
    6538             : 
    6539         343 :     const int nDstXSize = poWK->nDstXSize;
    6540         343 :     const int nSrcXSize = poWK->nSrcXSize;
    6541         343 :     const int nSrcYSize = poWK->nSrcYSize;
    6542             : 
    6543             :     /* -------------------------------------------------------------------- */
    6544             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    6545             :     /*      scanlines worth of positions.                                   */
    6546             :     /* -------------------------------------------------------------------- */
    6547             : 
    6548             :     // For x, 2 *, because we cache the precomputed values at the end.
    6549             :     double *padfX =
    6550         343 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    6551             :     double *padfY =
    6552         343 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6553             :     double *padfZ =
    6554         343 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6555         343 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    6556             : 
    6557         343 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    6558         343 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    6559         343 :     const double dfErrorThreshold = CPLAtof(
    6560         343 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    6561             : 
    6562             :     const bool bOneSourceCornerFailsToReproject =
    6563         343 :         GWKOneSourceCornerFailsToReproject(psJob);
    6564             : 
    6565             :     // Precompute values.
    6566       49707 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6567       49364 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    6568             : 
    6569             :     /* ==================================================================== */
    6570             :     /*      Loop over output lines.                                         */
    6571             :     /* ==================================================================== */
    6572       37157 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    6573             :     {
    6574             : 
    6575             :         /* --------------------------------------------------------------------
    6576             :          */
    6577             :         /*      Setup points to transform to source image space. */
    6578             :         /* --------------------------------------------------------------------
    6579             :          */
    6580       36814 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    6581       36814 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    6582     7743095 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6583     7706282 :             padfY[iDstX] = dfY;
    6584       36814 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    6585             : 
    6586             :         /* --------------------------------------------------------------------
    6587             :          */
    6588             :         /*      Transform the points from destination pixel/line coordinates */
    6589             :         /*      to source pixel/line coordinates. */
    6590             :         /* --------------------------------------------------------------------
    6591             :          */
    6592       36814 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    6593             :                              padfY, padfZ, pabSuccess);
    6594       36814 :         if (dfSrcCoordPrecision > 0.0)
    6595             :         {
    6596           0 :             GWKRoundSourceCoordinates(
    6597             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    6598             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    6599           0 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    6600             :         }
    6601             :         /* ====================================================================
    6602             :          */
    6603             :         /*      Loop over pixels in output scanline. */
    6604             :         /* ====================================================================
    6605             :          */
    6606     7743095 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6607             :         {
    6608     7706282 :             GPtrDiff_t iSrcOffset = 0;
    6609     7706282 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    6610             :                                               padfX, padfY, nSrcXSize,
    6611             :                                               nSrcYSize, iSrcOffset))
    6612     2164638 :                 continue;
    6613             : 
    6614             :             /* --------------------------------------------------------------------
    6615             :              */
    6616             :             /*      Do not try to apply invalid source pixels to the dest. */
    6617             :             /* --------------------------------------------------------------------
    6618             :              */
    6619     7524668 :             if (poWK->panUnifiedSrcValid != nullptr &&
    6620      931241 :                 !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    6621             :             {
    6622       49670 :                 if (!bOneSourceCornerFailsToReproject)
    6623             :                 {
    6624       42185 :                     continue;
    6625             :                 }
    6626        7485 :                 else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
    6627             :                 {
    6628        5224 :                     continue;
    6629             :                 }
    6630             :             }
    6631             : 
    6632             :             /* --------------------------------------------------------------------
    6633             :              */
    6634             :             /*      Do not try to apply transparent source pixels to the
    6635             :              * destination.*/
    6636             :             /* --------------------------------------------------------------------
    6637             :              */
    6638     6546016 :             double dfDensity = 1.0;
    6639             : 
    6640     6546016 :             if (poWK->pafUnifiedSrcDensity != nullptr)
    6641             :             {
    6642     1162245 :                 dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    6643     1162245 :                 if (dfDensity < SRC_DENSITY_THRESHOLD)
    6644     1004371 :                     continue;
    6645             :             }
    6646             : 
    6647             :             /* ====================================================================
    6648             :              */
    6649             :             /*      Loop processing each band. */
    6650             :             /* ====================================================================
    6651             :              */
    6652             : 
    6653     5541654 :             const GPtrDiff_t iDstOffset =
    6654     5541654 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    6655             : 
    6656    12873738 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6657             :             {
    6658     7332114 :                 T value = 0;
    6659     7332114 :                 double dfBandDensity = 0.0;
    6660             : 
    6661             :                 /* --------------------------------------------------------------------
    6662             :                  */
    6663             :                 /*      Collect the source value. */
    6664             :                 /* --------------------------------------------------------------------
    6665             :                  */
    6666     7332114 :                 if (GWKGetPixelT(poWK, iBand, iSrcOffset, &dfBandDensity,
    6667             :                                  &value))
    6668             :                 {
    6669             : 
    6670     7332104 :                     if (poWK->bApplyVerticalShift)
    6671             :                     {
    6672           0 :                         if (!std::isfinite(padfZ[iDstX]))
    6673           0 :                             continue;
    6674             :                         // Subtract padfZ[] since the coordinate transformation
    6675             :                         // is from target to source
    6676           0 :                         value = GWKClampValueT<T>(
    6677           0 :                             value * poWK->dfMultFactorVerticalShift -
    6678           0 :                             padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
    6679             :                     }
    6680             : 
    6681     7332104 :                     if (dfBandDensity < 1.0)
    6682             :                     {
    6683      159076 :                         if (dfBandDensity == 0.0)
    6684             :                         {
    6685             :                             // Do nothing.
    6686             :                         }
    6687             :                         else
    6688             :                         {
    6689             :                             // Let the general code take care of mixing.
    6690      159076 :                             GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
    6691             :                                                   dfBandDensity, value);
    6692             :                         }
    6693             :                     }
    6694             :                     else
    6695             :                     {
    6696     7173023 :                         reinterpret_cast<T *>(
    6697     7173023 :                             poWK->papabyDstImage[iBand])[iDstOffset] = value;
    6698             :                     }
    6699             :                 }
    6700             :             }
    6701             : 
    6702             :             /* --------------------------------------------------------------------
    6703             :              */
    6704             :             /*      Mark this pixel valid/opaque in the output. */
    6705             :             /* --------------------------------------------------------------------
    6706             :              */
    6707     5541654 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    6708             : 
    6709     5541654 :             if (poWK->panDstValid != nullptr)
    6710             :             {
    6711     4862206 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    6712             :             }
    6713             :         } /* Next iDstX */
    6714             : 
    6715             :         /* --------------------------------------------------------------------
    6716             :          */
    6717             :         /*      Report progress to the user, and optionally cancel out. */
    6718             :         /* --------------------------------------------------------------------
    6719             :          */
    6720       36814 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    6721           0 :             break;
    6722             :     }
    6723             : 
    6724             :     /* -------------------------------------------------------------------- */
    6725             :     /*      Cleanup and return.                                             */
    6726             :     /* -------------------------------------------------------------------- */
    6727         343 :     CPLFree(padfX);
    6728         343 :     CPLFree(padfY);
    6729         343 :     CPLFree(padfZ);
    6730         343 :     CPLFree(pabSuccess);
    6731         343 : }
    6732             : 
    6733         276 : static CPLErr GWKNearestByte(GDALWarpKernel *poWK)
    6734             : {
    6735         276 :     return GWKRun(poWK, "GWKNearestByte", GWKNearestThread<GByte>);
    6736             : }
    6737             : 
    6738          18 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6739             : {
    6740          18 :     return GWKRun(
    6741             :         poWK, "GWKNearestNoMasksOrDstDensityOnlyShort",
    6742          18 :         GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_NearestNeighbour>);
    6743             : }
    6744             : 
    6745          18 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6746             : {
    6747          18 :     return GWKRun(
    6748             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyShort",
    6749             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16,
    6750          18 :                                                            GRA_Bilinear>);
    6751             : }
    6752             : 
    6753           6 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
    6754             : {
    6755           6 :     return GWKRun(
    6756             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyUShort",
    6757             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16,
    6758           6 :                                                            GRA_Bilinear>);
    6759             : }
    6760             : 
    6761           5 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
    6762             : {
    6763           5 :     return GWKRun(
    6764             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyFloat",
    6765             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float,
    6766           5 :                                                            GRA_Bilinear>);
    6767             : }
    6768             : 
    6769             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    6770             : 
    6771             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
    6772             : {
    6773             :     return GWKRun(
    6774             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyDouble",
    6775             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double,
    6776             :                                                            GRA_Bilinear>);
    6777             : }
    6778             : #endif
    6779             : 
    6780           5 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6781             : {
    6782           5 :     return GWKRun(
    6783             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyShort",
    6784           5 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16, GRA_Cubic>);
    6785             : }
    6786             : 
    6787          12 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
    6788             : {
    6789          12 :     return GWKRun(
    6790             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyUShort",
    6791          12 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16, GRA_Cubic>);
    6792             : }
    6793             : 
    6794           6 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6795             : {
    6796           6 :     return GWKRun(
    6797             :         poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyShort",
    6798           6 :         GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_CubicSpline>);
    6799             : }
    6800             : 
    6801           5 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
    6802             : {
    6803           5 :     return GWKRun(
    6804             :         poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyUShort",
    6805           5 :         GWKResampleNoMasksOrDstDensityOnlyThread<GUInt16, GRA_CubicSpline>);
    6806             : }
    6807             : 
    6808          27 : static CPLErr GWKNearestShort(GDALWarpKernel *poWK)
    6809             : {
    6810          27 :     return GWKRun(poWK, "GWKNearestShort", GWKNearestThread<GInt16>);
    6811             : }
    6812             : 
    6813          11 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
    6814             : {
    6815          11 :     return GWKRun(
    6816             :         poWK, "GWKNearestNoMasksOrDstDensityOnlyFloat",
    6817          11 :         GWKResampleNoMasksOrDstDensityOnlyThread<float, GRA_NearestNeighbour>);
    6818             : }
    6819             : 
    6820          36 : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK)
    6821             : {
    6822          36 :     return GWKRun(poWK, "GWKNearestFloat", GWKNearestThread<float>);
    6823             : }
    6824             : 
    6825             : /************************************************************************/
    6826             : /*                           GWKAverageOrMode()                         */
    6827             : /*                                                                      */
    6828             : /************************************************************************/
    6829             : 
    6830             : static void GWKAverageOrModeThread(void *pData);
    6831             : 
    6832         130 : static CPLErr GWKAverageOrMode(GDALWarpKernel *poWK)
    6833             : {
    6834         130 :     return GWKRun(poWK, "GWKAverageOrMode", GWKAverageOrModeThread);
    6835             : }
    6836             : 
    6837             : // Overall logic based on GWKGeneralCaseThread().
    6838         130 : static void GWKAverageOrModeThread(void *pData)
    6839             : {
    6840         130 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    6841         130 :     GDALWarpKernel *poWK = psJob->poWK;
    6842         130 :     const int iYMin = psJob->iYMin;
    6843         130 :     const int iYMax = psJob->iYMax;
    6844             :     const double dfMultFactorVerticalShiftPipeline =
    6845         130 :         poWK->bApplyVerticalShift
    6846         130 :             ? CPLAtof(CSLFetchNameValueDef(
    6847           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    6848             :                   "1.0"))
    6849         130 :             : 0.0;
    6850             : 
    6851         130 :     const int nDstXSize = poWK->nDstXSize;
    6852         130 :     const int nSrcXSize = poWK->nSrcXSize;
    6853         130 :     const int nSrcYSize = poWK->nSrcYSize;
    6854             : 
    6855             :     /* -------------------------------------------------------------------- */
    6856             :     /*      Find out which algorithm to use (small optim.)                  */
    6857             :     /* -------------------------------------------------------------------- */
    6858         130 :     int nAlgo = 0;
    6859             : 
    6860             :     // Only used for GRA_Mode
    6861         130 :     float *pafRealVals = nullptr;
    6862         130 :     float *pafCounts = nullptr;
    6863         130 :     int nBins = 0;
    6864         130 :     int nBinsOffset = 0;
    6865         130 :     const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
    6866             : 
    6867             :     // Only used with nAlgo = 6.
    6868         130 :     float quant = 0.5;
    6869             : 
    6870             :     // To control array allocation only when data type is complex
    6871         130 :     const bool bIsComplex = GDALDataTypeIsComplex(poWK->eWorkingDataType) != 0;
    6872             : 
    6873         130 :     if (poWK->eResample == GRA_Average)
    6874             :     {
    6875          71 :         nAlgo = GWKAOM_Average;
    6876             :     }
    6877          59 :     else if (poWK->eResample == GRA_RMS)
    6878             :     {
    6879           9 :         nAlgo = GWKAOM_RMS;
    6880             :     }
    6881          50 :     else if (poWK->eResample == GRA_Mode)
    6882             :     {
    6883             :         // TODO check color table count > 256.
    6884          23 :         if (poWK->eWorkingDataType == GDT_Byte ||
    6885          17 :             poWK->eWorkingDataType == GDT_UInt16 ||
    6886          17 :             poWK->eWorkingDataType == GDT_Int16)
    6887             :         {
    6888          14 :             nAlgo = GWKAOM_Imode;
    6889             : 
    6890             :             // In the case of a paletted or non-paletted byte band,
    6891             :             // Input values are between 0 and 255.
    6892          14 :             if (poWK->eWorkingDataType == GDT_Byte)
    6893             :             {
    6894           6 :                 nBins = 256;
    6895             :             }
    6896             :             // In the case of Int8, input values are between -128 and 127.
    6897           8 :             else if (poWK->eWorkingDataType == GDT_Int8)
    6898             :             {
    6899           0 :                 nBins = 256;
    6900           0 :                 nBinsOffset = 128;
    6901             :             }
    6902             :             // In the case of Int16, input values are between -32768 and 32767.
    6903           8 :             else if (poWK->eWorkingDataType == GDT_Int16)
    6904             :             {
    6905           8 :                 nBins = 65536;
    6906           8 :                 nBinsOffset = 32768;
    6907             :             }
    6908             :             // In the case of UInt16, input values are between 0 and 65535.
    6909           0 :             else if (poWK->eWorkingDataType == GDT_UInt16)
    6910             :             {
    6911           0 :                 nBins = 65536;
    6912             :             }
    6913             :             pafCounts =
    6914          14 :                 static_cast<float *>(VSI_MALLOC_VERBOSE(nBins * sizeof(float)));
    6915          14 :             if (pafCounts == nullptr)
    6916           0 :                 return;
    6917             :         }
    6918             :         else
    6919             :         {
    6920           9 :             nAlgo = GWKAOM_Fmode;
    6921             : 
    6922           9 :             if (nSrcXSize > 0 && nSrcYSize > 0)
    6923             :             {
    6924             :                 pafRealVals = static_cast<float *>(
    6925           9 :                     VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
    6926             :                 pafCounts = static_cast<float *>(
    6927           9 :                     VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
    6928           9 :                 if (pafRealVals == nullptr || pafCounts == nullptr)
    6929             :                 {
    6930           0 :                     VSIFree(pafRealVals);
    6931           0 :                     VSIFree(pafCounts);
    6932           0 :                     return;
    6933             :                 }
    6934             :             }
    6935             :         }
    6936             :     }
    6937          27 :     else if (poWK->eResample == GRA_Max)
    6938             :     {
    6939           6 :         nAlgo = GWKAOM_Max;
    6940             :     }
    6941          21 :     else if (poWK->eResample == GRA_Min)
    6942             :     {
    6943           5 :         nAlgo = GWKAOM_Min;
    6944             :     }
    6945          16 :     else if (poWK->eResample == GRA_Med)
    6946             :     {
    6947           6 :         nAlgo = GWKAOM_Quant;
    6948           6 :         quant = 0.5;
    6949             :     }
    6950          10 :     else if (poWK->eResample == GRA_Q1)
    6951             :     {
    6952           5 :         nAlgo = GWKAOM_Quant;
    6953           5 :         quant = 0.25;
    6954             :     }
    6955           5 :     else if (poWK->eResample == GRA_Q3)
    6956             :     {
    6957           5 :         nAlgo = GWKAOM_Quant;
    6958           5 :         quant = 0.75;
    6959             :     }
    6960             : #ifdef disabled
    6961             :     else if (poWK->eResample == GRA_Sum)
    6962             :     {
    6963             :         nAlgo = GWKAOM_Sum;
    6964             :     }
    6965             : #endif
    6966             :     else
    6967             :     {
    6968             :         // Other resample algorithms not permitted here.
    6969           0 :         CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread() ERROR, "
    6970             :                          "illegal resample");
    6971           0 :         return;
    6972             :     }
    6973             : 
    6974         130 :     CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread() using algo %d",
    6975             :              nAlgo);
    6976             : 
    6977             :     /* -------------------------------------------------------------------- */
    6978             :     /*      Allocate x,y,z coordinate arrays for transformation ... two     */
    6979             :     /*      scanlines worth of positions.                                   */
    6980             :     /* -------------------------------------------------------------------- */
    6981             : 
    6982             :     double *padfX =
    6983         130 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6984             :     double *padfY =
    6985         130 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6986             :     double *padfZ =
    6987         130 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6988             :     double *padfX2 =
    6989         130 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6990             :     double *padfY2 =
    6991         130 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6992             :     double *padfZ2 =
    6993         130 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6994         130 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    6995         130 :     int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    6996             : 
    6997         130 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    6998         130 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    6999         130 :     const double dfErrorThreshold = CPLAtof(
    7000         130 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    7001             : 
    7002             :     const double dfExcludedValuesThreshold =
    7003         130 :         CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
    7004             :                                      "EXCLUDED_VALUES_PCT_THRESHOLD", "50")) /
    7005         130 :         100.0;
    7006             :     const double dfNodataValuesThreshold =
    7007         130 :         CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
    7008             :                                      "NODATA_VALUES_PCT_THRESHOLD", "100")) /
    7009         130 :         100.0;
    7010             : 
    7011             :     const int nXMargin =
    7012         130 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
    7013             :     const int nYMargin =
    7014         130 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
    7015             : 
    7016             :     /* ==================================================================== */
    7017             :     /*      Loop over output lines.                                         */
    7018             :     /* ==================================================================== */
    7019        6627 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    7020             :     {
    7021             : 
    7022             :         /* --------------------------------------------------------------------
    7023             :          */
    7024             :         /*      Setup points to transform to source image space. */
    7025             :         /* --------------------------------------------------------------------
    7026             :          */
    7027     1669840 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    7028             :         {
    7029     1663340 :             padfX[iDstX] = iDstX + poWK->nDstXOff;
    7030     1663340 :             padfY[iDstX] = iDstY + poWK->nDstYOff;
    7031     1663340 :             padfZ[iDstX] = 0.0;
    7032     1663340 :             padfX2[iDstX] = iDstX + 1.0 + poWK->nDstXOff;
    7033     1663340 :             padfY2[iDstX] = iDstY + 1.0 + poWK->nDstYOff;
    7034     1663340 :             padfZ2[iDstX] = 0.0;
    7035             :         }
    7036             : 
    7037             :         /* --------------------------------------------------------------------
    7038             :          */
    7039             :         /*      Transform the points from destination pixel/line coordinates */
    7040             :         /*      to source pixel/line coordinates. */
    7041             :         /* --------------------------------------------------------------------
    7042             :          */
    7043        6497 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    7044             :                              padfY, padfZ, pabSuccess);
    7045        6497 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX2,
    7046             :                              padfY2, padfZ2, pabSuccess2);
    7047             : 
    7048        6497 :         if (dfSrcCoordPrecision > 0.0)
    7049             :         {
    7050           0 :             GWKRoundSourceCoordinates(
    7051             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    7052             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    7053           0 :                 poWK->nDstXOff, iDstY + poWK->nDstYOff);
    7054           0 :             GWKRoundSourceCoordinates(
    7055             :                 nDstXSize, padfX2, padfY2, padfZ2, pabSuccess2,
    7056             :                 dfSrcCoordPrecision, dfErrorThreshold, poWK->pfnTransformer,
    7057           0 :                 psJob->pTransformerArg, 1.0 + poWK->nDstXOff,
    7058           0 :                 iDstY + 1.0 + poWK->nDstYOff);
    7059             :         }
    7060             : 
    7061             :         /* ====================================================================
    7062             :          */
    7063             :         /*      Loop over pixels in output scanline. */
    7064             :         /* ====================================================================
    7065             :          */
    7066     1669840 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    7067             :         {
    7068     1663340 :             GPtrDiff_t iSrcOffset = 0;
    7069     1663340 :             double dfDensity = 1.0;
    7070     1663340 :             bool bHasFoundDensity = false;
    7071             : 
    7072     1663340 :             if (!pabSuccess[iDstX] || !pabSuccess2[iDstX])
    7073      311460 :                 continue;
    7074             : 
    7075             :             // Add some checks so that padfX[iDstX] - poWK->nSrcXOff is in
    7076             :             // reasonable range (https://github.com/OSGeo/gdal/issues/2365)
    7077     1663340 :             if (!(padfX[iDstX] - poWK->nSrcXOff >= -nXMargin &&
    7078     1663320 :                   padfX2[iDstX] - poWK->nSrcXOff >= -nXMargin &&
    7079     1663320 :                   padfY[iDstX] - poWK->nSrcYOff >= -nYMargin &&
    7080     1663300 :                   padfY2[iDstX] - poWK->nSrcYOff >= -nYMargin &&
    7081     1663300 :                   padfX[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
    7082     1663300 :                   padfX2[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
    7083     1663290 :                   padfY[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin &&
    7084     1663290 :                   padfY2[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin))
    7085             :             {
    7086          62 :                 continue;
    7087             :             }
    7088             : 
    7089     1663280 :             const GPtrDiff_t iDstOffset =
    7090     1663280 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    7091             : 
    7092             :             // Compute corners in source crs.
    7093             : 
    7094             :             // The transformation might not have preserved ordering of
    7095             :             // coordinates so do the necessary swapping (#5433).
    7096             :             // NOTE: this is really an approximative fix. To do something
    7097             :             // more precise we would for example need to compute the
    7098             :             // transformation of coordinates in the
    7099             :             // [iDstX,iDstY]x[iDstX+1,iDstY+1] square back to source
    7100             :             // coordinates, and take the bounding box of the got source
    7101             :             // coordinates.
    7102             : 
    7103     1663280 :             if (padfX[iDstX] > padfX2[iDstX])
    7104      268744 :                 std::swap(padfX[iDstX], padfX2[iDstX]);
    7105             : 
    7106             :             // Detect situations where the target pixel is close to the
    7107             :             // antimeridian and when padfX[iDstX] and padfX2[iDstX] are very
    7108             :             // close to the left-most and right-most columns of the source
    7109             :             // raster. The 2 value below was experimentally determined to
    7110             :             // avoid false-positives and false-negatives.
    7111             :             // Addresses https://github.com/OSGeo/gdal/issues/6478
    7112     1663280 :             bool bWrapOverX = false;
    7113     1663280 :             const int nThresholdWrapOverX = std::min(2, nSrcXSize / 10);
    7114     1663280 :             if (poWK->nSrcXOff == 0 &&
    7115     1663280 :                 padfX[iDstX] * poWK->dfXScale < nThresholdWrapOverX &&
    7116       14495 :                 (nSrcXSize - padfX2[iDstX]) * poWK->dfXScale <
    7117             :                     nThresholdWrapOverX)
    7118             :             {
    7119             :                 // Check there is a discontinuity by checking at mid-pixel.
    7120             :                 // NOTE: all this remains fragile. To confidently
    7121             :                 // detect antimeridian warping we should probably try to access
    7122             :                 // georeferenced coordinates, and not rely only on tests on
    7123             :                 // image space coordinates. But accessing georeferenced
    7124             :                 // coordinates from here is not trivial, and we would for example
    7125             :                 // have to handle both geographic, Mercator, etc.
    7126             :                 // Let's hope this heuristic is good enough for now.
    7127        1041 :                 double x = iDstX + 0.5 + poWK->nDstXOff;
    7128        1041 :                 double y = iDstY + poWK->nDstYOff;
    7129        1041 :                 double z = 0;
    7130        1041 :                 int bSuccess = FALSE;
    7131        1041 :                 poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1, &x, &y,
    7132             :                                      &z, &bSuccess);
    7133        1041 :                 if (bSuccess && x < padfX[iDstX])
    7134             :                 {
    7135        1008 :                     bWrapOverX = true;
    7136        1008 :                     std::swap(padfX[iDstX], padfX2[iDstX]);
    7137        1008 :                     padfX2[iDstX] += nSrcXSize;
    7138             :                 }
    7139             :             }
    7140             : 
    7141     1663280 :             const double dfXMin = padfX[iDstX] - poWK->nSrcXOff;
    7142     1663280 :             const double dfXMax = padfX2[iDstX] - poWK->nSrcXOff;
    7143     1663280 :             constexpr double EPS = 1e-10;
    7144             :             // Check that [dfXMin, dfXMax] intersect with [0,nSrcXSize] with a tolerance
    7145     1663280 :             if (!(dfXMax > -EPS && dfXMin < nSrcXSize + EPS))
    7146          72 :                 continue;
    7147     1663200 :             int iSrcXMin = static_cast<int>(std::max(floor(dfXMin + EPS), 0.0));
    7148     1663200 :             int iSrcXMax = static_cast<int>(
    7149     1663200 :                 std::min(ceil(dfXMax - EPS), static_cast<double>(INT_MAX)));
    7150     1663200 :             if (!bWrapOverX)
    7151     1662200 :                 iSrcXMax = std::min(iSrcXMax, nSrcXSize);
    7152     1663200 :             if (iSrcXMin == iSrcXMax && iSrcXMax < nSrcXSize)
    7153         472 :                 iSrcXMax++;
    7154             : 
    7155     1663200 :             if (padfY[iDstX] > padfY2[iDstX])
    7156      270117 :                 std::swap(padfY[iDstX], padfY2[iDstX]);
    7157     1663200 :             const double dfYMin = padfY[iDstX] - poWK->nSrcYOff;
    7158     1663200 :             const double dfYMax = padfY2[iDstX] - poWK->nSrcYOff;
    7159             :             // Check that [dfYMin, dfYMax] intersect with [0,nSrcYSize] with a tolerance
    7160     1663200 :             if (!(dfYMax > -EPS && dfYMin < nSrcYSize + EPS))
    7161          36 :                 continue;
    7162     1663170 :             int iSrcYMin = static_cast<int>(std::max(floor(dfYMin + EPS), 0.0));
    7163             :             int iSrcYMax =
    7164     1663170 :                 std::min(static_cast<int>(ceil(dfYMax - EPS)), nSrcYSize);
    7165     1663170 :             if (iSrcYMin == iSrcYMax && iSrcYMax < nSrcYSize)
    7166           0 :                 iSrcYMax++;
    7167             : 
    7168             : #define COMPUTE_WEIGHT_Y(iSrcY)                                                \
    7169             :     ((iSrcY == iSrcYMin)                                                       \
    7170             :          ? ((iSrcYMin + 1 == iSrcYMax) ? 1.0 : 1 - (dfYMin - iSrcYMin))        \
    7171             :      : (iSrcY + 1 == iSrcYMax) ? 1 - (iSrcYMax - dfYMax)                       \
    7172             :                                : 1.0)
    7173             : 
    7174             : #define COMPUTE_WEIGHT(iSrcX, dfWeightY)                                       \
    7175             :     ((iSrcX == iSrcXMin)       ? ((iSrcXMin + 1 == iSrcXMax)                   \
    7176             :                                       ? dfWeightY                              \
    7177             :                                       : dfWeightY * (1 - (dfXMin - iSrcXMin))) \
    7178             :      : (iSrcX + 1 == iSrcXMax) ? dfWeightY * (1 - (iSrcXMax - dfXMax))         \
    7179             :                                : dfWeightY)
    7180             : 
    7181     1663170 :             bool bDone = false;
    7182             : 
    7183             :             // Special Average mode where we process all bands together,
    7184             :             // to avoid averaging tuples that match an entry of m_aadfExcludedValues
    7185     2267240 :             if (nAlgo == GWKAOM_Average &&
    7186      604073 :                 (!poWK->m_aadfExcludedValues.empty() ||
    7187      393224 :                  dfNodataValuesThreshold < 1 - EPS) &&
    7188     2267240 :                 !poWK->bApplyVerticalShift && !bIsComplex)
    7189             :             {
    7190      393224 :                 double dfTotalWeightInvalid = 0.0;
    7191      393224 :                 double dfTotalWeightExcluded = 0.0;
    7192      393224 :                 double dfTotalWeightRegular = 0.0;
    7193      786448 :                 std::vector<double> adfValueReal(poWK->nBands, 0);
    7194      786448 :                 std::vector<double> adfValueAveraged(poWK->nBands, 0);
    7195             :                 std::vector<int> anCountExcludedValues(
    7196      393224 :                     poWK->m_aadfExcludedValues.size(), 0);
    7197             : 
    7198     1572890 :                 for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7199             :                 {
    7200     1179660 :                     const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7201     1179660 :                     iSrcOffset =
    7202     1179660 :                         iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7203     5111860 :                     for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7204             :                          iSrcX++, iSrcOffset++)
    7205             :                     {
    7206     3932190 :                         if (bWrapOverX)
    7207           0 :                             iSrcOffset =
    7208           0 :                                 (iSrcX % nSrcXSize) +
    7209           0 :                                 static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7210             : 
    7211     3932190 :                         const double dfWeight =
    7212     3932190 :                             COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7213     3932190 :                         if (dfWeight <= 0)
    7214           0 :                             continue;
    7215             : 
    7216     3932200 :                         if (poWK->panUnifiedSrcValid != nullptr &&
    7217          12 :                             !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    7218             :                         {
    7219           3 :                             dfTotalWeightInvalid += dfWeight;
    7220           3 :                             continue;
    7221             :                         }
    7222             : 
    7223     3932190 :                         bool bAllValid = true;
    7224     7274900 :                         for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7225             :                         {
    7226     6160660 :                             double dfBandDensity = 0;
    7227     6160660 :                             double dfValueImagTmp = 0;
    7228     9503370 :                             if (!(GWKGetPixelValue(
    7229             :                                       poWK, iBand, iSrcOffset, &dfBandDensity,
    7230     6160660 :                                       &adfValueReal[iBand], &dfValueImagTmp) &&
    7231     3342710 :                                   dfBandDensity > BAND_DENSITY_THRESHOLD))
    7232             :                             {
    7233     2817950 :                                 bAllValid = false;
    7234     2817950 :                                 break;
    7235             :                             }
    7236             :                         }
    7237             : 
    7238     3932190 :                         if (!bAllValid)
    7239             :                         {
    7240     2817950 :                             dfTotalWeightInvalid += dfWeight;
    7241     2817950 :                             continue;
    7242             :                         }
    7243             : 
    7244     1114240 :                         bool bExcludedValueFound = false;
    7245     2228350 :                         for (size_t i = 0;
    7246     2228350 :                              i < poWK->m_aadfExcludedValues.size(); ++i)
    7247             :                         {
    7248     1114130 :                             if (poWK->m_aadfExcludedValues[i] == adfValueReal)
    7249             :                             {
    7250          21 :                                 bExcludedValueFound = true;
    7251          21 :                                 ++anCountExcludedValues[i];
    7252          21 :                                 dfTotalWeightExcluded += dfWeight;
    7253          21 :                                 break;
    7254             :                             }
    7255             :                         }
    7256     1114240 :                         if (!bExcludedValueFound)
    7257             :                         {
    7258             :                             // Weighted incremental algorithm mean
    7259             :                             // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
    7260     1114220 :                             dfTotalWeightRegular += dfWeight;
    7261     4456870 :                             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7262             :                             {
    7263     3342650 :                                 adfValueAveraged[iBand] +=
    7264     6685300 :                                     (dfWeight / dfTotalWeightRegular) *
    7265     6685300 :                                     (adfValueReal[iBand] -
    7266     3342650 :                                      adfValueAveraged[iBand]);
    7267             :                             }
    7268             :                         }
    7269             :                     }
    7270             :                 }
    7271             : 
    7272      393224 :                 const double dfTotalWeight = dfTotalWeightInvalid +
    7273             :                                              dfTotalWeightExcluded +
    7274             :                                              dfTotalWeightRegular;
    7275      393224 :                 if (dfTotalWeightInvalid > 0 &&
    7276             :                     dfTotalWeightInvalid >=
    7277      311293 :                         dfNodataValuesThreshold * dfTotalWeight)
    7278             :                 {
    7279             :                     // Do nothing. Leave bHasFoundDensity set to false.
    7280             :                 }
    7281       81934 :                 else if (dfTotalWeightExcluded > 0 &&
    7282             :                          dfTotalWeightExcluded >=
    7283           6 :                              dfExcludedValuesThreshold * dfTotalWeight)
    7284             :                 {
    7285             :                     // Find the most represented excluded value tuple
    7286           3 :                     size_t iExcludedValue = 0;
    7287           3 :                     int nExcludedValueCount = 0;
    7288           6 :                     for (size_t i = 0; i < poWK->m_aadfExcludedValues.size();
    7289             :                          ++i)
    7290             :                     {
    7291           3 :                         if (anCountExcludedValues[i] > nExcludedValueCount)
    7292             :                         {
    7293           3 :                             iExcludedValue = i;
    7294           3 :                             nExcludedValueCount = anCountExcludedValues[i];
    7295             :                         }
    7296             :                     }
    7297             : 
    7298           3 :                     bHasFoundDensity = true;
    7299             : 
    7300          12 :                     for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7301             :                     {
    7302           9 :                         GWKSetPixelValue(
    7303             :                             poWK, iBand, iDstOffset, /* dfBandDensity = */ 1.0,
    7304           9 :                             poWK->m_aadfExcludedValues[iExcludedValue][iBand],
    7305             :                             0);
    7306           3 :                     }
    7307             :                 }
    7308       81931 :                 else if (dfTotalWeightRegular > 0)
    7309             :                 {
    7310       81931 :                     bHasFoundDensity = true;
    7311             : 
    7312      327720 :                     for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7313             :                     {
    7314      245789 :                         GWKSetPixelValue(poWK, iBand, iDstOffset,
    7315             :                                          /* dfBandDensity = */ 1.0,
    7316      245789 :                                          adfValueAveraged[iBand], 0);
    7317             :                     }
    7318             :                 }
    7319             : 
    7320             :                 // Skip below loop on bands
    7321      393224 :                 bDone = true;
    7322             :             }
    7323             : 
    7324             :             /* ====================================================================
    7325             :              */
    7326             :             /*      Loop processing each band. */
    7327             :             /* ====================================================================
    7328             :              */
    7329             : 
    7330     4439540 :             for (int iBand = 0; !bDone && iBand < poWK->nBands; iBand++)
    7331             :             {
    7332     2776380 :                 double dfBandDensity = 0.0;
    7333     2776380 :                 double dfValueReal = 0.0;
    7334     2776380 :                 double dfValueImag = 0.0;
    7335     2776380 :                 double dfValueRealTmp = 0.0;
    7336     2776380 :                 double dfValueImagTmp = 0.0;
    7337             : 
    7338             :                 /* --------------------------------------------------------------------
    7339             :                  */
    7340             :                 /*      Collect the source value. */
    7341             :                 /* --------------------------------------------------------------------
    7342             :                  */
    7343             : 
    7344             :                 // Loop over source lines and pixels - 3 possible algorithms.
    7345             : 
    7346             :                 // poWK->eResample == GRA_Average.
    7347     2776380 :                 if (nAlgo == GWKAOM_Average)
    7348             :                 {
    7349      300849 :                     double dfTotalWeight = 0.0;
    7350             : 
    7351             :                     // This code adapted from GDALDownsampleChunk32R_AverageT()
    7352             :                     // in gcore/overview.cpp.
    7353      631308 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7354             :                     {
    7355      330459 :                         const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7356      330459 :                         iSrcOffset = iSrcXMin +
    7357      330459 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7358      803200 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7359             :                              iSrcX++, iSrcOffset++)
    7360             :                         {
    7361      472741 :                             if (bWrapOverX)
    7362         630 :                                 iSrcOffset =
    7363         630 :                                     (iSrcX % nSrcXSize) +
    7364         630 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7365             : 
    7366      472745 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7367           4 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7368             :                                             iSrcOffset))
    7369             :                             {
    7370           1 :                                 continue;
    7371             :                             }
    7372             : 
    7373      472740 :                             if (GWKGetPixelValue(
    7374             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7375      945480 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7376      472740 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7377             :                             {
    7378      472740 :                                 const double dfWeight =
    7379      472740 :                                     COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7380      472740 :                                 if (dfWeight > 0)
    7381             :                                 {
    7382             :                                     // Weighted incremental algorithm mean
    7383             :                                     // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
    7384      472740 :                                     dfTotalWeight += dfWeight;
    7385      472740 :                                     dfValueReal +=
    7386      472740 :                                         (dfWeight / dfTotalWeight) *
    7387      472740 :                                         (dfValueRealTmp - dfValueReal);
    7388      472740 :                                     if (bIsComplex)
    7389             :                                     {
    7390         252 :                                         dfValueImag +=
    7391         252 :                                             (dfWeight / dfTotalWeight) *
    7392         252 :                                             (dfValueImagTmp - dfValueImag);
    7393             :                                     }
    7394             :                                 }
    7395             :                             }
    7396             :                         }
    7397             :                     }
    7398             : 
    7399      300849 :                     if (dfTotalWeight > 0)
    7400             :                     {
    7401      300849 :                         if (poWK->bApplyVerticalShift)
    7402             :                         {
    7403           0 :                             if (!std::isfinite(padfZ[iDstX]))
    7404           0 :                                 continue;
    7405             :                             // Subtract padfZ[] since the coordinate
    7406             :                             // transformation is from target to source
    7407           0 :                             dfValueReal =
    7408           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7409           0 :                                 padfZ[iDstX] *
    7410             :                                     dfMultFactorVerticalShiftPipeline;
    7411             :                         }
    7412             : 
    7413      300849 :                         dfBandDensity = 1;
    7414      300849 :                         bHasFoundDensity = true;
    7415             :                     }
    7416             :                 }  // GRA_Average.
    7417             :                 // poWK->eResample == GRA_RMS.
    7418     2776380 :                 if (nAlgo == GWKAOM_RMS)
    7419             :                 {
    7420      300416 :                     double dfTotalReal = 0.0;
    7421      300416 :                     double dfTotalImag = 0.0;
    7422      300416 :                     double dfTotalWeight = 0.0;
    7423             :                     // This code adapted from GDALDownsampleChunk32R_AverageT()
    7424             :                     // in gcore/overview.cpp.
    7425      630578 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7426             :                     {
    7427      330162 :                         const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7428      330162 :                         iSrcOffset = iSrcXMin +
    7429      330162 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7430      802723 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7431             :                              iSrcX++, iSrcOffset++)
    7432             :                         {
    7433      472561 :                             if (bWrapOverX)
    7434         630 :                                 iSrcOffset =
    7435         630 :                                     (iSrcX % nSrcXSize) +
    7436         630 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7437             : 
    7438      472561 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7439           0 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7440             :                                             iSrcOffset))
    7441             :                             {
    7442           0 :                                 continue;
    7443             :                             }
    7444             : 
    7445      472561 :                             if (GWKGetPixelValue(
    7446             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7447      945122 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7448      472561 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7449             :                             {
    7450      472561 :                                 const double dfWeight =
    7451      472561 :                                     COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7452      472561 :                                 dfTotalWeight += dfWeight;
    7453      472561 :                                 dfTotalReal +=
    7454      472561 :                                     dfValueRealTmp * dfValueRealTmp * dfWeight;
    7455      472561 :                                 if (bIsComplex)
    7456          48 :                                     dfTotalImag += dfValueImagTmp *
    7457          48 :                                                    dfValueImagTmp * dfWeight;
    7458             :                             }
    7459             :                         }
    7460             :                     }
    7461             : 
    7462      300416 :                     if (dfTotalWeight > 0)
    7463             :                     {
    7464      300416 :                         dfValueReal = sqrt(dfTotalReal / dfTotalWeight);
    7465             : 
    7466      300416 :                         if (poWK->bApplyVerticalShift)
    7467             :                         {
    7468           0 :                             if (!std::isfinite(padfZ[iDstX]))
    7469           0 :                                 continue;
    7470             :                             // Subtract padfZ[] since the coordinate
    7471             :                             // transformation is from target to source
    7472           0 :                             dfValueReal =
    7473           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7474           0 :                                 padfZ[iDstX] *
    7475             :                                     dfMultFactorVerticalShiftPipeline;
    7476             :                         }
    7477             : 
    7478      300416 :                         if (bIsComplex)
    7479          12 :                             dfValueImag = sqrt(dfTotalImag / dfTotalWeight);
    7480             : 
    7481      300416 :                         dfBandDensity = 1;
    7482      300416 :                         bHasFoundDensity = true;
    7483             :                     }
    7484             :                 }  // GRA_RMS.
    7485             : #ifdef disabled
    7486             :                 else if (nAlgo == GWKAOM_Sum)
    7487             :                 // poWK->eResample == GRA_Sum
    7488             :                 {
    7489             :                     double dfTotalReal = 0.0;
    7490             :                     double dfTotalImag = 0.0;
    7491             :                     bool bFoundValid = false;
    7492             : 
    7493             :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7494             :                     {
    7495             :                         const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7496             :                         iSrcOffset = iSrcXMin +
    7497             :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7498             :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7499             :                              iSrcX++, iSrcOffset++)
    7500             :                         {
    7501             :                             if (bWrapOverX)
    7502             :                                 iSrcOffset =
    7503             :                                     (iSrcX % nSrcXSize) +
    7504             :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7505             : 
    7506             :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7507             :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7508             :                                             iSrcOffset))
    7509             :                             {
    7510             :                                 continue;
    7511             :                             }
    7512             : 
    7513             :                             if (GWKGetPixelValue(
    7514             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7515             :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7516             :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7517             :                             {
    7518             :                                 const double dfWeight =
    7519             :                                     COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7520             :                                 bFoundValid = true;
    7521             :                                 dfTotalReal += dfValueRealTmp * dfWeight;
    7522             :                                 if (bIsComplex)
    7523             :                                 {
    7524             :                                     dfTotalImag += dfValueImagTmp * dfWeight;
    7525             :                                 }
    7526             :                             }
    7527             :                         }
    7528             :                     }
    7529             : 
    7530             :                     if (bFoundValid)
    7531             :                     {
    7532             :                         dfValueReal = dfTotalReal;
    7533             : 
    7534             :                         if (poWK->bApplyVerticalShift)
    7535             :                         {
    7536             :                             if (!std::isfinite(padfZ[iDstX]))
    7537             :                                 continue;
    7538             :                             // Subtract padfZ[] since the coordinate
    7539             :                             // transformation is from target to source
    7540             :                             dfValueReal =
    7541             :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7542             :                                 padfZ[iDstX] *
    7543             :                                     dfMultFactorVerticalShiftPipeline;
    7544             :                         }
    7545             : 
    7546             :                         if (bIsComplex)
    7547             :                         {
    7548             :                             dfValueImag = dfTotalImag;
    7549             :                         }
    7550             :                         dfBandDensity = 1;
    7551             :                         bHasFoundDensity = true;
    7552             :                     }
    7553             :                 }  // GRA_Sum.
    7554             : #endif
    7555     2475960 :                 else if (nAlgo == GWKAOM_Imode || nAlgo == GWKAOM_Fmode)
    7556             :                 // poWK->eResample == GRA_Mode
    7557             :                 {
    7558             :                     // This code adapted from GDALDownsampleChunk32R_Mode() in
    7559             :                     // gcore/overview.cpp.
    7560      500026 :                     if (nAlgo == GWKAOM_Fmode)  // int32 or float.
    7561             :                     {
    7562             :                         // Does it make sense to run a majority filter on
    7563             :                         // floating point data? But, here it is for the sake
    7564             :                         // of compatibility. It won't look right on RGB
    7565             :                         // images by the nature of the filter.
    7566        3407 :                         nBins = 0;
    7567        3407 :                         int iModeIndex = -1;
    7568             : 
    7569       10228 :                         for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7570             :                         {
    7571        6821 :                             const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7572        6821 :                             iSrcOffset =
    7573        6821 :                                 iSrcXMin +
    7574        6821 :                                 static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7575       20484 :                             for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7576             :                                  iSrcX++, iSrcOffset++)
    7577             :                             {
    7578       13663 :                                 if (bWrapOverX)
    7579           0 :                                     iSrcOffset =
    7580           0 :                                         (iSrcX % nSrcXSize) +
    7581           0 :                                         static_cast<GPtrDiff_t>(iSrcY) *
    7582           0 :                                             nSrcXSize;
    7583             : 
    7584       13663 :                                 if (poWK->panUnifiedSrcValid != nullptr &&
    7585           0 :                                     !CPLMaskGet(poWK->panUnifiedSrcValid,
    7586             :                                                 iSrcOffset))
    7587           0 :                                     continue;
    7588             : 
    7589       13663 :                                 if (GWKGetPixelValue(
    7590             :                                         poWK, iBand, iSrcOffset, &dfBandDensity,
    7591       27326 :                                         &dfValueRealTmp, &dfValueImagTmp) &&
    7592       13663 :                                     dfBandDensity > BAND_DENSITY_THRESHOLD)
    7593             :                                 {
    7594       13663 :                                     const float fVal =
    7595       13663 :                                         static_cast<float>(dfValueRealTmp);
    7596       13663 :                                     const double dfWeight =
    7597       13663 :                                         COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7598             : 
    7599             :                                     // Check array for existing entry.
    7600       13663 :                                     int i = 0;
    7601       29135 :                                     for (i = 0; i < nBins; ++i)
    7602             :                                     {
    7603       17768 :                                         if (pafRealVals[i] == fVal)
    7604             :                                         {
    7605             : 
    7606        2296 :                                             pafCounts[i] +=
    7607        2296 :                                                 static_cast<float>(dfWeight);
    7608        2296 :                                             bool bValIsMaxCount =
    7609        2296 :                                                 (pafCounts[i] >
    7610        2296 :                                                  pafCounts[iModeIndex]);
    7611             : 
    7612        2296 :                                             if (!bValIsMaxCount &&
    7613        1492 :                                                 pafCounts[i] ==
    7614        1492 :                                                     pafCounts[iModeIndex])
    7615             :                                             {
    7616        1487 :                                                 switch (eTieStrategy)
    7617             :                                                 {
    7618        1474 :                                                     case GWKTS_First:
    7619        1474 :                                                         break;
    7620           6 :                                                     case GWKTS_Min:
    7621           6 :                                                         bValIsMaxCount =
    7622             :                                                             fVal <
    7623             :                                                             pafRealVals
    7624           6 :                                                                 [iModeIndex];
    7625           6 :                                                         break;
    7626           7 :                                                     case GWKTS_Max:
    7627           7 :                                                         bValIsMaxCount =
    7628             :                                                             fVal >
    7629             :                                                             pafRealVals
    7630           7 :                                                                 [iModeIndex];
    7631           7 :                                                         break;
    7632             :                                                 }
    7633             :                                             }
    7634             : 
    7635        2296 :                                             if (bValIsMaxCount)
    7636             :                                             {
    7637         807 :                                                 iModeIndex = i;
    7638             :                                             }
    7639             : 
    7640        2296 :                                             break;
    7641             :                                         }
    7642             :                                     }
    7643             : 
    7644             :                                     // Add to arr if entry not already there.
    7645       13663 :                                     if (i == nBins)
    7646             :                                     {
    7647       11367 :                                         pafRealVals[i] = fVal;
    7648       11367 :                                         pafCounts[i] =
    7649       11367 :                                             static_cast<float>(dfWeight);
    7650             : 
    7651       11367 :                                         if (iModeIndex < 0)
    7652        3407 :                                             iModeIndex = i;
    7653             : 
    7654       11367 :                                         ++nBins;
    7655             :                                     }
    7656             :                                 }
    7657             :                             }
    7658             :                         }
    7659             : 
    7660        3407 :                         if (iModeIndex != -1)
    7661             :                         {
    7662        3407 :                             dfValueReal = pafRealVals[iModeIndex];
    7663             : 
    7664        3407 :                             if (poWK->bApplyVerticalShift)
    7665             :                             {
    7666           0 :                                 if (!std::isfinite(padfZ[iDstX]))
    7667           0 :                                     continue;
    7668             :                                 // Subtract padfZ[] since the coordinate
    7669             :                                 // transformation is from target to source
    7670           0 :                                 dfValueReal =
    7671           0 :                                     dfValueReal *
    7672           0 :                                         poWK->dfMultFactorVerticalShift -
    7673           0 :                                     padfZ[iDstX] *
    7674             :                                         dfMultFactorVerticalShiftPipeline;
    7675             :                             }
    7676             : 
    7677        3407 :                             dfBandDensity = 1;
    7678        3407 :                             bHasFoundDensity = true;
    7679             :                         }
    7680             :                     }
    7681             :                     else  // byte or int16.
    7682             :                     {
    7683      496619 :                         float fMaxCount = 0.0f;
    7684      496619 :                         int nMode = -1;
    7685      496619 :                         bool bHasSourceValues = false;
    7686             : 
    7687      496619 :                         memset(pafCounts, 0, nBins * sizeof(float));
    7688             : 
    7689     1612550 :                         for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7690             :                         {
    7691     1115930 :                             const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7692     1115930 :                             iSrcOffset =
    7693     1115930 :                                 iSrcXMin +
    7694     1115930 :                                 static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7695     4733150 :                             for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7696             :                                  iSrcX++, iSrcOffset++)
    7697             :                             {
    7698     3617220 :                                 if (bWrapOverX)
    7699         630 :                                     iSrcOffset =
    7700         630 :                                         (iSrcX % nSrcXSize) +
    7701         630 :                                         static_cast<GPtrDiff_t>(iSrcY) *
    7702         630 :                                             nSrcXSize;
    7703             : 
    7704     3617220 :                                 if (poWK->panUnifiedSrcValid != nullptr &&
    7705           0 :                                     !CPLMaskGet(poWK->panUnifiedSrcValid,
    7706             :                                                 iSrcOffset))
    7707           0 :                                     continue;
    7708             : 
    7709     3617220 :                                 if (GWKGetPixelValue(
    7710             :                                         poWK, iBand, iSrcOffset, &dfBandDensity,
    7711     7234430 :                                         &dfValueRealTmp, &dfValueImagTmp) &&
    7712     3617220 :                                     dfBandDensity > BAND_DENSITY_THRESHOLD)
    7713             :                                 {
    7714     3617220 :                                     bHasSourceValues = true;
    7715     3617220 :                                     const int nVal =
    7716     3617220 :                                         static_cast<int>(dfValueRealTmp);
    7717     3617220 :                                     const int iBin = nVal + nBinsOffset;
    7718     3617220 :                                     const double dfWeight =
    7719     3617220 :                                         COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7720             : 
    7721             :                                     // Sum the density.
    7722     3617220 :                                     pafCounts[iBin] +=
    7723     3617220 :                                         static_cast<float>(dfWeight);
    7724             :                                     // Is it the most common value so far?
    7725     3617220 :                                     bool bUpdateMode =
    7726     3617220 :                                         pafCounts[iBin] > fMaxCount;
    7727     3617220 :                                     if (!bUpdateMode &&
    7728      778312 :                                         pafCounts[iBin] == fMaxCount)
    7729             :                                     {
    7730      218624 :                                         switch (eTieStrategy)
    7731             :                                         {
    7732      218616 :                                             case GWKTS_First:
    7733      218616 :                                                 break;
    7734           4 :                                             case GWKTS_Min:
    7735           4 :                                                 bUpdateMode = nVal < nMode;
    7736           4 :                                                 break;
    7737           4 :                                             case GWKTS_Max:
    7738           4 :                                                 bUpdateMode = nVal > nMode;
    7739           4 :                                                 break;
    7740             :                                         }
    7741             :                                     }
    7742     3617220 :                                     if (bUpdateMode)
    7743             :                                     {
    7744     2838910 :                                         nMode = nVal;
    7745     2838910 :                                         fMaxCount = pafCounts[iBin];
    7746             :                                     }
    7747             :                                 }
    7748             :                             }
    7749             :                         }
    7750             : 
    7751      496619 :                         if (bHasSourceValues)
    7752             :                         {
    7753      496619 :                             dfValueReal = nMode;
    7754             : 
    7755      496619 :                             if (poWK->bApplyVerticalShift)
    7756             :                             {
    7757           0 :                                 if (!std::isfinite(padfZ[iDstX]))
    7758           0 :                                     continue;
    7759             :                                 // Subtract padfZ[] since the coordinate
    7760             :                                 // transformation is from target to source
    7761           0 :                                 dfValueReal =
    7762           0 :                                     dfValueReal *
    7763           0 :                                         poWK->dfMultFactorVerticalShift -
    7764           0 :                                     padfZ[iDstX] *
    7765             :                                         dfMultFactorVerticalShiftPipeline;
    7766             :                             }
    7767             : 
    7768      496619 :                             dfBandDensity = 1;
    7769      496619 :                             bHasFoundDensity = true;
    7770             :                         }
    7771      500026 :                     }
    7772             :                 }  // GRA_Mode.
    7773     1975930 :                 else if (nAlgo == GWKAOM_Max)
    7774             :                 // poWK->eResample == GRA_Max.
    7775             :                 {
    7776      335037 :                     bool bFoundValid = false;
    7777      335037 :                     double dfTotalReal = std::numeric_limits<double>::lowest();
    7778             :                     // This code adapted from nAlgo 1 method, GRA_Average.
    7779     1288010 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7780             :                     {
    7781      952975 :                         iSrcOffset = iSrcXMin +
    7782      952975 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7783     4406540 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7784             :                              iSrcX++, iSrcOffset++)
    7785             :                         {
    7786     3453560 :                             if (bWrapOverX)
    7787         630 :                                 iSrcOffset =
    7788         630 :                                     (iSrcX % nSrcXSize) +
    7789         630 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7790             : 
    7791     3456370 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7792        2809 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7793             :                                             iSrcOffset))
    7794             :                             {
    7795        2446 :                                 continue;
    7796             :                             }
    7797             : 
    7798             :                             // Returns pixel value if it is not no data.
    7799     3451120 :                             if (GWKGetPixelValue(
    7800             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7801     6902230 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7802     3451120 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7803             :                             {
    7804     3451120 :                                 bFoundValid = true;
    7805     3451120 :                                 if (dfTotalReal < dfValueRealTmp)
    7806             :                                 {
    7807      442642 :                                     dfTotalReal = dfValueRealTmp;
    7808             :                                 }
    7809             :                             }
    7810             :                         }
    7811             :                     }
    7812             : 
    7813      335037 :                     if (bFoundValid)
    7814             :                     {
    7815      335037 :                         dfValueReal = dfTotalReal;
    7816             : 
    7817      335037 :                         if (poWK->bApplyVerticalShift)
    7818             :                         {
    7819           0 :                             if (!std::isfinite(padfZ[iDstX]))
    7820           0 :                                 continue;
    7821             :                             // Subtract padfZ[] since the coordinate
    7822             :                             // transformation is from target to source
    7823           0 :                             dfValueReal =
    7824           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7825           0 :                                 padfZ[iDstX] *
    7826             :                                     dfMultFactorVerticalShiftPipeline;
    7827             :                         }
    7828             : 
    7829      335037 :                         dfBandDensity = 1;
    7830      335037 :                         bHasFoundDensity = true;
    7831             :                     }
    7832             :                 }  // GRA_Max.
    7833     1640900 :                 else if (nAlgo == GWKAOM_Min)
    7834             :                 // poWK->eResample == GRA_Min.
    7835             :                 {
    7836      335012 :                     bool bFoundValid = false;
    7837      335012 :                     double dfTotalReal = std::numeric_limits<double>::max();
    7838             :                     // This code adapted from nAlgo 1 method, GRA_Average.
    7839     1287720 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7840             :                     {
    7841      952710 :                         iSrcOffset = iSrcXMin +
    7842      952710 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7843     4403460 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7844             :                              iSrcX++, iSrcOffset++)
    7845             :                         {
    7846     3450750 :                             if (bWrapOverX)
    7847         630 :                                 iSrcOffset =
    7848         630 :                                     (iSrcX % nSrcXSize) +
    7849         630 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7850             : 
    7851     3450750 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7852           0 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7853             :                                             iSrcOffset))
    7854             :                             {
    7855           0 :                                 continue;
    7856             :                             }
    7857             : 
    7858             :                             // Returns pixel value if it is not no data.
    7859     3450750 :                             if (GWKGetPixelValue(
    7860             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7861     6901500 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7862     3450750 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7863             :                             {
    7864     3450750 :                                 bFoundValid = true;
    7865     3450750 :                                 if (dfTotalReal > dfValueRealTmp)
    7866             :                                 {
    7867      443069 :                                     dfTotalReal = dfValueRealTmp;
    7868             :                                 }
    7869             :                             }
    7870             :                         }
    7871             :                     }
    7872             : 
    7873      335012 :                     if (bFoundValid)
    7874             :                     {
    7875      335012 :                         dfValueReal = dfTotalReal;
    7876             : 
    7877      335012 :                         if (poWK->bApplyVerticalShift)
    7878             :                         {
    7879           0 :                             if (!std::isfinite(padfZ[iDstX]))
    7880           0 :                                 continue;
    7881             :                             // Subtract padfZ[] since the coordinate
    7882             :                             // transformation is from target to source
    7883           0 :                             dfValueReal =
    7884           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7885           0 :                                 padfZ[iDstX] *
    7886             :                                     dfMultFactorVerticalShiftPipeline;
    7887             :                         }
    7888             : 
    7889      335012 :                         dfBandDensity = 1;
    7890      335012 :                         bHasFoundDensity = true;
    7891             :                     }
    7892             :                 }  // GRA_Min.
    7893     1305880 :                 else if (nAlgo == GWKAOM_Quant)
    7894             :                 // poWK->eResample == GRA_Med | GRA_Q1 | GRA_Q3.
    7895             :                 {
    7896     1005040 :                     bool bFoundValid = false;
    7897     1005040 :                     std::vector<double> dfRealValuesTmp;
    7898             : 
    7899             :                     // This code adapted from nAlgo 1 method, GRA_Average.
    7900     3863170 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7901             :                     {
    7902     2858130 :                         iSrcOffset = iSrcXMin +
    7903     2858130 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7904    13210400 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7905             :                              iSrcX++, iSrcOffset++)
    7906             :                         {
    7907    10352300 :                             if (bWrapOverX)
    7908        1890 :                                 iSrcOffset =
    7909        1890 :                                     (iSrcX % nSrcXSize) +
    7910        1890 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7911             : 
    7912    10352300 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7913           0 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7914             :                                             iSrcOffset))
    7915             :                             {
    7916           0 :                                 continue;
    7917             :                             }
    7918             : 
    7919             :                             // Returns pixel value if it is not no data.
    7920    10352300 :                             if (GWKGetPixelValue(
    7921             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7922    20704500 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7923    10352300 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7924             :                             {
    7925    10352300 :                                 bFoundValid = true;
    7926    10352300 :                                 dfRealValuesTmp.push_back(dfValueRealTmp);
    7927             :                             }
    7928             :                         }
    7929             :                     }
    7930             : 
    7931     1005040 :                     if (bFoundValid)
    7932             :                     {
    7933     1005040 :                         std::sort(dfRealValuesTmp.begin(),
    7934             :                                   dfRealValuesTmp.end());
    7935             :                         int quantIdx = static_cast<int>(
    7936     1005040 :                             std::ceil(quant * dfRealValuesTmp.size() - 1));
    7937     1005040 :                         dfValueReal = dfRealValuesTmp[quantIdx];
    7938             : 
    7939     1005040 :                         if (poWK->bApplyVerticalShift)
    7940             :                         {
    7941           0 :                             if (!std::isfinite(padfZ[iDstX]))
    7942           0 :                                 continue;
    7943             :                             // Subtract padfZ[] since the coordinate
    7944             :                             // transformation is from target to source
    7945           0 :                             dfValueReal =
    7946           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7947           0 :                                 padfZ[iDstX] *
    7948             :                                     dfMultFactorVerticalShiftPipeline;
    7949             :                         }
    7950             : 
    7951     1005040 :                         dfBandDensity = 1;
    7952     1005040 :                         bHasFoundDensity = true;
    7953     1005040 :                         dfRealValuesTmp.clear();
    7954             :                     }
    7955             :                 }  // Quantile.
    7956             : 
    7957             :                 /* --------------------------------------------------------------------
    7958             :                  */
    7959             :                 /*      We have a computed value from the source.  Now apply it
    7960             :                  * to      */
    7961             :                 /*      the destination pixel. */
    7962             :                 /* --------------------------------------------------------------------
    7963             :                  */
    7964     2776380 :                 if (bHasFoundDensity)
    7965             :                 {
    7966             :                     // TODO: Should we compute dfBandDensity in fct of
    7967             :                     // nCount/nCount2, or use as a threshold to set the dest
    7968             :                     // value?
    7969             :                     // dfBandDensity = (float) nCount / nCount2;
    7970             :                     // if( (float) nCount / nCount2 > 0.1 )
    7971             :                     // or fix gdalwarp crop_to_cutline to crop partially
    7972             :                     // overlapping pixels.
    7973     2776380 :                     GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    7974             :                                      dfValueReal, dfValueImag);
    7975             :                 }
    7976             :             }
    7977             : 
    7978     1663170 :             if (!bHasFoundDensity)
    7979      311290 :                 continue;
    7980             : 
    7981             :             /* --------------------------------------------------------------------
    7982             :              */
    7983             :             /*      Update destination density/validity masks. */
    7984             :             /* --------------------------------------------------------------------
    7985             :              */
    7986     1351880 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    7987             : 
    7988     1351880 :             if (poWK->panDstValid != nullptr)
    7989             :             {
    7990          74 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    7991             :             }
    7992             :         } /* Next iDstX */
    7993             : 
    7994             :         /* --------------------------------------------------------------------
    7995             :          */
    7996             :         /*      Report progress to the user, and optionally cancel out. */
    7997             :         /* --------------------------------------------------------------------
    7998             :          */
    7999        6497 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    8000           0 :             break;
    8001             :     }
    8002             : 
    8003             :     /* -------------------------------------------------------------------- */
    8004             :     /*      Cleanup and return.                                             */
    8005             :     /* -------------------------------------------------------------------- */
    8006         130 :     CPLFree(padfX);
    8007         130 :     CPLFree(padfY);
    8008         130 :     CPLFree(padfZ);
    8009         130 :     CPLFree(padfX2);
    8010         130 :     CPLFree(padfY2);
    8011         130 :     CPLFree(padfZ2);
    8012         130 :     CPLFree(pabSuccess);
    8013         130 :     CPLFree(pabSuccess2);
    8014         130 :     VSIFree(pafCounts);
    8015         130 :     VSIFree(pafRealVals);
    8016             : }
    8017             : 
    8018             : /************************************************************************/
    8019             : /*                         getOrientation()                             */
    8020             : /************************************************************************/
    8021             : 
    typedef std::pair<double, double> XYPair;

    // Returns 1 if (p1,p2,p3) is clockwise oriented,
    // -1 if it is counter-clockwise oriented,
    // or 0 if it is colinear.
    //
    // The decision is taken on the sign of the negated z component of the cross
    // product (p2 - p1) x (p3 - p2). Magnitudes below 1e-20 are treated as
    // colinear.
    static int getOrientation(const XYPair &p1, const XYPair &p2, const XYPair &p3)
    {
        const double val = (p2.second - p1.second) * (p3.first - p2.first) -
                           (p2.first - p1.first) * (p3.second - p2.second);
        if (std::abs(val) < 1e-20)
            return 0;
        return val > 0 ? 1 : -1;
    }

    /************************************************************************/
    /*                          isConvex()                                  */
    /************************************************************************/

    typedef std::vector<XYPair> XYPoly;

    // Returns whether all non-colinear turns of poly have the same orientation.
    //
    // poly must be closed, i.e. its last point is expected to repeat its first
    // point. Polygons with fewer than 3 points are rejected (returns false)
    // instead of being indexed out of bounds.
    //
    // The turn at every vertex is examined, including the closing vertex
    // poly[0] (triple poly[n-2], poly[0], poly[1]), so that a concave polygon
    // whose reflex vertex is stored first is correctly reported as non-convex.
    static bool isConvex(const XYPoly &poly)
    {
        const size_t n = poly.size();
        if (n < 3)
            return false;

        // Orientation shared by all turns seen so far; 0 while only colinear
        // triples have been encountered.
        int last_orientation = 0;
        const auto isConsistentTurn = [&last_orientation](int orientation)
        {
            if (orientation == 0)
                return true;
            if (last_orientation == 0)
            {
                last_orientation = orientation;
                return true;
            }
            return orientation == last_orientation;
        };

        // Turns at vertices 1 .. n-2.
        for (size_t i = 0; i + 2 < n; ++i)
        {
            if (!isConsistentTurn(
                    getOrientation(poly[i], poly[i + 1], poly[i + 2])))
                return false;
        }

        // Turn at the closing vertex (poly[0] == poly[n-1] for a closed ring).
        return isConsistentTurn(getOrientation(poly[n - 2], poly[0], poly[1]));
    }
    8071             : 
    8072             : /************************************************************************/
    8073             : /*                     pointIntersectsConvexPoly()                      */
    8074             : /************************************************************************/
    8075             : 
    // Returns whether xy intersects poly, that must be closed and convex.
    //
    // The point is accepted when the cross products of (xy - edge start) with
    // each edge vector never take opposite signs, i.e. the point stays on the
    // same side (left/right) of all edges. A zero cross product (point on the
    // line carrying an edge) does not reject the point.
    //
    // A poly with fewer than 2 points has no edge: false is returned rather
    // than reading poly[0]/poly[1] out of bounds.
    static bool pointIntersectsConvexPoly(const XYPair &xy, const XYPoly &poly)
    {
        const size_t n = poly.size();
        if (n < 2)
            return false;

        // Cross product for the edge going from poly[iEnd - 1] to poly[iEnd].
        const auto crossWithEdge = [&xy, &poly](size_t iEnd)
        {
            const XYPair &edgeStart = poly[iEnd - 1];
            const XYPair &edgeEnd = poly[iEnd];
            const double dx1 = xy.first - edgeStart.first;
            const double dy1 = xy.second - edgeStart.second;
            const double dx2 = edgeEnd.first - edgeStart.first;
            const double dy2 = edgeEnd.second - edgeStart.second;
            return dx1 * dy2 - dx2 * dy1;
        };

        // Reference sign: first edge, replaced by a later edge as long as it is
        // (nearly) zero.
        double prevCrossProduct = crossWithEdge(1);

        // Check if the point remains on the same side (left/right) of all edges
        for (size_t i = 2; i < n; i++)
        {
            const double crossProduct = crossWithEdge(i);
            if (std::abs(prevCrossProduct) < 1e-20)
                prevCrossProduct = crossProduct;
            else if (prevCrossProduct * crossProduct < 0)
                return false;
        }

        return true;
    }
    8104             : 
    8105             : /************************************************************************/
    8106             : /*                     getIntersection()                                */
    8107             : /************************************************************************/
    8108             : 
    /* Computes the intersection of segment [p1,p2] with segment [p3,p4].
     *
     * Returns true and stores the point in xy when the two segments meet in a
     * single point and are not colinear; returns false (xy untouched)
     * otherwise.
     */
    static bool getIntersection(const XYPair &p1, const XYPair &p2,
                                const XYPair &p3, const XYPair &p4, XYPair &xy)
    {
        const double x1 = p1.first, y1 = p1.second;
        const double x2 = p2.first, y2 = p2.second;
        const double x3 = p3.first, y3 = p3.second;
        const double x4 = p4.first, y4 = p4.second;

        // Common denominator of the two line parameters t (along [p1,p2]) and
        // u (along [p3,p4]).
        const double denom = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4);

        // True when num / denom would fall outside [0, 1], evaluated without
        // performing the division.
        const auto isOutsideSegment = [denom](double num)
        { return num * denom < 0 || std::abs(num) > std::abs(denom); };

        const double t_num = (x1 - x3) * (y3 - y4) - (y1 - y3) * (x3 - x4);
        if (isOutsideSegment(t_num) || denom == 0)
            return false;

        const double u_num = (x1 - x3) * (y1 - y2) - (y1 - y3) * (x1 - x2);
        if (isOutsideSegment(u_num))
            return false;

        const double t = t_num / denom;
        xy.first = x1 + t * (x2 - x1);
        xy.second = y1 + t * (y2 - y1);
        return true;
    }
    8137             : 
    8138             : /************************************************************************/
    8139             : /*                     getConvexPolyIntersection()                      */
    8140             : /************************************************************************/
    8141             : 
    8142             : // poly1 and poly2 must be closed and convex.
    8143             : // The returned intersection will not necessarily be closed.
                       //
                       // Clears `intersection`, then collects into it:
                       //  - the vertices of poly1 that pointIntersectsConvexPoly() accepts
                       //    against poly2 (if all of them are accepted, returns right away
                       //    with those vertices in their original order),
                       //  - the vertices of poly2 accepted against poly1,
                       //  - every edge/edge crossing reported by getIntersection().
                       // The collected points are then sorted around the lowest point and
                       // consecutive duplicates are removed. `intersection` is left empty
                       // when no point is collected.
                       //
                       // NOTE(review): the loops below iterate up to size() - 1 (the closing
                       // point is skipped); with an empty poly1/poly2 that unsigned
                       // expression wraps around. Callers presumably always pass closed,
                       // non-empty polygons - confirm.
    8144      785302 : static void getConvexPolyIntersection(const XYPoly &poly1, const XYPoly &poly2,
    8145             :                                       XYPoly &intersection)
    8146             : {
    8147      785302 :     intersection.clear();
    8148             : 
    8149             :     // Add all points of poly1 inside poly2
    8150     3926510 :     for (size_t i = 0; i < poly1.size() - 1; ++i)
    8151             :     {
    8152     3141210 :         if (pointIntersectsConvexPoly(poly1[i], poly2))
    8153     1187430 :             intersection.push_back(poly1[i]);
    8154             :     }
    8155      785302 :     if (intersection.size() == poly1.size() - 1)
    8156             :     {
    8157             :         // poly1 is inside poly2
    8158      119100 :         return;
    8159             :     }
    8160             : 
    8161             :     // Add all points of poly2 inside poly1
    8162     3634860 :     for (size_t i = 0; i < poly2.size() - 1; ++i)
    8163             :     {
    8164     2907890 :         if (pointIntersectsConvexPoly(poly2[i], poly1))
    8165      575904 :             intersection.push_back(poly2[i]);
    8166             :     }
    8167             : 
    8168             :     // Compute the intersection of all edges of both polygons
    8169      726972 :     XYPair xy;
    8170     3634860 :     for (size_t i1 = 0; i1 < poly1.size() - 1; ++i1)
    8171             :     {
    8172    14539400 :         for (size_t i2 = 0; i2 < poly2.size() - 1; ++i2)
    8173             :         {
    8174    11631600 :             if (getIntersection(poly1[i1], poly1[i1 + 1], poly2[i2],
    8175    11631600 :                                 poly2[i2 + 1], xy))
    8176             :             {
    8177     1576230 :                 intersection.push_back(xy);
    8178             :             }
    8179             :         }
    8180             :     }
    8181             : 
    8182      726972 :     if (intersection.empty())
    8183       60770 :         return;
    8184             : 
    8185             :     // Find lowest-left point in intersection set
                           // (smallest y; among equal y, smallest x)
    8186      666202 :     double lowest_x = std::numeric_limits<double>::max();
    8187      666202 :     double lowest_y = std::numeric_limits<double>::max();
    8188     3772450 :     for (const auto &pair : intersection)
    8189             :     {
    8190     3106240 :         const double x = pair.first;
    8191     3106240 :         const double y = pair.second;
    8192     3106240 :         if (y < lowest_y || (y == lowest_y && x < lowest_x))
    8193             :         {
    8194     1096040 :             lowest_x = x;
    8195     1096040 :             lowest_y = y;
    8196             :         }
    8197             :     }
    8198             : 
                           // Ordering predicate handed to std::sort(): compares p1 and p2 through
                           // the vectors going from (lowest_x, lowest_y) to each of them.
    8199     5737980 :     const auto sortFunc = [&](const XYPair &p1, const XYPair &p2)
    8200             :     {
    8201     5737980 :         const double p1x_diff = p1.first - lowest_x;
    8202     5737980 :         const double p1y_diff = p1.second - lowest_y;
    8203     5737980 :         const double p2x_diff = p2.first - lowest_x;
    8204     5737980 :         const double p2y_diff = p2.second - lowest_y;
                               // Both points at the same y as the reference point: points with
                               // x_diff >= 0 come before points with x_diff < 0, and points of the
                               // same group are ordered by x.
    8205     5737980 :         if (p2y_diff == 0.0 && p1y_diff == 0.0)
    8206             :         {
    8207     2655420 :             if (p1x_diff >= 0)
    8208             :             {
    8209     2655420 :                 if (p2x_diff >= 0)
    8210     2655420 :                     return p1.first < p2.first;
    8211           0 :                 return true;
    8212             :             }
    8213             :             else
    8214             :             {
    8215           0 :                 if (p2x_diff >= 0)
    8216           0 :                     return false;
    8217           0 :                 return p1.first < p2.first;
    8218             :             }
    8219             :         }
    8220             : 
                               // Both points at the same x as the reference point: order by y.
    8221     3082560 :         if (p2x_diff == 0.0 && p1x_diff == 0.0)
    8222     1046960 :             return p1.second < p2.second;
    8223             : 
                               // Slope (y_diff / x_diff) of each vector. When x_diff is zero the
                               // largest finite double stands in for the slope, or 0 when the
                               // point coincides with the reference point.
    8224             :         double tan_p1;
    8225     2035600 :         if (p1x_diff == 0.0)
    8226      464622 :             tan_p1 = p1y_diff == 0.0 ? 0.0 : std::numeric_limits<double>::max();
    8227             :         else
    8228     1570980 :             tan_p1 = p1y_diff / p1x_diff;
    8229             : 
    8230             :         double tan_p2;
    8231     2035600 :         if (p2x_diff == 0.0)
    8232      839515 :             tan_p2 = p2y_diff == 0.0 ? 0.0 : std::numeric_limits<double>::max();
    8233             :         else
    8234     1196080 :             tan_p2 = p2y_diff / p2x_diff;
    8235             : 
                               // Non-negative slopes sort before negative ones; inside each group
                               // points are ordered by increasing slope.
    8236     2035600 :         if (tan_p1 >= 0)
    8237             :         {
    8238     1904790 :             if (tan_p2 >= 0)
    8239     1881590 :                 return tan_p1 < tan_p2;
    8240             :             else
    8241       23199 :                 return true;
    8242             :         }
    8243             :         else
    8244             :         {
    8245      130806 :             if (tan_p2 >= 0)
    8246      103900 :                 return false;
    8247             :             else
    8248       26906 :                 return tan_p1 < tan_p2;
    8249             :         }
    8250      666202 :     };
    8251             : 
    8252             :     // Sort points by increasing atan2(y-lowest_y, x-lowest_x) to form a convex
    8253             :     // hull
    8254      666202 :     std::sort(intersection.begin(), intersection.end(), sortFunc);
    8255             : 
    8256             :     // Remove duplicated points
                           // (in-place compaction: j is the next write position, i.e. the number
                           // of points kept so far; only consecutive equal points are merged)
    8257      666202 :     size_t j = 1;
    8258     3106240 :     for (size_t i = 1; i < intersection.size(); ++i)
    8259             :     {
    8260     2440040 :         if (intersection[i] != intersection[i - 1])
    8261             :         {
    8262     1452560 :             if (j < i)
    8263      545275 :                 intersection[j] = intersection[i];
    8264     1452560 :             ++j;
    8265             :         }
    8266             :     }
    8267      666202 :     intersection.resize(j);
    8268             : }
    8269             : 
    8270             : /************************************************************************/
    8271             : /*                            getArea()                                 */
    8272             : /************************************************************************/
    8273             : 
    // Returns the (unsigned) area enclosed by poly.
    //
    // poly may or may not be closed. The area is computed as half the absolute
    // value of the sum, over all vertices i, of x[i] * (y[i+1] - y[i-1]) with
    // indices wrapping around.
    //
    // A poly with fewer than 2 points encloses nothing: 0.0 is returned rather
    // than reading poly[1] / poly[nPointCount - 2] out of bounds.
    static double getArea(const XYPoly &poly)
    {
        const size_t nPointCount = poly.size();
        if (nPointCount < 2)
            return 0.0;

        const size_t iLast = nPointCount - 1;

        // First vertex: its predecessor is the last vertex.
        double dfAreaSum = poly[0].first * (poly[1].second - poly[iLast].second);

        // Interior vertices.
        for (size_t i = 1; i < iLast; i++)
        {
            dfAreaSum += poly[i].first * (poly[i + 1].second - poly[i - 1].second);
        }

        // Last vertex: its successor is the first vertex.
        dfAreaSum +=
            poly[iLast].first * (poly[0].second - poly[iLast - 1].second);

        return 0.5 * std::fabs(dfAreaSum);
    }
    8292             : 
    8293             : /************************************************************************/
    8294             : /*                           GWKSumPreserving()                         */
    8295             : /************************************************************************/
    8296             : 
    8297             : static void GWKSumPreservingThread(void *pData);
                       // (Forward declaration: the worker function is defined after
                       // GWKSumPreserving(), which needs to reference it.)
    8298             : 
                       // Passes the kernel poWK, the string "GWKSumPreserving" and the worker
                       // function GWKSumPreservingThread to GWKRun(), and returns GWKRun()'s
                       // result unchanged.
                       // NOTE(review): GWKRun() is defined elsewhere; presumably it runs the
                       // worker over one or more jobs - confirm against its definition.
    8299          18 : static CPLErr GWKSumPreserving(GDALWarpKernel *poWK)
    8300             : {
    8301          18 :     return GWKRun(poWK, "GWKSumPreserving", GWKSumPreservingThread);
    8302             : }
    8303             : 
    8304          18 : static void GWKSumPreservingThread(void *pData)
    8305             : {
    8306          18 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    8307          18 :     GDALWarpKernel *poWK = psJob->poWK;
    8308          18 :     const int iYMin = psJob->iYMin;
    8309          18 :     const int iYMax = psJob->iYMax;
    8310             :     const bool bIsAffineNoRotation =
    8311          18 :         GDALTransformIsAffineNoRotation(poWK->pfnTransformer,
    8312          26 :                                         poWK->pTransformerArg) &&
    8313             :         // for debug/testing purposes
    8314           8 :         CPLTestBool(
    8315          18 :             CPLGetConfigOption("GDAL_WARP_USE_AFFINE_OPTIMIZATION", "YES"));
    8316             : 
    8317          18 :     const int nDstXSize = poWK->nDstXSize;
    8318          18 :     const int nSrcXSize = poWK->nSrcXSize;
    8319          18 :     const int nSrcYSize = poWK->nSrcYSize;
    8320             : 
    8321          36 :     std::vector<double> adfX0(nSrcXSize + 1);
    8322          36 :     std::vector<double> adfY0(nSrcXSize + 1);
    8323          36 :     std::vector<double> adfZ0(nSrcXSize + 1);
    8324          36 :     std::vector<double> adfX1(nSrcXSize + 1);
    8325          36 :     std::vector<double> adfY1(nSrcXSize + 1);
    8326          36 :     std::vector<double> adfZ1(nSrcXSize + 1);
    8327          36 :     std::vector<int> abSuccess0(nSrcXSize + 1);
    8328          36 :     std::vector<int> abSuccess1(nSrcXSize + 1);
    8329             : 
    8330             :     CPLRectObj sGlobalBounds;
    8331          18 :     sGlobalBounds.minx = -2 * poWK->dfXScale;
    8332          18 :     sGlobalBounds.miny = iYMin - 2 * poWK->dfYScale;
    8333          18 :     sGlobalBounds.maxx = nDstXSize + 2 * poWK->dfXScale;
    8334          18 :     sGlobalBounds.maxy = iYMax + 2 * poWK->dfYScale;
    8335          18 :     CPLQuadTree *hQuadTree = CPLQuadTreeCreate(&sGlobalBounds, nullptr);
    8336             : 
    8337             :     struct SourcePixel
    8338             :     {
    8339             :         int iSrcX;
    8340             :         int iSrcY;
    8341             : 
    8342             :         // Coordinates of source pixel in target pixel coordinates
    8343             :         double dfDstX0;
    8344             :         double dfDstY0;
    8345             :         double dfDstX1;
    8346             :         double dfDstY1;
    8347             :         double dfDstX2;
    8348             :         double dfDstY2;
    8349             :         double dfDstX3;
    8350             :         double dfDstY3;
    8351             : 
    8352             :         // Source pixel total area (might be larger than the one described
    8353             :         // by above coordinates, if the pixel was crossing the antimeridian
    8354             :         // and split)
    8355             :         double dfArea;
    8356             :     };
    8357             : 
    8358          36 :     std::vector<SourcePixel> sourcePixels;
    8359             : 
    8360          36 :     XYPoly discontinuityLeft(5);
    8361          36 :     XYPoly discontinuityRight(5);
    8362             : 
    8363             :     /* ==================================================================== */
    8364             :     /*      First pass: transform the 4 corners of each potential           */
    8365             :     /*      contributing source pixel to target pixel coordinates.          */
    8366             :     /* ==================================================================== */
    8367             : 
    8368             :     // Special case for top line
    8369             :     {
    8370          18 :         int iY = 0;
    8371        1130 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8372             :         {
    8373        1112 :             adfX1[iX] = iX + poWK->nSrcXOff;
    8374        1112 :             adfY1[iX] = iY + poWK->nSrcYOff;
    8375        1112 :             adfZ1[iX] = 0;
    8376             :         }
    8377             : 
    8378          18 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
    8379             :                              adfX1.data(), adfY1.data(), adfZ1.data(),
    8380             :                              abSuccess1.data());
    8381             : 
    8382        1130 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8383             :         {
    8384        1112 :             if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
    8385           0 :                 abSuccess1[iX] = FALSE;
    8386             :             else
    8387             :             {
    8388        1112 :                 adfX1[iX] -= poWK->nDstXOff;
    8389        1112 :                 adfY1[iX] -= poWK->nDstYOff;
    8390             :             }
    8391             :         }
    8392             :     }
    8393             : 
    8394      413412 :     const auto getInsideXSign = [poWK, nDstXSize](double dfX)
    8395             :     {
    8396      413412 :         return dfX - poWK->nDstXOff >= -2 * poWK->dfXScale &&
    8397      205344 :                        dfX - poWK->nDstXOff <= nDstXSize + 2 * poWK->dfXScale
    8398      413412 :                    ? 1
    8399      208068 :                    : -1;
    8400          18 :     };
    8401             : 
    8402             :     const auto FindDiscontinuity =
    8403          80 :         [poWK, psJob, getInsideXSign](
    8404             :             double dfXLeft, double dfXRight, double dfY,
    8405             :             int XLeftReprojectedInsideSign, double &dfXMidReprojectedLeft,
    8406         800 :             double &dfXMidReprojectedRight, double &dfYMidReprojected)
    8407             :     {
    8408         880 :         for (int i = 0; i < 10 && dfXRight - dfXLeft > 1e-8; ++i)
    8409             :         {
    8410         800 :             double dfXMid = (dfXLeft + dfXRight) / 2;
    8411         800 :             double dfXMidReprojected = dfXMid;
    8412         800 :             dfYMidReprojected = dfY;
    8413         800 :             double dfZ = 0;
    8414         800 :             int nSuccess = 0;
    8415         800 :             poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1,
    8416             :                                  &dfXMidReprojected, &dfYMidReprojected, &dfZ,
    8417             :                                  &nSuccess);
    8418         800 :             if (XLeftReprojectedInsideSign != getInsideXSign(dfXMidReprojected))
    8419             :             {
    8420         456 :                 dfXRight = dfXMid;
    8421         456 :                 dfXMidReprojectedRight = dfXMidReprojected;
    8422             :             }
    8423             :             else
    8424             :             {
    8425         344 :                 dfXLeft = dfXMid;
    8426         344 :                 dfXMidReprojectedLeft = dfXMidReprojected;
    8427             :             }
    8428             :         }
    8429          80 :     };
    8430             : 
    8431         566 :     for (int iY = 0; iY < nSrcYSize; ++iY)
    8432             :     {
    8433         548 :         std::swap(adfX0, adfX1);
    8434         548 :         std::swap(adfY0, adfY1);
    8435         548 :         std::swap(adfZ0, adfZ1);
    8436         548 :         std::swap(abSuccess0, abSuccess1);
    8437             : 
    8438      104512 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8439             :         {
    8440      103964 :             adfX1[iX] = iX + poWK->nSrcXOff;
    8441      103964 :             adfY1[iX] = iY + 1 + poWK->nSrcYOff;
    8442      103964 :             adfZ1[iX] = 0;
    8443             :         }
    8444             : 
    8445         548 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
    8446             :                              adfX1.data(), adfY1.data(), adfZ1.data(),
    8447             :                              abSuccess1.data());
    8448             : 
    8449      104512 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8450             :         {
    8451      103964 :             if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
    8452           0 :                 abSuccess1[iX] = FALSE;
    8453             :             else
    8454             :             {
    8455      103964 :                 adfX1[iX] -= poWK->nDstXOff;
    8456      103964 :                 adfY1[iX] -= poWK->nDstYOff;
    8457             :             }
    8458             :         }
    8459             : 
    8460      103964 :         for (int iX = 0; iX < nSrcXSize; ++iX)
    8461             :         {
    8462      206832 :             if (abSuccess0[iX] && abSuccess0[iX + 1] && abSuccess1[iX] &&
    8463      103416 :                 abSuccess1[iX + 1])
    8464             :             {
    8465             :                 /* --------------------------------------------------------------------
    8466             :                  */
    8467             :                 /*      Do not try to apply transparent source pixels to the
    8468             :                  * destination.*/
    8469             :                 /* --------------------------------------------------------------------
    8470             :                  */
    8471      103416 :                 const auto iSrcOffset =
    8472      103416 :                     iX + static_cast<GPtrDiff_t>(iY) * nSrcXSize;
    8473      105816 :                 if (poWK->panUnifiedSrcValid != nullptr &&
    8474        2400 :                     !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    8475             :                 {
    8476       10971 :                     continue;
    8477             :                 }
    8478             : 
    8479      103410 :                 if (poWK->pafUnifiedSrcDensity != nullptr)
    8480             :                 {
    8481           0 :                     if (poWK->pafUnifiedSrcDensity[iSrcOffset] <
    8482             :                         SRC_DENSITY_THRESHOLD)
    8483           0 :                         continue;
    8484             :                 }
    8485             : 
    8486             :                 SourcePixel sp;
    8487      103410 :                 sp.dfArea = 0;
    8488      103410 :                 sp.dfDstX0 = adfX0[iX];
    8489      103410 :                 sp.dfDstY0 = adfY0[iX];
    8490      103410 :                 sp.dfDstX1 = adfX0[iX + 1];
    8491      103410 :                 sp.dfDstY1 = adfY0[iX + 1];
    8492      103410 :                 sp.dfDstX2 = adfX1[iX + 1];
    8493      103410 :                 sp.dfDstY2 = adfY1[iX + 1];
    8494      103410 :                 sp.dfDstX3 = adfX1[iX];
    8495      103410 :                 sp.dfDstY3 = adfY1[iX];
    8496             : 
    8497             :                 // Detect pixel that likely cross the anti-meridian and
    8498             :                 // introduce a discontinuity when reprojected.
    8499             : 
    8500      103410 :                 if (getInsideXSign(adfX0[iX]) !=
    8501      103506 :                         getInsideXSign(adfX0[iX + 1]) &&
    8502         164 :                     getInsideXSign(adfX0[iX]) == getInsideXSign(adfX1[iX]) &&
    8503          68 :                     getInsideXSign(adfX0[iX + 1]) ==
    8504      103574 :                         getInsideXSign(adfX1[iX + 1]) &&
    8505          40 :                     (adfY1[iX] - adfY0[iX]) * (adfY1[iX + 1] - adfY0[iX + 1]) >
    8506             :                         0)
    8507             :                 {
    8508          40 :                     double dfXMidReprojectedLeftTop = 0;
    8509          40 :                     double dfXMidReprojectedRightTop = 0;
    8510          40 :                     double dfYMidReprojectedTop = 0;
    8511          40 :                     FindDiscontinuity(
    8512          40 :                         iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
    8513          80 :                         iY + poWK->nSrcYOff, getInsideXSign(adfX0[iX]),
    8514             :                         dfXMidReprojectedLeftTop, dfXMidReprojectedRightTop,
    8515             :                         dfYMidReprojectedTop);
    8516          40 :                     double dfXMidReprojectedLeftBottom = 0;
    8517          40 :                     double dfXMidReprojectedRightBottom = 0;
    8518          40 :                     double dfYMidReprojectedBottom = 0;
    8519          40 :                     FindDiscontinuity(
    8520          40 :                         iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
    8521          80 :                         iY + poWK->nSrcYOff + 1, getInsideXSign(adfX1[iX]),
    8522             :                         dfXMidReprojectedLeftBottom,
    8523             :                         dfXMidReprojectedRightBottom, dfYMidReprojectedBottom);
    8524             : 
    8525          40 :                     discontinuityLeft[0] = XYPair(adfX0[iX], adfY0[iX]);
    8526          40 :                     discontinuityLeft[1] =
    8527          80 :                         XYPair(dfXMidReprojectedLeftTop, dfYMidReprojectedTop);
    8528          40 :                     discontinuityLeft[2] = XYPair(dfXMidReprojectedLeftBottom,
    8529          40 :                                                   dfYMidReprojectedBottom);
    8530          40 :                     discontinuityLeft[3] = XYPair(adfX1[iX], adfY1[iX]);
    8531          40 :                     discontinuityLeft[4] = XYPair(adfX0[iX], adfY0[iX]);
    8532             : 
    8533          40 :                     discontinuityRight[0] =
    8534          80 :                         XYPair(adfX0[iX + 1], adfY0[iX + 1]);
    8535          40 :                     discontinuityRight[1] =
    8536          80 :                         XYPair(dfXMidReprojectedRightTop, dfYMidReprojectedTop);
    8537          40 :                     discontinuityRight[2] = XYPair(dfXMidReprojectedRightBottom,
    8538          40 :                                                    dfYMidReprojectedBottom);
    8539          40 :                     discontinuityRight[3] =
    8540          80 :                         XYPair(adfX1[iX + 1], adfY1[iX + 1]);
    8541          40 :                     discontinuityRight[4] =
    8542          80 :                         XYPair(adfX0[iX + 1], adfY0[iX + 1]);
    8543             : 
    8544          40 :                     sp.dfArea = getArea(discontinuityLeft) +
    8545          40 :                                 getArea(discontinuityRight);
    8546          40 :                     if (getInsideXSign(adfX0[iX]) >= 1)
    8547             :                     {
    8548          20 :                         sp.dfDstX1 = dfXMidReprojectedLeftTop;
    8549          20 :                         sp.dfDstY1 = dfYMidReprojectedTop;
    8550          20 :                         sp.dfDstX2 = dfXMidReprojectedLeftBottom;
    8551          20 :                         sp.dfDstY2 = dfYMidReprojectedBottom;
    8552             :                     }
    8553             :                     else
    8554             :                     {
    8555          20 :                         sp.dfDstX0 = dfXMidReprojectedRightTop;
    8556          20 :                         sp.dfDstY0 = dfYMidReprojectedTop;
    8557          20 :                         sp.dfDstX3 = dfXMidReprojectedRightBottom;
    8558          20 :                         sp.dfDstY3 = dfYMidReprojectedBottom;
    8559             :                     }
    8560             :                 }
    8561             : 
    8562             :                 // Bounding box of source pixel (expressed in target pixel
    8563             :                 // coordinates)
    8564             :                 CPLRectObj sRect;
    8565      103410 :                 sRect.minx = std::min(std::min(sp.dfDstX0, sp.dfDstX1),
    8566      103410 :                                       std::min(sp.dfDstX2, sp.dfDstX3));
    8567      103410 :                 sRect.miny = std::min(std::min(sp.dfDstY0, sp.dfDstY1),
    8568      103410 :                                       std::min(sp.dfDstY2, sp.dfDstY3));
    8569      103410 :                 sRect.maxx = std::max(std::max(sp.dfDstX0, sp.dfDstX1),
    8570      103410 :                                       std::max(sp.dfDstX2, sp.dfDstX3));
    8571      103410 :                 sRect.maxy = std::max(std::max(sp.dfDstY0, sp.dfDstY1),
    8572      103410 :                                       std::max(sp.dfDstY2, sp.dfDstY3));
    8573      103410 :                 if (!(sRect.minx < nDstXSize && sRect.maxx > 0 &&
    8574      101350 :                       sRect.miny < iYMax && sRect.maxy > iYMin))
    8575             :                 {
    8576       10852 :                     continue;
    8577             :                 }
    8578             : 
    8579       92558 :                 sp.iSrcX = iX;
    8580       92558 :                 sp.iSrcY = iY;
    8581             : 
    8582       92558 :                 if (!bIsAffineNoRotation)
    8583             :                 {
    8584             :                     // Check polygon validity (no self-crossing)
    8585       89745 :                     XYPair xy;
    8586       89745 :                     if (getIntersection(XYPair(sp.dfDstX0, sp.dfDstY0),
    8587       89745 :                                         XYPair(sp.dfDstX1, sp.dfDstY1),
    8588       89745 :                                         XYPair(sp.dfDstX2, sp.dfDstY2),
    8589      269235 :                                         XYPair(sp.dfDstX3, sp.dfDstY3), xy) ||
    8590       89745 :                         getIntersection(XYPair(sp.dfDstX1, sp.dfDstY1),
    8591       89745 :                                         XYPair(sp.dfDstX2, sp.dfDstY2),
    8592       89745 :                                         XYPair(sp.dfDstX0, sp.dfDstY0),
    8593      179490 :                                         XYPair(sp.dfDstX3, sp.dfDstY3), xy))
    8594             :                     {
    8595         113 :                         continue;
    8596             :                     }
    8597             :                 }
    8598             : 
    8599       92445 :                 CPLQuadTreeInsertWithBounds(
    8600             :                     hQuadTree,
    8601             :                     reinterpret_cast<void *>(
    8602       92445 :                         static_cast<uintptr_t>(sourcePixels.size())),
    8603             :                     &sRect);
    8604             : 
    8605       92445 :                 sourcePixels.push_back(sp);
    8606             :             }
    8607             :         }
    8608             :     }
    8609             : 
    8610          36 :     std::vector<double> adfRealValue(poWK->nBands);
    8611          36 :     std::vector<double> adfImagValue(poWK->nBands);
    8612          36 :     std::vector<double> adfBandDensity(poWK->nBands);
    8613          36 :     std::vector<double> adfWeight(poWK->nBands);
    8614             : 
    8615             : #ifdef CHECK_SUM_WITH_GEOS
    8616             :     auto hGEOSContext = OGRGeometry::createGEOSContext();
    8617             :     auto seq1 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
    8618             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 0, 0.0, 0.0);
    8619             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 1, 1.0, 0.0);
    8620             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 2, 1.0, 1.0);
    8621             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 3, 0.0, 1.0);
    8622             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 4, 0.0, 0.0);
    8623             :     auto hLR1 = GEOSGeom_createLinearRing_r(hGEOSContext, seq1);
    8624             :     auto hP1 = GEOSGeom_createPolygon_r(hGEOSContext, hLR1, nullptr, 0);
    8625             : 
    8626             :     auto seq2 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
    8627             :     auto hLR2 = GEOSGeom_createLinearRing_r(hGEOSContext, seq2);
    8628             :     auto hP2 = GEOSGeom_createPolygon_r(hGEOSContext, hLR2, nullptr, 0);
    8629             : #endif
    8630             : 
    8631             :     const XYPoly xy1{
    8632          36 :         {0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}};
    8633          36 :     XYPoly xy2(5);
    8634          36 :     XYPoly xy2_triangle(4);
    8635          36 :     XYPoly intersection;
    8636             : 
    8637             :     /* ==================================================================== */
    8638             :     /*      Loop over output lines.                                         */
    8639             :     /* ==================================================================== */
    8640         891 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    8641             :     {
    8642             :         CPLRectObj sRect;
    8643         873 :         sRect.miny = iDstY;
    8644         873 :         sRect.maxy = iDstY + 1;
    8645             : 
    8646             :         /* ====================================================================
    8647             :          */
    8648             :         /*      Loop over pixels in output scanline. */
    8649             :         /* ====================================================================
    8650             :          */
    8651      221042 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    8652             :         {
    8653      220169 :             sRect.minx = iDstX;
    8654      220169 :             sRect.maxx = iDstX + 1;
    8655      220169 :             int nSourcePixels = 0;
    8656             :             void **pahSourcePixel =
    8657      220169 :                 CPLQuadTreeSearch(hQuadTree, &sRect, &nSourcePixels);
    8658      220169 :             if (nSourcePixels == 0)
    8659             :             {
    8660        1258 :                 CPLFree(pahSourcePixel);
    8661        1262 :                 continue;
    8662             :             }
    8663             : 
    8664      218911 :             std::fill(adfRealValue.begin(), adfRealValue.end(), 0);
    8665      218911 :             std::fill(adfImagValue.begin(), adfImagValue.end(), 0);
    8666      218911 :             std::fill(adfBandDensity.begin(), adfBandDensity.end(), 0);
    8667      218911 :             std::fill(adfWeight.begin(), adfWeight.end(), 0);
    8668      218911 :             double dfDensity = 0;
    8669      218911 :             double dfTotalWeight = 0;
    8670             : 
    8671             :             /* ====================================================================
    8672             :              */
    8673             :             /*          Iterate over each contributing source pixel to add its
    8674             :              */
    8675             :             /*          value weighed by the ratio of the area of its
    8676             :              * intersection  */
    8677             :             /*          with the target pixel divided by the area of the source
    8678             :              */
    8679             :             /*          pixel. */
    8680             :             /* ====================================================================
    8681             :              */
    8682     1020520 :             for (int i = 0; i < nSourcePixels; ++i)
    8683             :             {
    8684      801614 :                 const int iSourcePixel = static_cast<int>(
    8685      801614 :                     reinterpret_cast<uintptr_t>(pahSourcePixel[i]));
    8686      801614 :                 auto &sp = sourcePixels[iSourcePixel];
    8687             : 
    8688      801614 :                 double dfWeight = 0.0;
    8689      801614 :                 if (bIsAffineNoRotation)
    8690             :                 {
    8691             :                     // Optimization since the source pixel is a rectangle in
    8692             :                     // target pixel coordinates
    8693       16312 :                     double dfSrcMinX = std::min(sp.dfDstX0, sp.dfDstX2);
    8694       16312 :                     double dfSrcMaxX = std::max(sp.dfDstX0, sp.dfDstX2);
    8695       16312 :                     double dfSrcMinY = std::min(sp.dfDstY0, sp.dfDstY2);
    8696       16312 :                     double dfSrcMaxY = std::max(sp.dfDstY0, sp.dfDstY2);
    8697       16312 :                     double dfIntersMinX = std::max<double>(dfSrcMinX, iDstX);
    8698       16312 :                     double dfIntersMaxX = std::min(dfSrcMaxX, iDstX + 1.0);
    8699       16312 :                     double dfIntersMinY = std::max<double>(dfSrcMinY, iDstY);
    8700       16312 :                     double dfIntersMaxY = std::min(dfSrcMaxY, iDstY + 1.0);
    8701       16312 :                     dfWeight =
    8702       16312 :                         ((dfIntersMaxX - dfIntersMinX) *
    8703       16312 :                          (dfIntersMaxY - dfIntersMinY)) /
    8704       16312 :                         ((dfSrcMaxX - dfSrcMinX) * (dfSrcMaxY - dfSrcMinY));
    8705             :                 }
    8706             :                 else
    8707             :                 {
    8708             :                     // Compute the polygon of the source pixel in target pixel
    8709             :                     // coordinates, and shifted to the target pixel (unit square
    8710             :                     // coordinates)
    8711             : 
    8712      785302 :                     xy2[0] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
    8713      785302 :                     xy2[1] = {sp.dfDstX1 - iDstX, sp.dfDstY1 - iDstY};
    8714      785302 :                     xy2[2] = {sp.dfDstX2 - iDstX, sp.dfDstY2 - iDstY};
    8715      785302 :                     xy2[3] = {sp.dfDstX3 - iDstX, sp.dfDstY3 - iDstY};
    8716      785302 :                     xy2[4] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
    8717             : 
    8718      785302 :                     if (isConvex(xy2))
    8719             :                     {
    8720      785302 :                         getConvexPolyIntersection(xy1, xy2, intersection);
    8721      785302 :                         if (intersection.size() >= 3)
    8722             :                         {
    8723      468849 :                             dfWeight = getArea(intersection);
    8724             :                         }
    8725             :                     }
    8726             :                     else
    8727             :                     {
    8728             :                         // Split xy2 into 2 triangles.
    8729           0 :                         xy2_triangle[0] = xy2[0];
    8730           0 :                         xy2_triangle[1] = xy2[1];
    8731           0 :                         xy2_triangle[2] = xy2[2];
    8732           0 :                         xy2_triangle[3] = xy2[0];
    8733           0 :                         getConvexPolyIntersection(xy1, xy2_triangle,
    8734             :                                                   intersection);
    8735           0 :                         if (intersection.size() >= 3)
    8736             :                         {
    8737           0 :                             dfWeight = getArea(intersection);
    8738             :                         }
    8739             : 
    8740           0 :                         xy2_triangle[1] = xy2[2];
    8741           0 :                         xy2_triangle[2] = xy2[3];
    8742           0 :                         getConvexPolyIntersection(xy1, xy2_triangle,
    8743             :                                                   intersection);
    8744           0 :                         if (intersection.size() >= 3)
    8745             :                         {
    8746           0 :                             dfWeight += getArea(intersection);
    8747             :                         }
    8748             :                     }
    8749      785302 :                     if (dfWeight > 0.0)
    8750             :                     {
    8751      468828 :                         if (sp.dfArea == 0)
    8752       89592 :                             sp.dfArea = getArea(xy2);
    8753      468828 :                         dfWeight /= sp.dfArea;
    8754             :                     }
    8755             : 
    8756             : #ifdef CHECK_SUM_WITH_GEOS
    8757             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 0,
    8758             :                                          sp.dfDstX0 - iDstX,
    8759             :                                          sp.dfDstY0 - iDstY);
    8760             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 1,
    8761             :                                          sp.dfDstX1 - iDstX,
    8762             :                                          sp.dfDstY1 - iDstY);
    8763             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 2,
    8764             :                                          sp.dfDstX2 - iDstX,
    8765             :                                          sp.dfDstY2 - iDstY);
    8766             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 3,
    8767             :                                          sp.dfDstX3 - iDstX,
    8768             :                                          sp.dfDstY3 - iDstY);
    8769             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 4,
    8770             :                                          sp.dfDstX0 - iDstX,
    8771             :                                          sp.dfDstY0 - iDstY);
    8772             : 
    8773             :                     double dfWeightGEOS = 0.0;
    8774             :                     auto hIntersection =
    8775             :                         GEOSIntersection_r(hGEOSContext, hP1, hP2);
    8776             :                     if (hIntersection)
    8777             :                     {
    8778             :                         double dfIntersArea = 0.0;
    8779             :                         if (GEOSArea_r(hGEOSContext, hIntersection,
    8780             :                                        &dfIntersArea) &&
    8781             :                             dfIntersArea > 0)
    8782             :                         {
    8783             :                             double dfSourceArea = 0.0;
    8784             :                             if (GEOSArea_r(hGEOSContext, hP2, &dfSourceArea))
    8785             :                             {
    8786             :                                 dfWeightGEOS = dfIntersArea / dfSourceArea;
    8787             :                             }
    8788             :                         }
    8789             :                         GEOSGeom_destroy_r(hGEOSContext, hIntersection);
    8790             :                     }
    8791             :                     if (fabs(dfWeight - dfWeightGEOS) > 1e-5 * dfWeightGEOS)
    8792             :                     {
    8793             :                         /* ok */ printf("dfWeight=%f dfWeightGEOS=%f\n",
    8794             :                                         dfWeight, dfWeightGEOS);
    8795             :                         printf("xy2: ");  // ok
    8796             :                         for (const auto &xy : xy2)
    8797             :                             printf("[%f, %f], ", xy.first, xy.second);  // ok
    8798             :                         printf("\n");                                   // ok
    8799             :                         printf("intersection: ");                       // ok
    8800             :                         for (const auto &xy : intersection)
    8801             :                             printf("[%f, %f], ", xy.first, xy.second);  // ok
    8802             :                         printf("\n");                                   // ok
    8803             :                     }
    8804             : #endif
    8805             :                 }
    8806      801614 :                 if (dfWeight > 0.0)
    8807             :                 {
    8808      474099 :                     const GPtrDiff_t iSrcOffset =
    8809      474099 :                         sp.iSrcX +
    8810      474099 :                         static_cast<GPtrDiff_t>(sp.iSrcY) * nSrcXSize;
    8811      474099 :                     dfTotalWeight += dfWeight;
    8812             : 
    8813      474099 :                     if (poWK->pafUnifiedSrcDensity != nullptr)
    8814             :                     {
    8815           0 :                         dfDensity +=
    8816           0 :                             dfWeight * poWK->pafUnifiedSrcDensity[iSrcOffset];
    8817             :                     }
    8818             :                     else
    8819             :                     {
    8820      474099 :                         dfDensity += dfWeight;
    8821             :                     }
    8822             : 
    8823     1818720 :                     for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    8824             :                     {
    8825             :                         // Returns pixel value if it is not no data.
    8826             :                         double dfBandDensity;
    8827             :                         double dfRealValue;
    8828             :                         double dfImagValue;
    8829     2689240 :                         if (!(GWKGetPixelValue(poWK, iBand, iSrcOffset,
    8830             :                                                &dfBandDensity, &dfRealValue,
    8831             :                                                &dfImagValue) &&
    8832     1344620 :                               dfBandDensity > BAND_DENSITY_THRESHOLD))
    8833             :                         {
    8834           0 :                             continue;
    8835             :                         }
    8836             : 
    8837     1344620 :                         adfRealValue[iBand] += dfRealValue * dfWeight;
    8838     1344620 :                         adfImagValue[iBand] += dfImagValue * dfWeight;
    8839     1344620 :                         adfBandDensity[iBand] += dfBandDensity * dfWeight;
    8840     1344620 :                         adfWeight[iBand] += dfWeight;
    8841             :                     }
    8842             :                 }
    8843             :             }
    8844             : 
    8845      218911 :             CPLFree(pahSourcePixel);
    8846             : 
    8847             :             /* --------------------------------------------------------------------
    8848             :              */
    8849             :             /*          Update destination pixel value. */
    8850             :             /* --------------------------------------------------------------------
    8851             :              */
    8852      218911 :             bool bHasFoundDensity = false;
    8853      218911 :             const GPtrDiff_t iDstOffset =
    8854      218911 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    8855      827822 :             for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    8856             :             {
    8857      608911 :                 if (adfWeight[iBand] > 0)
    8858             :                 {
    8859             :                     const double dfBandDensity =
    8860      608907 :                         adfBandDensity[iBand] / adfWeight[iBand];
    8861      608907 :                     if (dfBandDensity > BAND_DENSITY_THRESHOLD)
    8862             :                     {
    8863      608907 :                         bHasFoundDensity = true;
    8864      608907 :                         GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    8865      608907 :                                          adfRealValue[iBand],
    8866      608907 :                                          adfImagValue[iBand]);
    8867             :                     }
    8868             :                 }
    8869             :             }
    8870             : 
    8871      218911 :             if (!bHasFoundDensity)
    8872           4 :                 continue;
    8873             : 
    8874             :             /* --------------------------------------------------------------------
    8875             :              */
    8876             :             /*          Update destination density/validity masks. */
    8877             :             /* --------------------------------------------------------------------
    8878             :              */
    8879      218907 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity / dfTotalWeight);
    8880             : 
    8881      218907 :             if (poWK->panDstValid != nullptr)
    8882             :             {
    8883       11750 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    8884             :             }
    8885             :         }
    8886             : 
    8887             :         /* --------------------------------------------------------------------
    8888             :          */
    8889             :         /*      Report progress to the user, and optionally cancel out. */
    8890             :         /* --------------------------------------------------------------------
    8891             :          */
    8892         873 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    8893           0 :             break;
    8894             :     }
    8895             : 
    8896             : #ifdef CHECK_SUM_WITH_GEOS
    8897             :     GEOSGeom_destroy_r(hGEOSContext, hP1);
    8898             :     GEOSGeom_destroy_r(hGEOSContext, hP2);
    8899             :     OGRGeometry::freeGEOSContext(hGEOSContext);
    8900             : #endif
    8901          18 :     CPLQuadTreeDestroy(hQuadTree);
    8902          18 : }

Generated by: LCOV version 1.14