LCOV - code coverage report
Current view: top level - alg - gdalwarpkernel.cpp (source / functions) Hit Total Coverage
Test: gdal_filtered.info Lines: 3114 3648 85.4 %
Date: 2025-03-28 11:40:40 Functions: 171 186 91.9 %

          Line data    Source code
       1             : /******************************************************************************
       2             :  *
       3             :  * Project:  High Performance Image Reprojector
       4             :  * Purpose:  Implementation of the GDALWarpKernel class.  Implements the actual
       5             :  *           image warping for a "chunk" of input and output imagery already
       6             :  *           loaded into memory.
       7             :  * Author:   Frank Warmerdam, warmerdam@pobox.com
       8             :  *
       9             :  ******************************************************************************
      10             :  * Copyright (c) 2003, Frank Warmerdam <warmerdam@pobox.com>
      11             :  * Copyright (c) 2008-2013, Even Rouault <even dot rouault at spatialys.com>
      12             :  *
      13             :  * SPDX-License-Identifier: MIT
      14             :  ****************************************************************************/
      15             : 
      16             : #include "cpl_port.h"
      17             : #include "gdalwarper.h"
      18             : 
      19             : #include <cfloat>
      20             : #include <cmath>
      21             : #include <cstddef>
      22             : #include <cstdlib>
      23             : #include <cstring>
      24             : 
      25             : #include <algorithm>
      26             : #include <limits>
      27             : #include <mutex>
      28             : #include <new>
      29             : #include <utility>
      30             : #include <vector>
      31             : 
      32             : #include "cpl_atomic_ops.h"
      33             : #include "cpl_conv.h"
      34             : #include "cpl_error.h"
      35             : #include "cpl_float.h"
      36             : #include "cpl_mask.h"
      37             : #include "cpl_multiproc.h"
      38             : #include "cpl_progress.h"
      39             : #include "cpl_string.h"
      40             : #include "cpl_vsi.h"
      41             : #include "cpl_worker_thread_pool.h"
      42             : #include "cpl_quad_tree.h"
      43             : #include "gdal.h"
      44             : #include "gdal_alg.h"
      45             : #include "gdal_alg_priv.h"
      46             : #include "gdal_thread_pool.h"
      47             : #include "gdalresamplingkernels.h"
      48             : 
      49             : // #define CHECK_SUM_WITH_GEOS
      50             : #ifdef CHECK_SUM_WITH_GEOS
      51             : #include "ogr_geometry.h"
      52             : #include "ogr_geos.h"
      53             : #endif
      54             : 
      55             : #ifdef USE_NEON_OPTIMIZATIONS
      56             : #include "include_sse2neon.h"
      57             : #define USE_SSE2
      58             : 
      59             : #include "gdalsse_priv.h"
      60             : 
      61             : // We restrict to 64bit processors because they are guaranteed to have SSE2.
      62             : // Could possibly be used too on 32bit, but we would need to check at runtime.
      63             : #elif defined(__x86_64) || defined(_M_X64)
      64             : #define USE_SSE2
      65             : 
      66             : #include "gdalsse_priv.h"
      67             : 
      68             : #if __SSE4_1__
      69             : #include <smmintrin.h>
      70             : #endif
      71             : 
      72             : #if __SSE3__
      73             : #include <pmmintrin.h>
      74             : #endif
      75             : 
      76             : #endif
      77             : 
      78             : constexpr double BAND_DENSITY_THRESHOLD = 0.0000000001;
      79             : constexpr float SRC_DENSITY_THRESHOLD = 0.000000001f;
      80             : 
      81             : // #define INSTANTIATE_FLOAT64_SSE2_IMPL
      82             : 
      83             : static const int anGWKFilterRadius[] = {
      84             :     0,  // Nearest neighbour
      85             :     1,  // Bilinear
      86             :     2,  // Cubic Convolution (Catmull-Rom)
      87             :     2,  // Cubic B-Spline
      88             :     3,  // Lanczos windowed sinc
      89             :     0,  // Average
      90             :     0,  // Mode
      91             :     0,  // Reserved GRA_Gauss=7
      92             :     0,  // Max
      93             :     0,  // Min
      94             :     0,  // Med
      95             :     0,  // Q1
      96             :     0,  // Q3
      97             :     0,  // Sum
      98             :     0,  // RMS
      99             : };
     100             : 
     101             : static double GWKBilinear(double dfX);
     102             : static double GWKCubic(double dfX);
     103             : static double GWKBSpline(double dfX);
     104             : static double GWKLanczosSinc(double dfX);
     105             : 
     106             : static const FilterFuncType apfGWKFilter[] = {
     107             :     nullptr,         // Nearest neighbour
     108             :     GWKBilinear,     // Bilinear
     109             :     GWKCubic,        // Cubic Convolution (Catmull-Rom)
     110             :     GWKBSpline,      // Cubic B-Spline
     111             :     GWKLanczosSinc,  // Lanczos windowed sinc
     112             :     nullptr,         // Average
     113             :     nullptr,         // Mode
     114             :     nullptr,         // Reserved GRA_Gauss=7
     115             :     nullptr,         // Max
     116             :     nullptr,         // Min
     117             :     nullptr,         // Med
     118             :     nullptr,         // Q1
     119             :     nullptr,         // Q3
     120             :     nullptr,         // Sum
     121             :     nullptr,         // RMS
     122             : };
     123             : 
     124             : // TODO(schwehr): Can we make these functions have a const * const arg?
     125             : static double GWKBilinear4Values(double *padfVals);
     126             : static double GWKCubic4Values(double *padfVals);
     127             : static double GWKBSpline4Values(double *padfVals);
     128             : static double GWKLanczosSinc4Values(double *padfVals);
     129             : 
     130             : static const FilterFunc4ValuesType apfGWKFilter4Values[] = {
     131             :     nullptr,                // Nearest neighbour
     132             :     GWKBilinear4Values,     // Bilinear
     133             :     GWKCubic4Values,        // Cubic Convolution (Catmull-Rom)
     134             :     GWKBSpline4Values,      // Cubic B-Spline
     135             :     GWKLanczosSinc4Values,  // Lanczos windowed sinc
     136             :     nullptr,                // Average
     137             :     nullptr,                // Mode
     138             :     nullptr,                // Reserved GRA_Gauss=7
     139             :     nullptr,                // Max
     140             :     nullptr,                // Min
     141             :     nullptr,                // Med
     142             :     nullptr,                // Q1
     143             :     nullptr,                // Q3
     144             :     nullptr,                // Sum
     145             :     nullptr,                // RMS
     146             : };
     147             : 
     148        9907 : int GWKGetFilterRadius(GDALResampleAlg eResampleAlg)
     149             : {
     150             :     static_assert(CPL_ARRAYSIZE(anGWKFilterRadius) == GRA_LAST_VALUE + 1,
     151             :                   "Bad size of anGWKFilterRadius");
     152        9907 :     return anGWKFilterRadius[eResampleAlg];
     153             : }
     154             : 
     155        3731 : FilterFuncType GWKGetFilterFunc(GDALResampleAlg eResampleAlg)
     156             : {
     157             :     static_assert(CPL_ARRAYSIZE(apfGWKFilter) == GRA_LAST_VALUE + 1,
     158             :                   "Bad size of apfGWKFilter");
     159        3731 :     return apfGWKFilter[eResampleAlg];
     160             : }
     161             : 
     162        3732 : FilterFunc4ValuesType GWKGetFilterFunc4Values(GDALResampleAlg eResampleAlg)
     163             : {
     164             :     static_assert(CPL_ARRAYSIZE(apfGWKFilter4Values) == GRA_LAST_VALUE + 1,
     165             :                   "Bad size of apfGWKFilter4Values");
     166        3732 :     return apfGWKFilter4Values[eResampleAlg];
     167             : }
     168             : 
     169             : static CPLErr GWKGeneralCase(GDALWarpKernel *);
     170             : static CPLErr GWKRealCase(GDALWarpKernel *poWK);
     171             : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     172             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     173             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     174             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
     175             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
     176             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
     177             : #endif
     178             : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     179             : static CPLErr GWKNearestByte(GDALWarpKernel *poWK);
     180             : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     181             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     182             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
     183             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
     184             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
     185             : #endif
     186             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     187             : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     188             : static CPLErr GWKNearestShort(GDALWarpKernel *poWK);
     189             : static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK);
     190             : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
     191             : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK);
     192             : static CPLErr GWKAverageOrMode(GDALWarpKernel *);
     193             : static CPLErr GWKSumPreserving(GDALWarpKernel *);
     194             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
     195             : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
     196             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
     197             : 
     198             : /************************************************************************/
     199             : /*                           GWKJobStruct                               */
     200             : /************************************************************************/
     201             : 
     202             : struct GWKJobStruct
     203             : {
     204             :     std::mutex &mutex;
     205             :     std::condition_variable &cv;
     206             :     int &counter;
     207             :     bool &stopFlag;
     208             :     GDALWarpKernel *poWK;
     209             :     int iYMin;
     210             :     int iYMax;
     211             :     int (*pfnProgress)(GWKJobStruct *psJob);
     212             :     void *pTransformerArg;
     213             :     void (*pfnFunc)(
     214             :         void *);  // used by GWKRun() to assign the proper pTransformerArg
     215             : 
     216        2105 :     GWKJobStruct(std::mutex &mutex_, std::condition_variable &cv_,
     217             :                  int &counter_, bool &stopFlag_)
     218        2105 :         : mutex(mutex_), cv(cv_), counter(counter_), stopFlag(stopFlag_),
     219             :           poWK(nullptr), iYMin(0), iYMax(0), pfnProgress(nullptr),
     220        2105 :           pTransformerArg(nullptr), pfnFunc(nullptr)
     221             :     {
     222        2105 :     }
     223             : };
     224             : 
     225             : struct GWKThreadData
     226             : {
     227             :     std::unique_ptr<CPLJobQueue> poJobQueue{};
     228             :     std::unique_ptr<std::vector<GWKJobStruct>> threadJobs{};
     229             :     int nMaxThreads{0};
     230             :     int counter{0};
     231             :     bool stopFlag{false};
     232             :     std::mutex mutex{};
     233             :     std::condition_variable cv{};
     234             :     bool bTransformerArgInputAssignedToThread{false};
     235             :     void *pTransformerArgInput{
     236             :         nullptr};  // owned by calling layer. Not to be destroyed
     237             :     std::map<GIntBig, void *> mapThreadToTransformerArg{};
     238             :     int nTotalThreadCountForThisRun = 0;
     239             :     int nCurThreadCountForThisRun = 0;
     240             : };
     241             : 
     242             : /************************************************************************/
     243             : /*                        GWKProgressThread()                           */
     244             : /************************************************************************/
     245             : 
     246             : // Return TRUE if the computation must be interrupted.
     247           5 : static int GWKProgressThread(GWKJobStruct *psJob)
     248             : {
     249           5 :     bool stop = false;
     250             :     {
     251           5 :         std::lock_guard<std::mutex> lock(psJob->mutex);
     252           5 :         psJob->counter++;
     253           5 :         stop = psJob->stopFlag;
     254             :     }
     255           5 :     psJob->cv.notify_one();
     256             : 
     257           5 :     return stop;
     258             : }
     259             : 
     260             : /************************************************************************/
     261             : /*                      GWKProgressMonoThread()                         */
     262             : /************************************************************************/
     263             : 
     264             : // Return TRUE if the computation must be interrupted.
     265      204563 : static int GWKProgressMonoThread(GWKJobStruct *psJob)
     266             : {
     267      204563 :     GDALWarpKernel *poWK = psJob->poWK;
     268             :     // coverity[missing_lock]
     269      204563 :     if (!poWK->pfnProgress(
     270      204563 :             poWK->dfProgressBase +
     271      204563 :                 poWK->dfProgressScale *
     272      204563 :                     (++psJob->counter / static_cast<double>(psJob->iYMax)),
     273             :             "", poWK->pProgress))
     274             :     {
     275           1 :         CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
     276           1 :         psJob->stopFlag = true;
     277           1 :         return TRUE;
     278             :     }
     279      204562 :     return FALSE;
     280             : }
     281             : 
     282             : /************************************************************************/
     283             : /*                       GWKGenericMonoThread()                         */
     284             : /************************************************************************/
     285             : 
     286        2100 : static CPLErr GWKGenericMonoThread(GDALWarpKernel *poWK,
     287             :                                    void (*pfnFunc)(void *pUserData))
     288             : {
     289        2100 :     GWKThreadData td;
     290             : 
     291             :     // NOTE: the mutex is not used.
     292        2100 :     GWKJobStruct job(td.mutex, td.cv, td.counter, td.stopFlag);
     293        2100 :     job.poWK = poWK;
     294        2100 :     job.iYMin = 0;
     295        2100 :     job.iYMax = poWK->nDstYSize;
     296        2100 :     job.pfnProgress = GWKProgressMonoThread;
     297        2100 :     job.pTransformerArg = poWK->pTransformerArg;
     298        2100 :     pfnFunc(&job);
     299             : 
     300        4200 :     return td.stopFlag ? CE_Failure : CE_None;
     301             : }
     302             : 
     303             : /************************************************************************/
     304             : /*                          GWKThreadsCreate()                          */
     305             : /************************************************************************/
     306             : 
     307        1409 : void *GWKThreadsCreate(char **papszWarpOptions,
     308             :                        GDALTransformerFunc /* pfnTransformer */,
     309             :                        void *pTransformerArg)
     310             : {
     311             :     const char *pszWarpThreads =
     312        1409 :         CSLFetchNameValue(papszWarpOptions, "NUM_THREADS");
     313        1409 :     if (pszWarpThreads == nullptr)
     314        1409 :         pszWarpThreads = CPLGetConfigOption("GDAL_NUM_THREADS", "1");
     315             : 
     316        1409 :     int nThreads = 0;
     317        1409 :     if (EQUAL(pszWarpThreads, "ALL_CPUS"))
     318           3 :         nThreads = CPLGetNumCPUs();
     319             :     else
     320        1406 :         nThreads = atoi(pszWarpThreads);
     321        1409 :     if (nThreads <= 1)
     322        1404 :         nThreads = 0;
     323        1409 :     if (nThreads > 128)
     324           0 :         nThreads = 128;
     325             : 
     326        1409 :     GWKThreadData *psThreadData = new GWKThreadData();
     327             :     auto poThreadPool =
     328        1409 :         nThreads > 0 ? GDALGetGlobalThreadPool(nThreads) : nullptr;
     329        1409 :     if (nThreads && poThreadPool)
     330             :     {
     331           5 :         psThreadData->nMaxThreads = nThreads;
     332           5 :         psThreadData->threadJobs.reset(new std::vector<GWKJobStruct>(
     333             :             nThreads,
     334           5 :             GWKJobStruct(psThreadData->mutex, psThreadData->cv,
     335          10 :                          psThreadData->counter, psThreadData->stopFlag)));
     336             : 
     337           5 :         psThreadData->poJobQueue = poThreadPool->CreateJobQueue();
     338           5 :         psThreadData->pTransformerArgInput = pTransformerArg;
     339             :     }
     340             : 
     341        1409 :     return psThreadData;
     342             : }
     343             : 
     344             : /************************************************************************/
     345             : /*                             GWKThreadsEnd()                          */
     346             : /************************************************************************/
     347             : 
     348        1409 : void GWKThreadsEnd(void *psThreadDataIn)
     349             : {
     350        1409 :     if (psThreadDataIn == nullptr)
     351           0 :         return;
     352             : 
     353        1409 :     GWKThreadData *psThreadData = static_cast<GWKThreadData *>(psThreadDataIn);
     354        1409 :     if (psThreadData->poJobQueue)
     355             :     {
     356             :         // cppcheck-suppress constVariableReference
     357          15 :         for (auto &pair : psThreadData->mapThreadToTransformerArg)
     358             :         {
     359          10 :             CPLAssert(pair.second != psThreadData->pTransformerArgInput);
     360          10 :             GDALDestroyTransformer(pair.second);
     361             :         }
     362           5 :         psThreadData->poJobQueue.reset();
     363             :     }
     364        1409 :     delete psThreadData;
     365             : }
     366             : 
     367             : /************************************************************************/
     368             : /*                         ThreadFuncAdapter()                          */
     369             : /************************************************************************/
     370             : 
     371          15 : static void ThreadFuncAdapter(void *pData)
     372             : {
     373          15 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
     374          15 :     GWKThreadData *psThreadData =
     375          15 :         static_cast<GWKThreadData *>(psJob->poWK->psThreadData);
     376             : 
     377             :     // Look if we have already a per-thread transformer
     378          15 :     void *pTransformerArg = nullptr;
     379          15 :     const GIntBig nThreadId = CPLGetPID();
     380             : 
     381             :     {
     382          30 :         std::lock_guard<std::mutex> lock(psThreadData->mutex);
     383          15 :         ++psThreadData->nCurThreadCountForThisRun;
     384             : 
     385          15 :         auto oIter = psThreadData->mapThreadToTransformerArg.find(nThreadId);
     386          15 :         if (oIter != psThreadData->mapThreadToTransformerArg.end())
     387             :         {
     388           0 :             pTransformerArg = oIter->second;
     389             :         }
     390          15 :         else if (!psThreadData->bTransformerArgInputAssignedToThread &&
     391          15 :                  psThreadData->nCurThreadCountForThisRun ==
     392          15 :                      psThreadData->nTotalThreadCountForThisRun)
     393             :         {
     394             :             // If we are the last thread to be started, temporarily borrow the
     395             :             // original transformer
     396           5 :             psThreadData->bTransformerArgInputAssignedToThread = true;
     397           5 :             pTransformerArg = psThreadData->pTransformerArgInput;
     398           5 :             psThreadData->mapThreadToTransformerArg[nThreadId] =
     399             :                 pTransformerArg;
     400             :         }
     401             : 
     402          15 :         if (pTransformerArg == nullptr)
     403             :         {
     404          10 :             CPLAssert(psThreadData->pTransformerArgInput != nullptr);
     405          10 :             CPLAssert(!psThreadData->bTransformerArgInputAssignedToThread);
     406             :         }
     407             :     }
     408             : 
     409             :     // If no transformer assigned to current thread, instantiate one
     410          15 :     if (pTransformerArg == nullptr)
     411             :     {
     412             :         // This somehow assumes that GDALCloneTransformer() is thread-safe
     413             :         // which should normally be the case.
     414             :         pTransformerArg =
     415          10 :             GDALCloneTransformer(psThreadData->pTransformerArgInput);
     416             : 
     417             :         // Lock for the stop flag and the transformer map.
     418          10 :         std::lock_guard<std::mutex> lock(psThreadData->mutex);
     419          10 :         if (!pTransformerArg)
     420             :         {
     421           0 :             psJob->stopFlag = true;
     422           0 :             return;
     423             :         }
     424          10 :         psThreadData->mapThreadToTransformerArg[nThreadId] = pTransformerArg;
     425             :     }
     426             : 
     427          15 :     psJob->pTransformerArg = pTransformerArg;
     428          15 :     psJob->pfnFunc(pData);
     429             : 
     430             :     // Give back original transformer, if borrowed.
     431             :     {
     432          30 :         std::lock_guard<std::mutex> lock(psThreadData->mutex);
     433          15 :         if (psThreadData->bTransformerArgInputAssignedToThread &&
     434           7 :             pTransformerArg == psThreadData->pTransformerArgInput)
     435             :         {
     436             :             psThreadData->mapThreadToTransformerArg.erase(
     437           5 :                 psThreadData->mapThreadToTransformerArg.find(nThreadId));
     438           5 :             psThreadData->bTransformerArgInputAssignedToThread = false;
     439             :         }
     440             :     }
     441             : }
     442             : 
     443             : /************************************************************************/
     444             : /*                                GWKRun()                              */
     445             : /************************************************************************/
     446             : 
     447        2105 : static CPLErr GWKRun(GDALWarpKernel *poWK, const char *pszFuncName,
     448             :                      void (*pfnFunc)(void *pUserData))
     449             : 
     450             : {
     451        2105 :     const int nDstYSize = poWK->nDstYSize;
     452             : 
     453        2105 :     CPLDebug("GDAL",
     454             :              "GDALWarpKernel()::%s() "
     455             :              "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d",
     456             :              pszFuncName, poWK->nSrcXOff, poWK->nSrcYOff, poWK->nSrcXSize,
     457             :              poWK->nSrcYSize, poWK->nDstXOff, poWK->nDstYOff, poWK->nDstXSize,
     458             :              poWK->nDstYSize);
     459             : 
     460        2105 :     if (!poWK->pfnProgress(poWK->dfProgressBase, "", poWK->pProgress))
     461             :     {
     462           0 :         CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
     463           0 :         return CE_Failure;
     464             :     }
     465             : 
     466        2105 :     GWKThreadData *psThreadData =
     467             :         static_cast<GWKThreadData *>(poWK->psThreadData);
     468        2105 :     if (psThreadData == nullptr || psThreadData->poJobQueue == nullptr)
     469             :     {
     470        2100 :         return GWKGenericMonoThread(poWK, pfnFunc);
     471             :     }
     472             : 
     473           5 :     int nThreads = std::min(psThreadData->nMaxThreads, nDstYSize / 2);
     474             :     // Config option mostly useful for tests to be able to test multithreading
     475             :     // with small rasters
     476             :     const int nWarpChunkSize =
     477           5 :         atoi(CPLGetConfigOption("WARP_THREAD_CHUNK_SIZE", "65536"));
     478           5 :     if (nWarpChunkSize > 0)
     479             :     {
     480           3 :         GIntBig nChunks =
     481           3 :             static_cast<GIntBig>(nDstYSize) * poWK->nDstXSize / nWarpChunkSize;
     482           3 :         if (nThreads > nChunks)
     483           1 :             nThreads = static_cast<int>(nChunks);
     484             :     }
     485           5 :     if (nThreads <= 0)
     486           1 :         nThreads = 1;
     487             : 
     488           5 :     CPLDebug("WARP", "Using %d threads", nThreads);
     489             : 
     490           5 :     auto &jobs = *psThreadData->threadJobs;
     491           5 :     CPLAssert(static_cast<int>(jobs.size()) >= nThreads);
     492             :     // Fill-in job structures.
     493          20 :     for (int i = 0; i < nThreads; ++i)
     494             :     {
     495          15 :         auto &job = jobs[i];
     496          15 :         job.poWK = poWK;
     497          15 :         job.iYMin =
     498          15 :             static_cast<int>(static_cast<int64_t>(i) * nDstYSize / nThreads);
     499          15 :         job.iYMax = static_cast<int>(static_cast<int64_t>(i + 1) * nDstYSize /
     500          15 :                                      nThreads);
     501          15 :         if (poWK->pfnProgress != GDALDummyProgress)
     502           1 :             job.pfnProgress = GWKProgressThread;
     503          15 :         job.pfnFunc = pfnFunc;
     504             :     }
     505             : 
     506             :     bool bStopFlag;
     507             :     {
     508           5 :         std::unique_lock<std::mutex> lock(psThreadData->mutex);
     509             : 
     510           5 :         psThreadData->nTotalThreadCountForThisRun = nThreads;
     511             :         // coverity[missing_lock]
     512           5 :         psThreadData->nCurThreadCountForThisRun = 0;
     513             : 
     514             :         // Start jobs.
     515          20 :         for (int i = 0; i < nThreads; ++i)
     516             :         {
     517          15 :             auto &job = jobs[i];
     518          15 :             psThreadData->poJobQueue->SubmitJob(ThreadFuncAdapter,
     519             :                                                 static_cast<void *>(&job));
     520             :         }
     521             : 
     522             :         /* --------------------------------------------------------------------
     523             :          */
     524             :         /*      Report progress. */
     525             :         /* --------------------------------------------------------------------
     526             :          */
     527           5 :         if (poWK->pfnProgress != GDALDummyProgress)
     528             :         {
     529           4 :             while (psThreadData->counter < nDstYSize)
     530             :             {
     531           4 :                 psThreadData->cv.wait(lock);
     532           4 :                 if (!poWK->pfnProgress(poWK->dfProgressBase +
     533           4 :                                            poWK->dfProgressScale *
     534           4 :                                                (psThreadData->counter /
     535           4 :                                                 static_cast<double>(nDstYSize)),
     536             :                                        "", poWK->pProgress))
     537             :                 {
     538           1 :                     CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
     539           1 :                     psThreadData->stopFlag = true;
     540           1 :                     break;
     541             :                 }
     542             :             }
     543             :         }
     544             : 
     545           5 :         bStopFlag = psThreadData->stopFlag;
     546             :     }
     547             : 
     548             :     /* -------------------------------------------------------------------- */
     549             :     /*      Wait for all jobs to complete.                                  */
     550             :     /* -------------------------------------------------------------------- */
     551           5 :     psThreadData->poJobQueue->WaitCompletion();
     552             : 
     553           5 :     return bStopFlag ? CE_Failure : CE_None;
     554             : }
     555             : 
     556             : /************************************************************************/
     557             : /* ==================================================================== */
     558             : /*                            GDALWarpKernel                            */
     559             : /* ==================================================================== */
     560             : /************************************************************************/
     561             : 
     562             : /**
     563             :  * \class GDALWarpKernel "gdalwarper.h"
     564             :  *
     565             :  * Low level image warping class.
     566             :  *
     567             :  * This class is responsible for low level image warping for one
     568             :  * "chunk" of imagery.  The class is essentially a structure with all
     569             :  * data members public - primarily so that new special-case functions
     570             :  * can be added without changing the class declaration.
     571             :  *
     572             :  * Applications are normally intended to interact with warping facilities
     573             :  * through the GDALWarpOperation class, though the GDALWarpKernel can in
     574             :  * theory be used directly if great care is taken in setting up the
     575             :  * control data.
     576             :  *
     577             :  * <h3>Design Issues</h3>
     578             :  *
     579             :  * The intention is that PerformWarp() would analyze the setup in terms
     580             :  * of the datatype, resampling type, and validity/density mask usage and
     581             :  * pick one of many specific implementations of the warping algorithm over
     582             :  * a continuum of optimization vs. generality.  At one end there will be a
     583             :  * reference general purpose implementation of the algorithm that supports
     584             :  * any data type (working internally in double precision complex), all three
     585             :  * resampling types, and any or all of the validity/density masks.  At the
     586             :  * other end would be highly optimized algorithms for common cases like
     587             :  * nearest neighbour resampling on GDT_Byte data with no masks.
     588             :  *
     589             :  * The full set of optimized versions have not been decided but we should
     590             :  * expect to have at least:
     591             :  *  - One for each resampling algorithm for 8bit data with no masks.
     592             :  *  - One for each resampling algorithm for float data with no masks.
     593             :  *  - One for each resampling algorithm for float data with any/all masks
     594             :  *    (essentially the generic case for just float data).
     595             :  *  - One for each resampling algorithm for 8bit data with support for
     596             :  *    input validity masks (per band or per pixel).  This handles the common
     597             :  *    case of nodata masking.
     598             :  *  - One for each resampling algorithm for float data with support for
     599             :  *    input validity masks (per band or per pixel).  This handles the common
     600             :  *    case of nodata masking.
     601             :  *
     602             :  * Some of the specializations would operate on all bands in one pass
     603             :  * (especially the ones without masking would do this), while others might
     604             :  * process each band individually to reduce code complexity.
     605             :  *
     606             :  * <h3>Masking Semantics</h3>
     607             :  *
     608             :  * A detailed explanation of the semantics of the validity and density masks,
     609             :  * and their effects on resampling kernels is needed here.
     610             :  */
     611             : 
     612             : /************************************************************************/
     613             : /*                     GDALWarpKernel Data Members                      */
     614             : /************************************************************************/
     615             : 
     616             : /**
     617             :  * \var GDALResampleAlg GDALWarpKernel::eResample;
     618             :  *
     619             :  * Resampling algorithm.
     620             :  *
     621             :  * The resampling algorithm to use.  One of GRA_NearestNeighbour, GRA_Bilinear,
     622             :  * GRA_Cubic, GRA_CubicSpline, GRA_Lanczos, GRA_Average, GRA_RMS,
     623             :  * GRA_Mode or GRA_Sum.
     624             :  *
     625             :  * This field is required. GRA_NearestNeighbour may be used as a default
     626             :  * value.
     627             :  */
     628             : 
     629             : /**
     630             :  * \var GDALDataType GDALWarpKernel::eWorkingDataType;
     631             :  *
     632             :  * Working pixel data type.
     633             :  *
     634             :  * The datatype of pixels in the source image (papabySrcImage) and
     635             :  * destination image (papabyDstImage) buffers.  Note that operations on
     636             :  * some data types (such as GDT_Byte) may be much better optimized than other
     637             :  * less common cases.
     638             :  *
     639             :  * This field is required.  It may not be GDT_Unknown.
     640             :  */
     641             : 
     642             : /**
     643             :  * \var int GDALWarpKernel::nBands;
     644             :  *
     645             :  * Number of bands.
     646             :  *
     647             :  * The number of bands (layers) of imagery being warped.  Determines the
     648             :  * number of entries in the papabySrcImage, papanBandSrcValid,
     649             :  * and papabyDstImage arrays.
     650             :  *
     651             :  * This field is required.
     652             :  */
     653             : 
     654             : /**
     655             :  * \var int GDALWarpKernel::nSrcXSize;
     656             :  *
     657             :  * Source image width in pixels.
     658             :  *
     659             :  * This field is required.
     660             :  */
     661             : 
     662             : /**
     663             :  * \var int GDALWarpKernel::nSrcYSize;
     664             :  *
     665             :  * Source image height in pixels.
     666             :  *
     667             :  * This field is required.
     668             :  */
     669             : 
     670             : /**
     671             :  * \var double GDALWarpKernel::dfSrcXExtraSize;
     672             :  *
     673             :  * Number of pixels included in nSrcXSize that are present on the edges of
     674             :  * the area of interest to take into account the width of the kernel.
     675             :  *
     676             :  * This field is required.
     677             :  */
     678             : 
     679             : /**
     680             :  * \var double GDALWarpKernel::dfSrcYExtraSize;
     681             :  *
     682             :  * Number of pixels included in nSrcYSize that are present on the edges of
     683             :  * the area of interest to take into account the height of the kernel.
     684             :  *
     685             :  * This field is required.
     686             :  */
     687             : 
     688             : /**
     689             :  * \var GByte **GDALWarpKernel::papabySrcImage;
     690             :  *
     691             :  * Array of source image band data.
     692             :  *
     693             :  * This is an array (of size GDALWarpKernel::nBands) of pointers
     694             :  * to image data.  Each individual band of image data is organized as a single
     695             :  * block of image data in left to right, then bottom to top order.  The actual
     696             :  * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
     697             :  *
     698             :  * To access the pixel value for the (x=3, y=4) pixel (zero based) of
     699             :  * the second band with eWorkingDataType set to GDT_Float32 use code like
     700             :  * this:
     701             :  *
     702             :  * \code
     703             :  *   float dfPixelValue;
     704             :  *   int   nBand = 2-1;  // Band indexes are zero based.
     705             :  *   int   nPixel = 3; // Zero based.
     706             :  *   int   nLine = 4;  // Zero based.
     707             :  *
     708             :  *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
     709             :  *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
     710             :  *   assert( nBand >= 0 && nBand < poKern->nBands );
     711             :  *   dfPixelValue = ((float *) poKern->papabySrcImage[nBand])
     712             :  *                                  [nPixel + nLine * poKern->nSrcXSize];
     713             :  * \endcode
     714             :  *
     715             :  * This field is required.
     716             :  */
     717             : 
     718             : /**
     719             :  * \var GUInt32 **GDALWarpKernel::papanBandSrcValid;
     720             :  *
     721             :  * Per band validity mask for source pixels.
     722             :  *
     723             :  * Array of pixel validity mask layers for each source band.   Each of
     724             :  * the mask layers is the same size (in pixels) as the source image with
     725             :  * one bit per pixel.  Note that it is legal (and common) for this to be
     726             :  * NULL indicating that none of the pixels are invalidated, or for some
     727             :  * band validity masks to be NULL in which case all pixels of the band are
     728             :  * valid.  The following code can be used to test the validity of a particular
     729             :  * pixel.
     730             :  *
     731             :  * \code
     732             :  *   int   bIsValid = TRUE;
     733             :  *   int   nBand = 2-1;  // Band indexes are zero based.
     734             :  *   int   nPixel = 3; // Zero based.
     735             :  *   int   nLine = 4;  // Zero based.
     736             :  *
     737             :  *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
     738             :  *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
     739             :  *   assert( nBand >= 0 && nBand < poKern->nBands );
     740             :  *
     741             :  *   if( poKern->papanBandSrcValid != NULL
     742             :  *       && poKern->papanBandSrcValid[nBand] != NULL )
     743             :  *   {
     744             :  *       GUInt32 *panBandMask = poKern->papanBandSrcValid[nBand];
     745             :  *       int    iPixelOffset = nPixel + nLine * poKern->nSrcXSize;
     746             :  *
     747             :  *       bIsValid = CPLMaskGet(panBandMask, iPixelOffset);
     748             :  *   }
     749             :  * \endcode
     750             :  */
     751             : 
     752             : /**
     753             :  * \var GUInt32 *GDALWarpKernel::panUnifiedSrcValid;
     754             :  *
     755             :  * Per pixel validity mask for source pixels.
     756             :  *
     757             :  * A single validity mask layer that applies to the pixels of all source
     758             :  * bands.  It is accessed similarly to papanBandSrcValid, but without the
     759             :  * extra level of band indirection.
     760             :  *
     761             :  * This pointer may be NULL indicating that all pixels are valid.
     762             :  *
     763             :  * Note that if both panUnifiedSrcValid, and papanBandSrcValid are available,
     764             :  * the pixel isn't considered to be valid unless both arrays indicate it is
     765             :  * valid.
     766             :  */
     767             : 
     768             : /**
     769             :  * \var float *GDALWarpKernel::pafUnifiedSrcDensity;
     770             :  *
     771             :  * Per pixel density mask for source pixels.
     772             :  *
     773             :  * A single density mask layer that applies to the pixels of all source
     774             :  * bands.  It contains values between 0.0 and 1.0 indicating the degree to
     775             :  * which this pixel should be allowed to contribute to the output result.
     776             :  *
     777             :  * This pointer may be NULL indicating that all pixels have a density of 1.0.
     778             :  *
     779             :  * The density for a pixel may be accessed like this:
     780             :  *
     781             :  * \code
     782             :  *   float fDensity = 1.0;
     783             :  *   int nPixel = 3;  // Zero based.
     784             :  *   int nLine = 4;   // Zero based.
     785             :  *
     786             :  *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
     787             :  *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
     788             :  *   if( poKern->pafUnifiedSrcDensity != NULL )
     789             :  *     fDensity = poKern->pafUnifiedSrcDensity
     790             :  *                                  [nPixel + nLine * poKern->nSrcXSize];
     791             :  * \endcode
     792             :  */
     793             : 
     794             : /**
     795             :  * \var int GDALWarpKernel::nDstXSize;
     796             :  *
     797             :  * Width of destination image in pixels.
     798             :  *
     799             :  * This field is required.
     800             :  */
     801             : 
     802             : /**
     803             :  * \var int GDALWarpKernel::nDstYSize;
     804             :  *
     805             :  * Height of destination image in pixels.
     806             :  *
     807             :  * This field is required.
     808             :  */
     809             : 
     810             : /**
     811             :  * \var GByte **GDALWarpKernel::papabyDstImage;
     812             :  *
     813             :  * Array of destination image band data.
     814             :  *
     815             :  * This is an array (of size GDALWarpKernel::nBands) of pointers
     816             :  * to image data.  Each individual band of image data is organized as a single
     817             :  * block of image data in left to right, then bottom to top order.  The actual
     818             :  * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
     819             :  *
     820             :  * To access the pixel value for the (x=3, y=4) pixel (zero based) of
     821             :  * the second band with eWorkingDataType set to GDT_Float32 use code like
     822             :  * this:
     823             :  *
     824             :  * \code
     825             :  *   float dfPixelValue;
     826             :  *   int   nBand = 2-1;  // Band indexes are zero based.
     827             :  *   int   nPixel = 3; // Zero based.
     828             :  *   int   nLine = 4;  // Zero based.
     829             :  *
     830             :  *   assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
     831             :  *   assert( nLine >= 0 && nLine < poKern->nDstYSize );
     832             :  *   assert( nBand >= 0 && nBand < poKern->nBands );
     833             :  *   dfPixelValue = ((float *) poKern->papabyDstImage[nBand])
     834             :  *                                  [nPixel + nLine * poKern->nDstXSize];
     835             :  * \endcode
     836             :  *
     837             :  * This field is required.
     838             :  */
     839             : 
     840             : /**
     841             :  * \var GUInt32 *GDALWarpKernel::panDstValid;
     842             :  *
     843             :  * Per pixel validity mask for destination pixels.
     844             :  *
     845             :  * A single validity mask layer that applies to the pixels of all destination
     846             :  * bands.  It is accessed similarly to panUnifiedSrcValid, but based
     847             :  * on the size of the destination image.
     848             :  *
     849             :  * This pointer may be NULL indicating that all pixels are valid.
     850             :  */
     851             : 
     852             : /**
     853             :  * \var float *GDALWarpKernel::pafDstDensity;
     854             :  *
     855             :  * Per pixel density mask for destination pixels.
     856             :  *
     857             :  * A single density mask layer that applies to the pixels of all destination
     858             :  * bands.  It contains values between 0.0 and 1.0.
     859             :  *
     860             :  * This pointer may be NULL indicating that all pixels have a density of 1.0.
     861             :  *
     862             :  * The density for a pixel may be accessed like this:
     863             :  *
     864             :  * \code
     865             :  *   float fDensity = 1.0;
     866             :  *   int   nPixel = 3; // Zero based.
     867             :  *   int   nLine = 4;  // Zero based.
     868             :  *
     869             :  *   assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
     870             :  *   assert( nLine >= 0 && nLine < poKern->nDstYSize );
     871             :  *   if( poKern->pafDstDensity != NULL )
     872             :  *     fDensity = poKern->pafDstDensity[nPixel + nLine * poKern->nDstXSize];
     873             :  * \endcode
     874             :  */
     875             : 
     876             : /**
     877             :  * \var int GDALWarpKernel::nSrcXOff;
     878             :  *
     879             :  * X offset to source pixel coordinates for transformation.
     880             :  *
     881             :  * See pfnTransformer.
     882             :  *
     883             :  * This field is required.
     884             :  */
     885             : 
     886             : /**
     887             :  * \var int GDALWarpKernel::nSrcYOff;
     888             :  *
     889             :  * Y offset to source pixel coordinates for transformation.
     890             :  *
     891             :  * See pfnTransformer.
     892             :  *
     893             :  * This field is required.
     894             :  */
     895             : 
     896             : /**
     897             :  * \var int GDALWarpKernel::nDstXOff;
     898             :  *
     899             :  * X offset to destination pixel coordinates for transformation.
     900             :  *
     901             :  * See pfnTransformer.
     902             :  *
     903             :  * This field is required.
     904             :  */
     905             : 
     906             : /**
     907             :  * \var int GDALWarpKernel::nDstYOff;
     908             :  *
     909             :  * Y offset to destination pixel coordinates for transformation.
     910             :  *
     911             :  * See pfnTransformer.
     912             :  *
     913             :  * This field is required.
     914             :  */
     915             : 
     916             : /**
     917             :  * \var GDALTransformerFunc GDALWarpKernel::pfnTransformer;
     918             :  *
     919             :  * Source/destination location transformer.
     920             :  *
     921             :  * The function to call to transform coordinates between source image
     922             :  * pixel/line coordinates and destination image pixel/line coordinates.
     923             :  * See GDALTransformerFunc() for details of the semantics of this function.
     924             :  *
     925             :  * The GDALWarpKernel algorithm will only ever use this transformer in
     926             :  * "destination to source" mode (bDstToSrc=TRUE), and will always pass
     927             :  * partial or complete scanlines of points in the destination image as
     928             :  * input.  This means, among other things, that it is safe to use the
     929             :  * approximating transform GDALApproxTransform() as the transformation
     930             :  * function.
     931             :  *
     932             :  * Source and destination images may be subsets of a larger overall image.
     933             :  * The transformation algorithms will expect and return pixel/line coordinates
     934             :  * in terms of this larger image, so coordinates need to be offset by
     935             :  * the offsets specified in nSrcXOff, nSrcYOff, nDstXOff, and nDstYOff before
     936             :  * passing to pfnTransformer, and after return from it.
     937             :  *
     938             :  * The GDALWarpKernel::pTransformerArg value will be passed as the callback
     939             :  * data to this function when it is called.
     940             :  *
     941             :  * This field is required.
     942             :  */
     943             : 
     944             : /**
     945             :  * \var void *GDALWarpKernel::pTransformerArg;
     946             :  *
     947             :  * Callback data for pfnTransformer.
     948             :  *
     949             :  * This field may be NULL if not required for the pfnTransformer being used.
     950             :  */
     951             : 
     952             : /**
     953             :  * \var GDALProgressFunc GDALWarpKernel::pfnProgress;
     954             :  *
     955             :  * The function to call to report progress of the algorithm, and to check
     956             :  * for a requested termination of the operation.  It operates according to
     957             :  * GDALProgressFunc() semantics.
     958             :  *
     959             :  * Generally speaking the progress function will be invoked for each
     960             :  * scanline of the destination buffer that has been processed.
     961             :  *
     962             :  * This field may be NULL (internally set to GDALDummyProgress()).
     963             :  */
     964             : 
     965             : /**
     966             :  * \var void *GDALWarpKernel::pProgress;
     967             :  *
     968             :  * Callback data for pfnProgress.
     969             :  *
     970             :  * This field may be NULL if not required for the pfnProgress being used.
     971             :  */
     972             : 
     973             : /************************************************************************/
     974             : /*                           GDALWarpKernel()                           */
     975             : /************************************************************************/
     976             : 
     977        2115 : GDALWarpKernel::GDALWarpKernel()  // Default state: null pointers, zero sizes/offsets, unit scales, GDALDummyProgress.
     978             :     : papszWarpOptions(nullptr), eResample(GRA_NearestNeighbour),
     979             :       eWorkingDataType(GDT_Unknown), nBands(0), nSrcXSize(0), nSrcYSize(0),
     980             :       dfSrcXExtraSize(0.0), dfSrcYExtraSize(0.0), papabySrcImage(nullptr),
     981             :       papanBandSrcValid(nullptr), panUnifiedSrcValid(nullptr),
     982             :       pafUnifiedSrcDensity(nullptr), nDstXSize(0), nDstYSize(0),
     983             :       papabyDstImage(nullptr), panDstValid(nullptr), pafDstDensity(nullptr),
     984             :       dfXScale(1.0), dfYScale(1.0), dfXFilter(0.0), dfYFilter(0.0), nXRadius(0),
     985             :       nYRadius(0), nFiltInitX(0), nFiltInitY(0), nSrcXOff(0), nSrcYOff(0),
     986             :       nDstXOff(0), nDstYOff(0), pfnTransformer(nullptr),
     987             :       pTransformerArg(nullptr), pfnProgress(GDALDummyProgress),
     988             :       pProgress(nullptr), dfProgressBase(0.0), dfProgressScale(1.0),
     989             :       padfDstNoDataReal(nullptr), psThreadData(nullptr),
     990        2115 :       eTieStrategy(GWKTS_First)
     991             : {  // Empty body: all initialization happens in the member initializer list above.
     992        2115 : }
     993             : 
     994             : /************************************************************************/
     995             : /*                          ~GDALWarpKernel()                           */
     996             : /************************************************************************/
     997             : 
     998        2115 : GDALWarpKernel::~GDALWarpKernel()
     999             : {  // Empty body: this destructor performs no explicit cleanup.
    1000        2115 : }
    1001             : 
    1002             : /************************************************************************/
    1003             : /*                            PerformWarp()                             */
    1004             : /************************************************************************/
    1005             : 
    1006             : /**
    1007             :  * \fn CPLErr GDALWarpKernel::PerformWarp();
    1008             :  *
    1009             :  * This method performs the warp described in the GDALWarpKernel.
    1010             :  *
    1011             :  * @return CE_None on success or CE_Failure if an error occurs.
    1012             :  */
    1013             : 
    1014        2113 : CPLErr GDALWarpKernel::PerformWarp()
    1015             : 
    1016             : {
    1017        2113 :     const CPLErr eErr = Validate();
    1018             : 
    1019        2113 :     if (eErr != CE_None)
    1020           1 :         return eErr;
    1021             : 
    1022             :     // See #2445 and #3079.
    1023        2112 :     if (nSrcXSize <= 0 || nSrcYSize <= 0)
    1024             :     {
    1025           7 :         if (!pfnProgress(dfProgressBase + dfProgressScale, "", pProgress))
    1026             :         {
    1027           0 :             CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
    1028           0 :             return CE_Failure;
    1029             :         }
    1030           7 :         return CE_None;
    1031             :     }
    1032             : 
    1033             :     /* -------------------------------------------------------------------- */
    1034             :     /*      Pre-calculate resampling scales and window sizes for filtering. */
    1035             :     /* -------------------------------------------------------------------- */
    1036             : 
    1037        2105 :     dfXScale = static_cast<double>(nDstXSize) / (nSrcXSize - dfSrcXExtraSize);
    1038        2105 :     dfYScale = static_cast<double>(nDstYSize) / (nSrcYSize - dfSrcYExtraSize);
    1039        2105 :     if (nSrcXSize >= nDstXSize && nSrcXSize <= nDstXSize + dfSrcXExtraSize)
    1040        1334 :         dfXScale = 1.0;
    1041        2105 :     if (nSrcYSize >= nDstYSize && nSrcYSize <= nDstYSize + dfSrcYExtraSize)
    1042        1039 :         dfYScale = 1.0;
    1043        2105 :     if (dfXScale < 1.0)
    1044             :     {
    1045         550 :         double dfXReciprocalScale = 1.0 / dfXScale;
    1046         550 :         const int nXReciprocalScale =
    1047         550 :             static_cast<int>(dfXReciprocalScale + 0.5);
    1048         550 :         if (fabs(dfXReciprocalScale - nXReciprocalScale) < 0.05)
    1049         432 :             dfXScale = 1.0 / nXReciprocalScale;
    1050             :     }
    1051        2105 :     if (dfYScale < 1.0)
    1052             :     {
    1053         518 :         double dfYReciprocalScale = 1.0 / dfYScale;
    1054         518 :         const int nYReciprocalScale =
    1055         518 :             static_cast<int>(dfYReciprocalScale + 0.5);
    1056         518 :         if (fabs(dfYReciprocalScale - nYReciprocalScale) < 0.05)
    1057         369 :             dfYScale = 1.0 / nYReciprocalScale;
    1058             :     }
    1059             : 
    1060             :     // XSCALE and YSCALE undocumented for now. Can help in some cases.
    1061             :     // Best would probably be a per-pixel scale computation.
    1062        2105 :     const char *pszXScale = CSLFetchNameValue(papszWarpOptions, "XSCALE");
    1063        2105 :     if (pszXScale != nullptr && !EQUAL(pszXScale, "FROM_GRID_SAMPLING"))
    1064           1 :         dfXScale = CPLAtof(pszXScale);
    1065        2105 :     const char *pszYScale = CSLFetchNameValue(papszWarpOptions, "YSCALE");
    1066        2105 :     if (pszYScale != nullptr)
    1067           1 :         dfYScale = CPLAtof(pszYScale);
    1068             : 
    1069             :     // If the xscale is significantly lower than the yscale, this is highly
    1070             :     // suspicious of a situation of wrapping a very large virtual file in
    1071             :     // geographic coordinates with left and right parts being close to the
    1072             :     // antimeridian. In that situation, the xscale computed by the above method
    1073             :     // is completely wrong. Prefer doing an average of a few sample points
    1074             :     // instead
    1075        2105 :     if ((dfYScale / dfXScale > 100 ||
    1076           1 :          (pszXScale != nullptr && EQUAL(pszXScale, "FROM_GRID_SAMPLING"))))
    1077             :     {
    1078             :         // Sample points along a grid
    1079           4 :         const int nPointsX = std::min(10, nDstXSize);
    1080           4 :         const int nPointsY = std::min(10, nDstYSize);
    1081           4 :         const int nPoints = 3 * nPointsX * nPointsY;
    1082           8 :         std::vector<double> padfX;
    1083           8 :         std::vector<double> padfY;
    1084           8 :         std::vector<double> padfZ(nPoints);
    1085           8 :         std::vector<int> pabSuccess(nPoints);
    1086          44 :         for (int iY = 0; iY < nPointsY; iY++)
    1087             :         {
    1088         440 :             for (int iX = 0; iX < nPointsX; iX++)
    1089             :             {
    1090         400 :                 const double dfX =
    1091             :                     nPointsX == 1
    1092         400 :                         ? 0.0
    1093         400 :                         : static_cast<double>(iX) * nDstXSize / (nPointsX - 1);
    1094         400 :                 const double dfY =
    1095             :                     nPointsY == 1
    1096         400 :                         ? 0.0
    1097         400 :                         : static_cast<double>(iY) * nDstYSize / (nPointsY - 1);
    1098             : 
    1099             :                 // Reproject each destination sample point and its neighbours
    1100             :                 // at (x+1,y) and (x,y+1), so as to get the local scale.
    1101         400 :                 padfX.push_back(dfX);
    1102         400 :                 padfY.push_back(dfY);
    1103             : 
    1104         400 :                 padfX.push_back((iX == nPointsX - 1) ? dfX - 1 : dfX + 1);
    1105         400 :                 padfY.push_back(dfY);
    1106             : 
    1107         400 :                 padfX.push_back(dfX);
    1108         400 :                 padfY.push_back((iY == nPointsY - 1) ? dfY - 1 : dfY + 1);
    1109             :             }
    1110             :         }
    1111           4 :         pfnTransformer(pTransformerArg, TRUE, nPoints, &padfX[0], &padfY[0],
    1112           4 :                        &padfZ[0], &pabSuccess[0]);
    1113             : 
    1114             :         // Compute the xscale at each sampling point
    1115           8 :         std::vector<double> adfXScales;
    1116         404 :         for (int i = 0; i < nPoints; i += 3)
    1117             :         {
    1118         400 :             if (pabSuccess[i] && pabSuccess[i + 1] && pabSuccess[i + 2])
    1119             :             {
    1120             :                 const double dfPointXScale =
    1121         400 :                     1.0 / std::max(std::abs(padfX[i + 1] - padfX[i]),
    1122         800 :                                    std::abs(padfX[i + 2] - padfX[i]));
    1123         400 :                 adfXScales.push_back(dfPointXScale);
    1124             :             }
    1125             :         }
    1126             : 
    1127             :         // Sort by increasing xcale
    1128           4 :         std::sort(adfXScales.begin(), adfXScales.end());
    1129             : 
    1130           4 :         if (!adfXScales.empty())
    1131             :         {
    1132             :             // Compute the average of scales, but eliminate outliers small
    1133             :             // scales, if some samples are just along the discontinuity.
    1134           4 :             const double dfMaxPointXScale = adfXScales.back();
    1135           4 :             double dfSumPointXScale = 0;
    1136           4 :             int nCountPointScale = 0;
    1137         404 :             for (double dfPointXScale : adfXScales)
    1138             :             {
    1139         400 :                 if (dfPointXScale > dfMaxPointXScale / 10)
    1140             :                 {
    1141         398 :                     dfSumPointXScale += dfPointXScale;
    1142         398 :                     nCountPointScale++;
    1143             :                 }
    1144             :             }
    1145           4 :             if (nCountPointScale > 0)  // should always be true
    1146             :             {
    1147           4 :                 const double dfXScaleFromSampling =
    1148           4 :                     dfSumPointXScale / nCountPointScale;
    1149             : #if DEBUG_VERBOSE
    1150             :                 CPLDebug("WARP", "Correcting dfXScale from %f to %f", dfXScale,
    1151             :                          dfXScaleFromSampling);
    1152             : #endif
    1153           4 :                 dfXScale = dfXScaleFromSampling;
    1154             :             }
    1155             :         }
    1156             :     }
    1157             : 
    1158             : #if DEBUG_VERBOSE
    1159             :     CPLDebug("WARP", "dfXScale = %f, dfYScale = %f", dfXScale, dfYScale);
    1160             : #endif
    1161             : 
    1162        2105 :     const int bUse4SamplesFormula = dfXScale >= 0.95 && dfYScale >= 0.95;
    1163             : 
    1164             :     // Safety check for callers that would use GDALWarpKernel without using
    1165             :     // GDALWarpOperation.
    1166        2042 :     if ((eResample == GRA_CubicSpline || eResample == GRA_Lanczos ||
    1167        1979 :          ((eResample == GRA_Cubic || eResample == GRA_Bilinear) &&
    1168        4210 :           !bUse4SamplesFormula)) &&
    1169         388 :         atoi(CSLFetchNameValueDef(papszWarpOptions, "EXTRA_ELTS", "0")) !=
    1170             :             WARP_EXTRA_ELTS)
    1171             :     {
    1172           0 :         CPLError(CE_Failure, CPLE_AppDefined,
    1173             :                  "Source arrays must have WARP_EXTRA_ELTS extra elements at "
    1174             :                  "their end. "
    1175             :                  "See GDALWarpKernel class definition. If this condition is "
    1176             :                  "fulfilled, define a EXTRA_ELTS=%d warp options",
    1177             :                  WARP_EXTRA_ELTS);
    1178           0 :         return CE_Failure;
    1179             :     }
    1180             : 
    1181        2105 :     dfXFilter = anGWKFilterRadius[eResample];
    1182        2105 :     dfYFilter = anGWKFilterRadius[eResample];
    1183             : 
    1184        2105 :     nXRadius = dfXScale < 1.0 ? static_cast<int>(ceil(dfXFilter / dfXScale))
    1185        1637 :                               : static_cast<int>(dfXFilter);
    1186        2105 :     nYRadius = dfYScale < 1.0 ? static_cast<int>(ceil(dfYFilter / dfYScale))
    1187        1615 :                               : static_cast<int>(dfYFilter);
    1188             : 
    1189             :     // Filter window offset depends on the parity of the kernel radius.
    1190        2105 :     nFiltInitX = ((anGWKFilterRadius[eResample] + 1) % 2) - nXRadius;
    1191        2105 :     nFiltInitY = ((anGWKFilterRadius[eResample] + 1) % 2) - nYRadius;
    1192             : 
    1193        2105 :     bApplyVerticalShift =
    1194        2105 :         CPLFetchBool(papszWarpOptions, "APPLY_VERTICAL_SHIFT", false);
    1195        2105 :     dfMultFactorVerticalShift = CPLAtof(CSLFetchNameValueDef(
    1196        2105 :         papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT", "1.0"));
    1197             : 
    1198             :     /* -------------------------------------------------------------------- */
    1199             :     /*      Set up resampling functions.                                    */
    1200             :     /* -------------------------------------------------------------------- */
    1201        2105 :     if (CPLFetchBool(papszWarpOptions, "USE_GENERAL_CASE", false))
    1202          12 :         return GWKGeneralCase(this);
    1203             : 
    1204        2093 :     const bool bNoMasksOrDstDensityOnly =
    1205        2089 :         papanBandSrcValid == nullptr && panUnifiedSrcValid == nullptr &&
    1206        4182 :         pafUnifiedSrcDensity == nullptr && panDstValid == nullptr;
    1207             : 
    1208        2093 :     if (eWorkingDataType == GDT_Byte && eResample == GRA_NearestNeighbour &&
    1209             :         bNoMasksOrDstDensityOnly)
    1210         896 :         return GWKNearestNoMasksOrDstDensityOnlyByte(this);
    1211             : 
    1212        1197 :     if (eWorkingDataType == GDT_Byte && eResample == GRA_Bilinear &&
    1213             :         bNoMasksOrDstDensityOnly)
    1214         126 :         return GWKBilinearNoMasksOrDstDensityOnlyByte(this);
    1215             : 
    1216        1071 :     if (eWorkingDataType == GDT_Byte && eResample == GRA_Cubic &&
    1217             :         bNoMasksOrDstDensityOnly)
    1218          72 :         return GWKCubicNoMasksOrDstDensityOnlyByte(this);
    1219             : 
    1220         999 :     if (eWorkingDataType == GDT_Byte && eResample == GRA_CubicSpline &&
    1221             :         bNoMasksOrDstDensityOnly)
    1222          12 :         return GWKCubicSplineNoMasksOrDstDensityOnlyByte(this);
    1223             : 
    1224         987 :     if (eWorkingDataType == GDT_Byte && eResample == GRA_NearestNeighbour)
    1225         324 :         return GWKNearestByte(this);
    1226             : 
    1227         663 :     if ((eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16) &&
    1228         122 :         eResample == GRA_NearestNeighbour && bNoMasksOrDstDensityOnly)
    1229          14 :         return GWKNearestNoMasksOrDstDensityOnlyShort(this);
    1230             : 
    1231         649 :     if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Cubic &&
    1232             :         bNoMasksOrDstDensityOnly)
    1233           5 :         return GWKCubicNoMasksOrDstDensityOnlyShort(this);
    1234             : 
    1235         644 :     if ((eWorkingDataType == GDT_Int16) && eResample == GRA_CubicSpline &&
    1236             :         bNoMasksOrDstDensityOnly)
    1237           6 :         return GWKCubicSplineNoMasksOrDstDensityOnlyShort(this);
    1238             : 
    1239         638 :     if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Bilinear &&
    1240             :         bNoMasksOrDstDensityOnly)
    1241           5 :         return GWKBilinearNoMasksOrDstDensityOnlyShort(this);
    1242             : 
    1243         633 :     if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Cubic &&
    1244             :         bNoMasksOrDstDensityOnly)
    1245          12 :         return GWKCubicNoMasksOrDstDensityOnlyUShort(this);
    1246             : 
    1247         621 :     if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_CubicSpline &&
    1248             :         bNoMasksOrDstDensityOnly)
    1249           5 :         return GWKCubicSplineNoMasksOrDstDensityOnlyUShort(this);
    1250             : 
    1251         616 :     if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Bilinear &&
    1252             :         bNoMasksOrDstDensityOnly)
    1253           6 :         return GWKBilinearNoMasksOrDstDensityOnlyUShort(this);
    1254             : 
    1255         610 :     if (eWorkingDataType == GDT_Int16 && eResample == GRA_NearestNeighbour)
    1256          23 :         return GWKNearestShort(this);
    1257             : 
    1258         587 :     if (eWorkingDataType == GDT_Int16 && eResample == GRA_NearestNeighbour)
    1259           0 :         return GWKNearestUnsignedShort(this);
    1260             : 
    1261         587 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour &&
    1262             :         bNoMasksOrDstDensityOnly)
    1263          11 :         return GWKNearestNoMasksOrDstDensityOnlyFloat(this);
    1264             : 
    1265         576 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour)
    1266          37 :         return GWKNearestFloat(this);
    1267             : 
    1268         539 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_Bilinear &&
    1269             :         bNoMasksOrDstDensityOnly)
    1270           4 :         return GWKBilinearNoMasksOrDstDensityOnlyFloat(this);
    1271             : 
    1272         535 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_Cubic &&
    1273             :         bNoMasksOrDstDensityOnly)
    1274           9 :         return GWKCubicNoMasksOrDstDensityOnlyFloat(this);
    1275             : 
    1276             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    1277             :     if (eWorkingDataType == GDT_Float64 && eResample == GRA_Bilinear &&
    1278             :         bNoMasksOrDstDensityOnly)
    1279             :         return GWKBilinearNoMasksOrDstDensityOnlyDouble(this);
    1280             : 
    1281             :     if (eWorkingDataType == GDT_Float64 && eResample == GRA_Cubic &&
    1282             :         bNoMasksOrDstDensityOnly)
    1283             :         return GWKCubicNoMasksOrDstDensityOnlyDouble(this);
    1284             : #endif
    1285             : 
    1286         526 :     if (eResample == GRA_Average)
    1287          71 :         return GWKAverageOrMode(this);
    1288             : 
    1289         455 :     if (eResample == GRA_RMS)
    1290           9 :         return GWKAverageOrMode(this);
    1291             : 
    1292         446 :     if (eResample == GRA_Mode)
    1293          23 :         return GWKAverageOrMode(this);
    1294             : 
    1295         423 :     if (eResample == GRA_Max)
    1296           6 :         return GWKAverageOrMode(this);
    1297             : 
    1298         417 :     if (eResample == GRA_Min)
    1299           5 :         return GWKAverageOrMode(this);
    1300             : 
    1301         412 :     if (eResample == GRA_Med)
    1302           6 :         return GWKAverageOrMode(this);
    1303             : 
    1304         406 :     if (eResample == GRA_Q1)
    1305           5 :         return GWKAverageOrMode(this);
    1306             : 
    1307         401 :     if (eResample == GRA_Q3)
    1308           5 :         return GWKAverageOrMode(this);
    1309             : 
    1310         396 :     if (eResample == GRA_Sum)
    1311          18 :         return GWKSumPreserving(this);
    1312             : 
    1313         378 :     if (!GDALDataTypeIsComplex(eWorkingDataType))
    1314             :     {
    1315         151 :         return GWKRealCase(this);
    1316             :     }
    1317             : 
    1318         227 :     return GWKGeneralCase(this);
    1319             : }
    1320             : 
    1321             : /************************************************************************/
    1322             : /*                              Validate()                              */
    1323             : /************************************************************************/
    1324             : 
    1325             : /**
    1326             :  * \fn CPLErr GDALWarpKernel::Validate()
    1327             :  *
    1328             :  * Check the settings in the GDALWarpKernel, and issue a CPLError()
    1329             :  * (and return CE_Failure) if the configuration is considered to be
    1330             :  * invalid for some reason.
    1331             :  *
    1332             :  * This method will also do some standard defaulting such as setting
    1333             :  * pfnProgress to GDALDummyProgress() if it is NULL.
    1334             :  *
    1335             :  * @return CE_None on success or CE_Failure if an error is detected.
    1336             :  */
    1337             : 
    1338        2113 : CPLErr GDALWarpKernel::Validate()
    1339             : 
    1340             : {
    1341        2113 :     if (static_cast<size_t>(eResample) >=
    1342             :         (sizeof(anGWKFilterRadius) / sizeof(anGWKFilterRadius[0])))
    1343             :     {
    1344           0 :         CPLError(CE_Failure, CPLE_AppDefined,
    1345             :                  "Unsupported resampling method %d.",
    1346           0 :                  static_cast<int>(eResample));
    1347           0 :         return CE_Failure;
    1348             :     }
    1349             : 
    1350             :     // Tuples of values (e.g. "<R>,<G>,<B>" or "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>)") that must
    1351             :     // be ignored as contributing source pixels during resampling. Only taken into account by
    1352             :     // Average currently
    1353             :     const char *pszExcludedValues =
    1354        2113 :         CSLFetchNameValue(papszWarpOptions, "EXCLUDED_VALUES");
    1355        2113 :     if (pszExcludedValues)
    1356             :     {
    1357             :         const CPLStringList aosTokens(
    1358           8 :             CSLTokenizeString2(pszExcludedValues, "(,)", 0));
    1359           8 :         if ((aosTokens.size() % nBands) != 0)
    1360             :         {
    1361           1 :             CPLError(CE_Failure, CPLE_AppDefined,
    1362             :                      "EXCLUDED_VALUES should contain one or several tuples of "
    1363             :                      "%d values formatted like <R>,<G>,<B> or "
    1364             :                      "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>) if there are multiple "
    1365             :                      "tuples",
    1366             :                      nBands);
    1367           1 :             return CE_Failure;
    1368             :         }
    1369          14 :         std::vector<double> adfTuple;
    1370          28 :         for (int i = 0; i < aosTokens.size(); ++i)
    1371             :         {
    1372          21 :             adfTuple.push_back(CPLAtof(aosTokens[i]));
    1373          21 :             if (((i + 1) % nBands) == 0)
    1374             :             {
    1375           7 :                 m_aadfExcludedValues.push_back(adfTuple);
    1376           7 :                 adfTuple.clear();
    1377             :             }
    1378             :         }
    1379             :     }
    1380             : 
    1381        2112 :     return CE_None;
    1382             : }
    1383             : 
    1384             : /************************************************************************/
    1385             : /*                         GWKOverlayDensity()                          */
    1386             : /*                                                                      */
    1387             : /*      Compute the final density for the destination pixel.  This      */
    1388             : /*      is a function of the overlay density (passed in) and the        */
    1389             : /*      original density.                                               */
    1390             : /************************************************************************/
    1391             : 
    1392     8933090 : static void GWKOverlayDensity(const GDALWarpKernel *poWK, GPtrDiff_t iDstOffset,
    1393             :                               double dfDensity)
    1394             : {
    1395     8933090 :     if (dfDensity < 0.0001 || poWK->pafDstDensity == nullptr)
    1396     7742210 :         return;
    1397             : 
    1398     1190880 :     poWK->pafDstDensity[iDstOffset] = static_cast<float>(
    1399     1190880 :         1.0 - (1.0 - dfDensity) * (1.0 - poWK->pafDstDensity[iDstOffset]));
    1400             : }
    1401             : 
    1402             : /************************************************************************/
    1403             : /*                          GWKRoundValueT()                            */
    1404             : /************************************************************************/
    1405             : 
    1406             : template <class T, bool is_signed> struct sGWKRoundValueT
    1407             : {
    1408             :     static T eval(double);
    1409             : };
    1410             : 
    1411             : template <class T> struct sGWKRoundValueT<T, true> /* signed */
    1412             : {
    1413      791525 :     static T eval(double dfValue)
    1414             :     {
    1415      791525 :         return static_cast<T>(floor(dfValue + 0.5));
    1416             :     }
    1417             : };
    1418             : 
    1419             : template <class T> struct sGWKRoundValueT<T, false> /* unsigned */
    1420             : {
    1421    12954881 :     static T eval(double dfValue)
    1422             :     {
    1423    12954881 :         return static_cast<T>(dfValue + 0.5);
    1424             :     }
    1425             : };
    1426             : 
    1427    13740806 : template <class T> static T GWKRoundValueT(double dfValue)
    1428             : {
    1429    13740806 :     return sGWKRoundValueT<T, cpl::NumericLimits<T>::is_signed>::eval(dfValue);
    1430             : }
    1431             : 
    1432      268974 : template <> float GWKRoundValueT<float>(double dfValue)
    1433             : {
    1434      268974 :     return static_cast<float>(dfValue);
    1435             : }
    1436             : 
    1437             : #ifdef notused
    1438             : template <> double GWKRoundValueT<double>(double dfValue)
    1439             : {
    1440             :     return dfValue;
    1441             : }
    1442             : #endif
    1443             : 
    1444             : /************************************************************************/
    1445             : /*                            GWKClampValueT()                          */
    1446             : /************************************************************************/
    1447             : 
    1448    10313444 : template <class T> static CPL_INLINE T GWKClampValueT(double dfValue)
    1449             : {
    1450    10313444 :     if (dfValue < cpl::NumericLimits<T>::min())
    1451        3969 :         return cpl::NumericLimits<T>::min();
    1452    10364686 :     else if (dfValue > cpl::NumericLimits<T>::max())
    1453       18463 :         return cpl::NumericLimits<T>::max();
    1454             :     else
    1455    10321116 :         return GWKRoundValueT<T>(dfValue);
    1456             : }
    1457             : 
    1458      718914 : template <> float GWKClampValueT<float>(double dfValue)
    1459             : {
    1460      718914 :     return static_cast<float>(dfValue);
    1461             : }
    1462             : 
    1463             : #ifdef notused
    1464             : template <> double GWKClampValueT<double>(double dfValue)
    1465             : {
    1466             :     return dfValue;
    1467             : }
    1468             : #endif
    1469             : 
    1470             : /************************************************************************/
    1471             : /*                             AvoidNoData()                            */
    1472             : /************************************************************************/
    1473             : 
    1474             : template <class T>
    1475    12865062 : inline void AvoidNoData(const GDALWarpKernel *poWK, int iBand,
    1476             :                         GPtrDiff_t iDstOffset)
    1477             : {
    1478    12865062 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    1479    12865062 :     T *pDst = reinterpret_cast<T *>(pabyDst);
    1480             : 
    1481    12865062 :     if (poWK->padfDstNoDataReal != nullptr &&
    1482     6729947 :         poWK->padfDstNoDataReal[iBand] == static_cast<double>(pDst[iDstOffset]))
    1483             :     {
    1484             :         if constexpr (cpl::NumericLimits<T>::is_integer)
    1485             :         {
    1486        2637 :             if (pDst[iDstOffset] ==
    1487        2637 :                 static_cast<T>(cpl::NumericLimits<T>::lowest()))
    1488             :             {
    1489        2509 :                 pDst[iDstOffset] =
    1490        2509 :                     static_cast<T>(cpl::NumericLimits<T>::lowest() + 1);
    1491             :             }
    1492             :             else
    1493         128 :                 pDst[iDstOffset]--;
    1494             :         }
    1495             :         else
    1496             :         {
    1497          64 :             if (pDst[iDstOffset] == cpl::NumericLimits<T>::max())
    1498             :             {
    1499             :                 using std::nextafter;
    1500           0 :                 pDst[iDstOffset] =
    1501           0 :                     nextafter(pDst[iDstOffset], static_cast<T>(0));
    1502             :             }
    1503             :             else
    1504             :             {
    1505             :                 using std::nextafter;
    1506          64 :                 pDst[iDstOffset] =
    1507          64 :                     nextafter(pDst[iDstOffset], cpl::NumericLimits<T>::max());
    1508             :             }
    1509             :         }
    1510             : 
    1511        2701 :         if (!poWK->bWarnedAboutDstNoDataReplacement)
    1512             :         {
    1513          25 :             const_cast<GDALWarpKernel *>(poWK)
    1514             :                 ->bWarnedAboutDstNoDataReplacement = true;
    1515          25 :             CPLError(CE_Warning, CPLE_AppDefined,
    1516             :                      "Value %g in the source dataset has been changed to %g "
    1517             :                      "in the destination dataset to avoid being treated as "
    1518             :                      "NoData. To avoid this, select a different NoData value "
    1519             :                      "for the destination dataset.",
    1520          25 :                      poWK->padfDstNoDataReal[iBand],
    1521          25 :                      static_cast<double>(pDst[iDstOffset]));
    1522             :         }
    1523             :     }
    1524    12865062 : }
    1525             : 
    1526             : /************************************************************************/
    1527             : /*                         GWKSetPixelValueRealT()                      */
    1528             : /************************************************************************/
    1529             : 
    1530             : template <class T>
    1531     8159107 : static bool GWKSetPixelValueRealT(const GDALWarpKernel *poWK, int iBand,
    1532             :                                   GPtrDiff_t iDstOffset, double dfDensity,
    1533             :                                   T value)
    1534             : {
    1535     8159107 :     T *pDst = reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
    1536             : 
    1537             :     /* -------------------------------------------------------------------- */
    1538             :     /*      If the source density is less than 100% we need to fetch the    */
    1539             :     /*      existing destination value, and mix it with the source to       */
    1540             :     /*      get the new "to apply" value.  Also compute composite           */
    1541             :     /*      density.                                                        */
    1542             :     /*                                                                      */
    1543             :     /*      We avoid mixing if density is very near one or risk mixing      */
    1544             :     /*      in very extreme nodata values and causing odd results (#1610)   */
    1545             :     /* -------------------------------------------------------------------- */
    1546     8159107 :     if (dfDensity < 0.9999)
    1547             :     {
    1548       81504 :         if (dfDensity < 0.0001)
    1549           0 :             return true;
    1550             : 
    1551       81504 :         double dfDstDensity = 1.0;
    1552             : 
    1553       81504 :         if (poWK->pafDstDensity != nullptr)
    1554       80032 :             dfDstDensity = poWK->pafDstDensity[iDstOffset];
    1555        1472 :         else if (poWK->panDstValid != nullptr &&
    1556           0 :                  !CPLMaskGet(poWK->panDstValid, iDstOffset))
    1557           0 :             dfDstDensity = 0.0;
    1558             : 
    1559             :         // It seems like we also ought to be testing panDstValid[] here!
    1560             : 
    1561       81504 :         const double dfDstReal = pDst[iDstOffset];
    1562             : 
    1563             :         // The destination density is really only relative to the portion
    1564             :         // not occluded by the overlay.
    1565       81504 :         const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
    1566             : 
    1567       81504 :         const double dfReal = (value * dfDensity + dfDstReal * dfDstInfluence) /
    1568       81504 :                               (dfDensity + dfDstInfluence);
    1569             : 
    1570             :         /* --------------------------------------------------------------------
    1571             :          */
    1572             :         /*      Actually apply the destination value. */
    1573             :         /*                                                                      */
    1574             :         /*      Avoid using the destination nodata value for integer datatypes
    1575             :          */
    1576             :         /*      if by chance it is equal to the computed pixel value. */
    1577             :         /* --------------------------------------------------------------------
    1578             :          */
    1579       81504 :         pDst[iDstOffset] = GWKClampValueT<T>(dfReal);
    1580             :     }
    1581             :     else
    1582             :     {
    1583     8077598 :         pDst[iDstOffset] = value;
    1584             :     }
    1585             : 
    1586     8159107 :     AvoidNoData<T>(poWK, iBand, iDstOffset);
    1587             : 
    1588     8159107 :     return true;
    1589             : }
    1590             : 
    1591             : /************************************************************************/
    1592             : /*                       ClampRoundAndAvoidNoData()                     */
    1593             : /************************************************************************/
    1594             : 
    1595             : template <class T>
    1596     4705975 : inline void ClampRoundAndAvoidNoData(const GDALWarpKernel *poWK, int iBand,
    1597             :                                      GPtrDiff_t iDstOffset, double dfReal)
    1598             : {
    1599     4705975 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    1600     4705975 :     T *pDst = reinterpret_cast<T *>(pabyDst);
    1601             : 
    1602             :     if constexpr (cpl::NumericLimits<T>::is_integer)
    1603             :     {
    1604             :         using std::floor;
    1605     4223079 :         if (dfReal < static_cast<double>(cpl::NumericLimits<T>::lowest()))
    1606        1638 :             pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::lowest());
    1607     4221439 :         else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
    1608       13640 :             pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::max());
    1609             :         else if constexpr (cpl::NumericLimits<T>::is_signed)
    1610       13239 :             pDst[iDstOffset] = static_cast<T>(floor(dfReal + 0.5));
    1611             :         else
    1612     4194560 :             pDst[iDstOffset] = static_cast<T>(dfReal + 0.5);
    1613             :     }
    1614             :     else
    1615             :     {
    1616      482896 :         pDst[iDstOffset] = static_cast<T>(dfReal);
    1617             :     }
    1618             : 
    1619     4705975 :     AvoidNoData<T>(poWK, iBand, iDstOffset);
    1620     4705975 : }
    1621             : 
    1622             : /************************************************************************/
    1623             : /*                          GWKSetPixelValue()                          */
    1624             : /************************************************************************/
    1625             : 
    1626     3867240 : static bool GWKSetPixelValue(const GDALWarpKernel *poWK, int iBand,
    1627             :                              GPtrDiff_t iDstOffset, double dfDensity,
    1628             :                              double dfReal, double dfImag)
    1629             : 
    1630             : {
    1631     3867240 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    1632             : 
    1633             :     /* -------------------------------------------------------------------- */
    1634             :     /*      If the source density is less than 100% we need to fetch the    */
    1635             :     /*      existing destination value, and mix it with the source to       */
    1636             :     /*      get the new "to apply" value.  Also compute composite           */
    1637             :     /*      density.                                                        */
    1638             :     /*                                                                      */
    1639             :     /*      We avoid mixing if density is very near one or risk mixing      */
    1640             :     /*      in very extreme nodata values and causing odd results (#1610)   */
    1641             :     /* -------------------------------------------------------------------- */
    1642     3867240 :     if (dfDensity < 0.9999)
    1643             :     {
    1644         800 :         if (dfDensity < 0.0001)
    1645           0 :             return true;
    1646             : 
    1647         800 :         double dfDstDensity = 1.0;
    1648         800 :         if (poWK->pafDstDensity != nullptr)
    1649         800 :             dfDstDensity = poWK->pafDstDensity[iDstOffset];
    1650           0 :         else if (poWK->panDstValid != nullptr &&
    1651           0 :                  !CPLMaskGet(poWK->panDstValid, iDstOffset))
    1652           0 :             dfDstDensity = 0.0;
    1653             : 
    1654         800 :         double dfDstReal = 0.0;
    1655         800 :         double dfDstImag = 0.0;
    1656             :         // panDstValid[] is consulted above only when pafDstDensity is null; should it also be tested otherwise?
    1657             : 
    1658             :         // TODO(schwehr): Factor out this repeated type of set.
    1659         800 :         switch (poWK->eWorkingDataType)
    1660             :         {
    1661           0 :             case GDT_Byte:
    1662           0 :                 dfDstReal = pabyDst[iDstOffset];
    1663           0 :                 dfDstImag = 0.0;
    1664           0 :                 break;
    1665             : 
    1666           0 :             case GDT_Int8:
    1667           0 :                 dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
    1668           0 :                 dfDstImag = 0.0;
    1669           0 :                 break;
    1670             : 
    1671         400 :             case GDT_Int16:
    1672         400 :                 dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
    1673         400 :                 dfDstImag = 0.0;
    1674         400 :                 break;
    1675             : 
    1676         400 :             case GDT_UInt16:
    1677         400 :                 dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
    1678         400 :                 dfDstImag = 0.0;
    1679         400 :                 break;
    1680             : 
    1681           0 :             case GDT_Int32:
    1682           0 :                 dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
    1683           0 :                 dfDstImag = 0.0;
    1684           0 :                 break;
    1685             : 
    1686           0 :             case GDT_UInt32:
    1687           0 :                 dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
    1688           0 :                 dfDstImag = 0.0;
    1689           0 :                 break;
    1690             : 
    1691           0 :             case GDT_Int64:
    1692           0 :                 dfDstReal = static_cast<double>(
    1693           0 :                     reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
    1694           0 :                 dfDstImag = 0.0;
    1695           0 :                 break;
    1696             : 
    1697           0 :             case GDT_UInt64:
    1698           0 :                 dfDstReal = static_cast<double>(
    1699           0 :                     reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
    1700           0 :                 dfDstImag = 0.0;
    1701           0 :                 break;
    1702             : 
    1703           0 :             case GDT_Float16:
    1704           0 :                 dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
    1705           0 :                 dfDstImag = 0.0;
    1706           0 :                 break;
    1707             : 
    1708           0 :             case GDT_Float32:
    1709           0 :                 dfDstReal = reinterpret_cast<float *>(pabyDst)[iDstOffset];
    1710           0 :                 dfDstImag = 0.0;
    1711           0 :                 break;
    1712             : 
    1713           0 :             case GDT_Float64:
    1714           0 :                 dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
    1715           0 :                 dfDstImag = 0.0;
    1716           0 :                 break;
    1717             : 
    1718           0 :             case GDT_CInt16:
    1719           0 :                 dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2];
    1720           0 :                 dfDstImag =
    1721           0 :                     reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2 + 1];
    1722           0 :                 break;
    1723             : 
    1724           0 :             case GDT_CInt32:
    1725           0 :                 dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2];
    1726           0 :                 dfDstImag =
    1727           0 :                     reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2 + 1];
    1728           0 :                 break;
    1729             : 
    1730           0 :             case GDT_CFloat16:
    1731             :                 dfDstReal =
    1732           0 :                     reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2];
    1733             :                 dfDstImag =
    1734           0 :                     reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1];
    1735           0 :                 break;
    1736             : 
    1737           0 :             case GDT_CFloat32:
    1738           0 :                 dfDstReal = reinterpret_cast<float *>(pabyDst)[iDstOffset * 2];
    1739           0 :                 dfDstImag =
    1740           0 :                     reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1];
    1741           0 :                 break;
    1742             : 
    1743           0 :             case GDT_CFloat64:
    1744           0 :                 dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset * 2];
    1745           0 :                 dfDstImag =
    1746           0 :                     reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1];
    1747           0 :                 break;
    1748             : 
    1749           0 :             case GDT_Unknown:
    1750             :             case GDT_TypeCount:
    1751           0 :                 CPLAssert(false);
    1752             :                 return false;
    1753             :         }
    1754             : 
    1755             :         // The destination density is really only relative to the portion
    1756             :         // not occluded by the overlay.
    1757         800 :         const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
    1758             : 
    1759         800 :         dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
    1760         800 :                  (dfDensity + dfDstInfluence);
    1761             : 
    1762         800 :         dfImag = (dfImag * dfDensity + dfDstImag * dfDstInfluence) /
    1763         800 :                  (dfDensity + dfDstInfluence);
    1764             :     }
    1765             : 
    1766             :     /* -------------------------------------------------------------------- */
    1767             :     /*      Actually apply the destination value.                           */
    1768             :     /*                                                                      */
    1769             :     /*      Avoid using the destination nodata value for integer datatypes  */
    1770             :     /*      if by chance it is equal to the computed pixel value.           */
    1771             :     /* -------------------------------------------------------------------- */
    1772             : 
    1773     3867240 :     switch (poWK->eWorkingDataType)
    1774             :     {
    1775     3141450 :         case GDT_Byte:
    1776     3141450 :             ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal);
    1777     3141450 :             break;
    1778             : 
    1779           0 :         case GDT_Int8:
    1780           0 :             ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal);
    1781           0 :             break;
    1782             : 
    1783        7470 :         case GDT_Int16:
    1784        7470 :             ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal);
    1785        7470 :             break;
    1786             : 
    1787         463 :         case GDT_UInt16:
    1788         463 :             ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal);
    1789         463 :             break;
    1790             : 
    1791          63 :         case GDT_UInt32:
    1792          63 :             ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal);
    1793          63 :             break;
    1794             : 
    1795        3470 :         case GDT_Int32:
    1796        3470 :             ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal);
    1797        3470 :             break;
    1798             : 
    1799           0 :         case GDT_UInt64:
    1800           0 :             ClampRoundAndAvoidNoData<std::uint64_t>(poWK, iBand, iDstOffset,
    1801             :                                                     dfReal);
    1802           0 :             break;
    1803             : 
    1804           0 :         case GDT_Int64:
    1805           0 :             ClampRoundAndAvoidNoData<std::int64_t>(poWK, iBand, iDstOffset,
    1806             :                                                    dfReal);
    1807           0 :             break;
    1808             : 
    1809           0 :         case GDT_Float16:
    1810           0 :             ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal);
    1811           0 :             break;
    1812             : 
    1813      478957 :         case GDT_Float32:
    1814      478957 :             ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal);
    1815      478957 :             break;
    1816             : 
    1817         147 :         case GDT_Float64:
    1818         147 :             ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal);
    1819         147 :             break;
    1820             : 
    1821      234078 :         case GDT_CInt16:
    1822             :         {
    1823             :             typedef GInt16 T;
    1824      234078 :             if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
    1825           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1826           0 :                     cpl::NumericLimits<T>::min();
    1827      234078 :             else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
    1828           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1829           0 :                     cpl::NumericLimits<T>::max();
    1830             :             else
    1831      234078 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1832      234078 :                     static_cast<T>(floor(dfReal + 0.5));
    1833      234078 :             if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
    1834           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1835           0 :                     cpl::NumericLimits<T>::min();
    1836      234078 :             else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
    1837           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1838           0 :                     cpl::NumericLimits<T>::max();
    1839             :             else
    1840      234078 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1841      234078 :                     static_cast<T>(floor(dfImag + 0.5));
    1842      234078 :             break;
    1843             :         }
    1844             : 
    1845         378 :         case GDT_CInt32:
    1846             :         {
    1847             :             typedef GInt32 T;
    1848         378 :             if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
    1849           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1850           0 :                     cpl::NumericLimits<T>::min();
    1851         378 :             else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
    1852           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1853           0 :                     cpl::NumericLimits<T>::max();
    1854             :             else
    1855         378 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1856         378 :                     static_cast<T>(floor(dfReal + 0.5));
    1857         378 :             if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
    1858           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1859           0 :                     cpl::NumericLimits<T>::min();
    1860         378 :             else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
    1861           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1862           0 :                     cpl::NumericLimits<T>::max();
    1863             :             else
    1864         378 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1865         378 :                     static_cast<T>(floor(dfImag + 0.5));
    1866         378 :             break;
    1867             :         }
    1868             : 
    1869           0 :         case GDT_CFloat16:
    1870           0 :             reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2] =
    1871           0 :                 static_cast<GFloat16>(dfReal);
    1872           0 :             reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1] =
    1873           0 :                 static_cast<GFloat16>(dfImag);
    1874           0 :             break;
    1875             : 
    1876         390 :         case GDT_CFloat32:
    1877         390 :             reinterpret_cast<float *>(pabyDst)[iDstOffset * 2] =
    1878         390 :                 static_cast<float>(dfReal);
    1879         390 :             reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1] =
    1880         390 :                 static_cast<float>(dfImag);
    1881         390 :             break;
    1882             : 
    1883         378 :         case GDT_CFloat64:
    1884         378 :             reinterpret_cast<double *>(pabyDst)[iDstOffset * 2] = dfReal;
    1885         378 :             reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1] = dfImag;
    1886         378 :             break;
    1887             : 
    1888           0 :         case GDT_Unknown:
    1889             :         case GDT_TypeCount:
    1890           0 :             return false;
    1891             :     }
    1892             : 
    1893     3867240 :     return true;
    1894             : }
    1895             : 
    1896             : /************************************************************************/
    1897             : /*                       GWKSetPixelValueReal()                         */
    1898             : /************************************************************************/
    1899             : 
    1900     1073960 : static bool GWKSetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
    1901             :                                  GPtrDiff_t iDstOffset, double dfDensity,
    1902             :                                  double dfReal)
    1903             : 
    1904             : {
    1905     1073960 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    1906             : 
    1907             :     /* -------------------------------------------------------------------- */
    1908             :     /*      If the source density is less than 100% we need to fetch the    */
    1909             :     /*      existing destination value, and mix it with the source to       */
    1910             :     /*      get the new "to apply" value.  Also compute composite           */
    1911             :     /*      density.                                                        */
    1912             :     /*                                                                      */
    1913             :     /*      We avoid mixing if density is very near one or risk mixing      */
    1914             :     /*      in very extreme nodata values and causing odd results (#1610)   */
    1915             :     /* -------------------------------------------------------------------- */
    1916     1073960 :     if (dfDensity < 0.9999)
    1917             :     {
    1918       78172 :         if (dfDensity < 0.0001)
    1919           0 :             return true;
    1920             : 
    1921       78172 :         double dfDstReal = 0.0;
    1922       78172 :         double dfDstDensity = 1.0;
    1923             : 
    1924       78172 :         if (poWK->pafDstDensity != nullptr)
    1925       78172 :             dfDstDensity = poWK->pafDstDensity[iDstOffset];
    1926           0 :         else if (poWK->panDstValid != nullptr &&
    1927           0 :                  !CPLMaskGet(poWK->panDstValid, iDstOffset))
    1928           0 :             dfDstDensity = 0.0;
    1929             : 
    1930             :         // panDstValid[] is consulted above only when pafDstDensity is null; should it also be tested otherwise?
    1931             : 
    1932       78172 :         switch (poWK->eWorkingDataType)
    1933             :         {
    1934           0 :             case GDT_Byte:
    1935           0 :                 dfDstReal = pabyDst[iDstOffset];
    1936           0 :                 break;
    1937             : 
    1938           0 :             case GDT_Int8:
    1939           0 :                 dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
    1940           0 :                 break;
    1941             : 
    1942         300 :             case GDT_Int16:
    1943         300 :                 dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
    1944         300 :                 break;
    1945             : 
    1946       77872 :             case GDT_UInt16:
    1947       77872 :                 dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
    1948       77872 :                 break;
    1949             : 
    1950           0 :             case GDT_Int32:
    1951           0 :                 dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
    1952           0 :                 break;
    1953             : 
    1954           0 :             case GDT_UInt32:
    1955           0 :                 dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
    1956           0 :                 break;
    1957             : 
    1958           0 :             case GDT_Int64:
    1959           0 :                 dfDstReal = static_cast<double>(
    1960           0 :                     reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
    1961           0 :                 break;
    1962             : 
    1963           0 :             case GDT_UInt64:
    1964           0 :                 dfDstReal = static_cast<double>(
    1965           0 :                     reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
    1966           0 :                 break;
    1967             : 
    1968           0 :             case GDT_Float16:
    1969           0 :                 dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
    1970           0 :                 break;
    1971             : 
    1972           0 :             case GDT_Float32:
    1973           0 :                 dfDstReal = reinterpret_cast<float *>(pabyDst)[iDstOffset];
    1974           0 :                 break;
    1975             : 
    1976           0 :             case GDT_Float64:
    1977           0 :                 dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
    1978           0 :                 break;
    1979             : 
    1980           0 :             case GDT_CInt16:
    1981             :             case GDT_CInt32:
    1982             :             case GDT_CFloat16:
    1983             :             case GDT_CFloat32:
    1984             :             case GDT_CFloat64:
    1985             :             case GDT_Unknown:
    1986             :             case GDT_TypeCount:
    1987           0 :                 CPLAssert(false);
    1988             :                 return false;
    1989             :         }
    1990             : 
    1991             :         // The destination density is really only relative to the portion
    1992             :         // not occluded by the overlay.
    1993       78172 :         const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
    1994             : 
    1995       78172 :         dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
    1996       78172 :                  (dfDensity + dfDstInfluence);
    1997             :     }
    1998             : 
    1999             :     /* -------------------------------------------------------------------- */
    2000             :     /*      Actually apply the destination value.                           */
    2001             :     /*                                                                      */
    2002             :     /*      Avoid using the destination nodata value for integer datatypes  */
    2003             :     /*      if by chance it is equal to the computed pixel value.           */
    2004             :     /* -------------------------------------------------------------------- */
    2005             : 
    2006     1073960 :     switch (poWK->eWorkingDataType)
    2007             :     {
    2008      916752 :         case GDT_Byte:
    2009      916752 :             ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal);
    2010      916752 :             break;
    2011             : 
    2012           0 :         case GDT_Int8:
    2013           0 :             ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal);
    2014           0 :             break;
    2015             : 
    2016        1117 :         case GDT_Int16:
    2017        1117 :             ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal);
    2018        1117 :             break;
    2019             : 
    2020      150735 :         case GDT_UInt16:
    2021      150735 :             ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal);
    2022      150735 :             break;
    2023             : 
    2024         347 :         case GDT_UInt32:
    2025         347 :             ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal);
    2026         347 :             break;
    2027             : 
    2028        1150 :         case GDT_Int32:
    2029        1150 :             ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal);
    2030        1150 :             break;
    2031             : 
    2032          32 :         case GDT_UInt64:
    2033          32 :             ClampRoundAndAvoidNoData<std::uint64_t>(poWK, iBand, iDstOffset,
    2034             :                                                     dfReal);
    2035          32 :             break;
    2036             : 
    2037          32 :         case GDT_Int64:
    2038          32 :             ClampRoundAndAvoidNoData<std::int64_t>(poWK, iBand, iDstOffset,
    2039             :                                                    dfReal);
    2040          32 :             break;
    2041             : 
    2042           0 :         case GDT_Float16:
    2043           0 :             ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal);
    2044           0 :             break;
    2045             : 
    2046        3442 :         case GDT_Float32:
    2047        3442 :             ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal);
    2048        3442 :             break;
    2049             : 
    2050         350 :         case GDT_Float64:
    2051         350 :             ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal);
    2052         350 :             break;
    2053             : 
    2054           0 :         case GDT_CInt16:
    2055             :         case GDT_CInt32:
    2056             :         case GDT_CFloat16:
    2057             :         case GDT_CFloat32:
    2058             :         case GDT_CFloat64:
    2059           0 :             return false;
    2060             : 
    2061           0 :         case GDT_Unknown:
    2062             :         case GDT_TypeCount:
    2063           0 :             CPLAssert(false);
    2064             :             return false;
    2065             :     }
    2066             : 
    2067     1073960 :     return true;
    2068             : }
    2069             : 
    2070             : /************************************************************************/
    2071             : /*                          GWKGetPixelValue()                          */
    2072             : /************************************************************************/
    2073             : 
    2074             : /* It is assumed that panUnifiedSrcValid has been checked before */
    2075             : 
    2076    29336100 : static bool GWKGetPixelValue(const GDALWarpKernel *poWK, int iBand,
    2077             :                              GPtrDiff_t iSrcOffset, double *pdfDensity,
    2078             :                              double *pdfReal, double *pdfImag)
    2079             : 
    2080             : {
    2081    29336100 :     GByte *pabySrc = poWK->papabySrcImage[iBand];
    2082             : 
    2083    58672300 :     if (poWK->papanBandSrcValid != nullptr &&
    2084    29336100 :         poWK->papanBandSrcValid[iBand] != nullptr &&
    2085           0 :         !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
    2086             :     {
    2087           0 :         *pdfDensity = 0.0;
    2088           0 :         return false;
    2089             :     }
    2090             : 
    2091    29336100 :     *pdfReal = 0.0;
    2092    29336100 :     *pdfImag = 0.0;
    2093             : 
    2094             :     // TODO(schwehr): Fix casting.
    2095    29336100 :     switch (poWK->eWorkingDataType)
    2096             :     {
    2097    28245600 :         case GDT_Byte:
    2098    28245600 :             *pdfReal = pabySrc[iSrcOffset];
    2099    28245600 :             *pdfImag = 0.0;
    2100    28245600 :             break;
    2101             : 
    2102           0 :         case GDT_Int8:
    2103           0 :             *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
    2104           0 :             *pdfImag = 0.0;
    2105           0 :             break;
    2106             : 
    2107       28226 :         case GDT_Int16:
    2108       28226 :             *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
    2109       28226 :             *pdfImag = 0.0;
    2110       28226 :             break;
    2111             : 
    2112         163 :         case GDT_UInt16:
    2113         163 :             *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
    2114         163 :             *pdfImag = 0.0;
    2115         163 :             break;
    2116             : 
    2117       13726 :         case GDT_Int32:
    2118       13726 :             *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
    2119       13726 :             *pdfImag = 0.0;
    2120       13726 :             break;
    2121             : 
    2122          63 :         case GDT_UInt32:
    2123          63 :             *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
    2124          63 :             *pdfImag = 0.0;
    2125          63 :             break;
    2126             : 
    2127           0 :         case GDT_Int64:
    2128           0 :             *pdfReal = static_cast<double>(
    2129           0 :                 reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
    2130           0 :             *pdfImag = 0.0;
    2131           0 :             break;
    2132             : 
    2133           0 :         case GDT_UInt64:
    2134           0 :             *pdfReal = static_cast<double>(
    2135           0 :                 reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
    2136           0 :             *pdfImag = 0.0;
    2137           0 :             break;
    2138             : 
    2139           0 :         case GDT_Float16:
    2140           0 :             *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
    2141           0 :             *pdfImag = 0.0;
    2142           0 :             break;
    2143             : 
    2144     1047220 :         case GDT_Float32:
    2145     1047220 :             *pdfReal = reinterpret_cast<float *>(pabySrc)[iSrcOffset];
    2146     1047220 :             *pdfImag = 0.0;
    2147     1047220 :             break;
    2148             : 
    2149         582 :         case GDT_Float64:
    2150         582 :             *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
    2151         582 :             *pdfImag = 0.0;
    2152         582 :             break;
    2153             : 
    2154         130 :         case GDT_CInt16:
    2155         130 :             *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2];
    2156         130 :             *pdfImag = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2 + 1];
    2157         130 :             break;
    2158             : 
    2159         130 :         case GDT_CInt32:
    2160         130 :             *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2];
    2161         130 :             *pdfImag = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2 + 1];
    2162         130 :             break;
    2163             : 
    2164           0 :         case GDT_CFloat16:
    2165           0 :             *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2];
    2166           0 :             *pdfImag =
    2167           0 :                 reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2 + 1];
    2168           0 :             break;
    2169             : 
    2170         178 :         case GDT_CFloat32:
    2171         178 :             *pdfReal = reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2];
    2172         178 :             *pdfImag = reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2 + 1];
    2173         178 :             break;
    2174             : 
    2175         130 :         case GDT_CFloat64:
    2176         130 :             *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2];
    2177         130 :             *pdfImag = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2 + 1];
    2178         130 :             break;
    2179             : 
    2180           0 :         case GDT_Unknown:
    2181             :         case GDT_TypeCount:
    2182           0 :             CPLAssert(false);
    2183             :             *pdfDensity = 0.0;
    2184             :             return false;
    2185             :     }
    2186             : 
    2187    29336100 :     if (poWK->pafUnifiedSrcDensity != nullptr)
    2188     3015160 :         *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    2189             :     else
    2190    26321000 :         *pdfDensity = 1.0;
    2191             : 
    2192    29336100 :     return *pdfDensity != 0.0;
    2193             : }
    2194             : 
    2195             : /************************************************************************/
    2196             : /*                       GWKGetPixelValueReal()                         */
    2197             : /************************************************************************/
    2198             : 
                       // Fetch one real-valued source sample of band iBand as a double.
                       //
                       // The sample at element index iSrcOffset of poWK->papabySrcImage[iBand]
                       // is read according to poWK->eWorkingDataType and converted to double.
                       //
                       // Outputs:
                       //   *pdfReal    - the sample value. Left untouched when the per-band
                       //                 mask rejects the sample or the type is unsupported.
                       //   *pdfDensity - 0.0 when the per-band mask rejects the sample,
                       //                 otherwise poWK->pafUnifiedSrcDensity[iSrcOffset], or
                       //                 1.0 when no density buffer is set.
                       //
                       // Returns true when the resulting density is non-zero.
                       //
                       // Complex and unknown working types trigger CPLAssert(false) and return
                       // false without writing *pdfDensity or *pdfReal.
                       // NOTE(review): poWK->panUnifiedSrcValid is not consulted here, whereas
                       // GWKGetPixelT() checks it -- confirm that this is intentional.
    2199      151448 : static bool GWKGetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
    2200             :                                  GPtrDiff_t iSrcOffset, double *pdfDensity,
    2201             :                                  double *pdfReal)
    2202             : 
    2203             : {
    2204      151448 :     GByte *pabySrc = poWK->papabySrcImage[iBand];
    2205             : 
                           // Per-band validity mask (optional): bail out with zero density
                           // when CPLMaskGet() reports the sample as not set.
    2206      302898 :     if (poWK->papanBandSrcValid != nullptr &&
    2207      151450 :         poWK->papanBandSrcValid[iBand] != nullptr &&
    2208           2 :         !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
    2209             :     {
    2210           0 :         *pdfDensity = 0.0;
    2211           0 :         return false;
    2212             :     }
    2213             : 
                           // Reinterpret the byte buffer as the working type and index it by
                           // element (iSrcOffset is an element offset, not a byte offset).
    2214      151448 :     switch (poWK->eWorkingDataType)
    2215             :     {
    2216           1 :         case GDT_Byte:
    2217           1 :             *pdfReal = pabySrc[iSrcOffset];
    2218           1 :             break;
    2219             : 
    2220           0 :         case GDT_Int8:
    2221           0 :             *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
    2222           0 :             break;
    2223             : 
    2224           1 :         case GDT_Int16:
    2225           1 :             *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
    2226           1 :             break;
    2227             : 
    2228      150357 :         case GDT_UInt16:
    2229      150357 :             *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
    2230      150357 :             break;
    2231             : 
    2232         886 :         case GDT_Int32:
    2233         886 :             *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
    2234         886 :             break;
    2235             : 
    2236          83 :         case GDT_UInt32:
    2237          83 :             *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
    2238          83 :             break;
    2239             : 
                               // 64-bit integers are converted explicitly; values that are
                               // not exactly representable in a double get rounded.
    2240          16 :         case GDT_Int64:
    2241          16 :             *pdfReal = static_cast<double>(
    2242          16 :                 reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
    2243          16 :             break;
    2244             : 
    2245          16 :         case GDT_UInt64:
    2246          16 :             *pdfReal = static_cast<double>(
    2247          16 :                 reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
    2248          16 :             break;
    2249             : 
    2250           0 :         case GDT_Float16:
    2251           0 :             *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
    2252           0 :             break;
    2253             : 
    2254           2 :         case GDT_Float32:
    2255           2 :             *pdfReal = reinterpret_cast<float *>(pabySrc)[iSrcOffset];
    2256           2 :             break;
    2257             : 
    2258          86 :         case GDT_Float64:
    2259          86 :             *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
    2260          86 :             break;
    2261             : 
                               // This real-only accessor has no output for an imaginary
                               // part, so complex working types are rejected here.
    2262           0 :         case GDT_CInt16:
    2263             :         case GDT_CInt32:
    2264             :         case GDT_CFloat16:
    2265             :         case GDT_CFloat32:
    2266             :         case GDT_CFloat64:
    2267             :         case GDT_Unknown:
    2268             :         case GDT_TypeCount:
    2269           0 :             CPLAssert(false);
    2270             :             return false;
    2271             :     }
    2272             : 
                           // Density comes from the unified density buffer when present,
                           // otherwise the sample is considered fully dense.
    2273      151448 :     if (poWK->pafUnifiedSrcDensity != nullptr)
    2274      150340 :         *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    2275             :     else
    2276        1108 :         *pdfDensity = 1.0;
    2277             : 
    2278      151448 :     return *pdfDensity != 0.0;
    2279             : }
    2280             : 
    2281             : /************************************************************************/
    2282             : /*                          GWKGetPixelRow()                            */
    2283             : /************************************************************************/
    2284             : 
                       // Fetch nHalfSrcLen * 2 consecutive source samples of band iBand,
                       // starting at element offset iSrcOffset, as doubles.
                       //
                       //   padfDensity - optional output (may be nullptr). When non-null it
                       //                 receives one density per sample: 0.0 where
                       //                 panUnifiedSrcValid or the per-band papanBandSrcValid
                       //                 mask rejects the sample, otherwise 1.0, or the
                       //                 pafUnifiedSrcDensity value when that buffer is set.
                       //   adfReal     - output, real part of each sample.
                       //   padfImag    - output, written only for the complex working types.
                       //
                       // Returns true when padfDensity is nullptr (no masking is applied), or
                       // when at least one sample ends up with a density above
                       // SRC_DENSITY_THRESHOLD. Returns false, before any sample is read, when
                       // one of the masks rejects every sample; adfReal/padfImag are then left
                       // untouched.
                       //
    2285             : /* It is assumed that padfImag[] is set to 0 by caller code for non-complex */
    2286             : /* data-types. */
    2287             : 
    2288     2352610 : static bool GWKGetPixelRow(const GDALWarpKernel *poWK, int iBand,
    2289             :                            GPtrDiff_t iSrcOffset, int nHalfSrcLen,
    2290             :                            double *padfDensity, double adfReal[],
    2291             :                            double *padfImag)
    2292             : {
    2293             :     // We know that nSrcLen is even, so we can *always* unroll loops 2x.
    2294     2352610 :     const int nSrcLen = nHalfSrcLen * 2;
    2295     2352610 :     bool bHasValid = false;
    2296             : 
                           // Masking is only evaluated when the caller asked for densities.
    2297     2352610 :     if (padfDensity != nullptr)
    2298             :     {
    2299             :         // Init the density.
    2300     3343290 :         for (int i = 0; i < nSrcLen; i += 2)
    2301             :         {
    2302     2188270 :             padfDensity[i] = 1.0;
    2303     2188270 :             padfDensity[i + 1] = 1.0;
    2304             :         }
    2305             : 
                               // Pass 1: mask shared by all bands. Rejected samples get a
                               // zero density.
    2306     1155020 :         if (poWK->panUnifiedSrcValid != nullptr)
    2307             :         {
    2308     3281460 :             for (int i = 0; i < nSrcLen; i += 2)
    2309             :             {
    2310     2142070 :                 if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i))
    2311     2067740 :                     bHasValid = true;
    2312             :                 else
    2313       74323 :                     padfDensity[i] = 0.0;
    2314             : 
    2315     2142070 :                 if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i + 1))
    2316     2068400 :                     bHasValid = true;
    2317             :                 else
    2318       73668 :                     padfDensity[i + 1] = 0.0;
    2319             :             }
    2320             : 
    2321             :             // Reset or fail as needed.
                                   // (bHasValid is reused by the following passes, so it is
                                   // cleared again once this pass found a valid sample.)
    2322     1139400 :             if (bHasValid)
    2323     1116590 :                 bHasValid = false;
    2324             :             else
    2325       22806 :                 return false;
    2326             :         }
    2327             : 
                               // Pass 2: mask specific to this band, applied on top of pass 1.
    2328     1132210 :         if (poWK->papanBandSrcValid != nullptr &&
    2329           0 :             poWK->papanBandSrcValid[iBand] != nullptr)
    2330             :         {
    2331           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2332             :             {
    2333           0 :                 if (CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset + i))
    2334           0 :                     bHasValid = true;
    2335             :                 else
    2336           0 :                     padfDensity[i] = 0.0;
    2337             : 
    2338           0 :                 if (CPLMaskGet(poWK->papanBandSrcValid[iBand],
    2339           0 :                                iSrcOffset + i + 1))
    2340           0 :                     bHasValid = true;
    2341             :                 else
    2342           0 :                     padfDensity[i + 1] = 0.0;
    2343             :             }
    2344             : 
    2345             :             // Reset or fail as needed.
    2346           0 :             if (bHasValid)
    2347           0 :                 bHasValid = false;
    2348             :             else
    2349           0 :                 return false;
    2350             :         }
    2351             :     }
    2352             : 
    2353             :     // TODO(schwehr): Fix casting.
    2354             :     // Fetch data.
                           // Every case below copies nSrcLen samples, two per iteration, from
                           // the band buffer viewed as the working type. Samples are copied
                           // regardless of the densities computed above.
    2355     2329800 :     switch (poWK->eWorkingDataType)
    2356             :     {
    2357     1121080 :         case GDT_Byte:
    2358             :         {
    2359     1121080 :             GByte *pSrc =
    2360     1121080 :                 reinterpret_cast<GByte *>(poWK->papabySrcImage[iBand]);
    2361     1121080 :             pSrc += iSrcOffset;
    2362     3243850 :             for (int i = 0; i < nSrcLen; i += 2)
    2363             :             {
    2364     2122770 :                 adfReal[i] = pSrc[i];
    2365     2122770 :                 adfReal[i + 1] = pSrc[i + 1];
    2366             :             }
    2367     1121080 :             break;
    2368             :         }
    2369             : 
    2370           0 :         case GDT_Int8:
    2371             :         {
    2372           0 :             GInt8 *pSrc =
    2373           0 :                 reinterpret_cast<GInt8 *>(poWK->papabySrcImage[iBand]);
    2374           0 :             pSrc += iSrcOffset;
    2375           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2376             :             {
    2377           0 :                 adfReal[i] = pSrc[i];
    2378           0 :                 adfReal[i + 1] = pSrc[i + 1];
    2379             :             }
    2380           0 :             break;
    2381             :         }
    2382             : 
    2383        5614 :         case GDT_Int16:
    2384             :         {
    2385        5614 :             GInt16 *pSrc =
    2386        5614 :                 reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
    2387        5614 :             pSrc += iSrcOffset;
    2388       21492 :             for (int i = 0; i < nSrcLen; i += 2)
    2389             :             {
    2390       15878 :                 adfReal[i] = pSrc[i];
    2391       15878 :                 adfReal[i + 1] = pSrc[i + 1];
    2392             :             }
    2393        5614 :             break;
    2394             :         }
    2395             : 
    2396        4142 :         case GDT_UInt16:
    2397             :         {
    2398        4142 :             GUInt16 *pSrc =
    2399        4142 :                 reinterpret_cast<GUInt16 *>(poWK->papabySrcImage[iBand]);
    2400        4142 :             pSrc += iSrcOffset;
    2401       18548 :             for (int i = 0; i < nSrcLen; i += 2)
    2402             :             {
    2403       14406 :                 adfReal[i] = pSrc[i];
    2404       14406 :                 adfReal[i + 1] = pSrc[i + 1];
    2405             :             }
    2406        4142 :             break;
    2407             :         }
    2408             : 
    2409         778 :         case GDT_Int32:
    2410             :         {
    2411         778 :             GInt32 *pSrc =
    2412         778 :                 reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
    2413         778 :             pSrc += iSrcOffset;
    2414        2288 :             for (int i = 0; i < nSrcLen; i += 2)
    2415             :             {
    2416        1510 :                 adfReal[i] = pSrc[i];
    2417        1510 :                 adfReal[i + 1] = pSrc[i + 1];
    2418             :             }
    2419         778 :             break;
    2420             :         }
    2421             : 
    2422         778 :         case GDT_UInt32:
    2423             :         {
    2424         778 :             GUInt32 *pSrc =
    2425         778 :                 reinterpret_cast<GUInt32 *>(poWK->papabySrcImage[iBand]);
    2426         778 :             pSrc += iSrcOffset;
    2427        2288 :             for (int i = 0; i < nSrcLen; i += 2)
    2428             :             {
    2429        1510 :                 adfReal[i] = pSrc[i];
    2430        1510 :                 adfReal[i + 1] = pSrc[i + 1];
    2431             :             }
    2432         778 :             break;
    2433             :         }
    2434             : 
    2435          28 :         case GDT_Int64:
    2436             :         {
    2437          28 :             auto pSrc =
    2438          28 :                 reinterpret_cast<std::int64_t *>(poWK->papabySrcImage[iBand]);
    2439          28 :             pSrc += iSrcOffset;
    2440          56 :             for (int i = 0; i < nSrcLen; i += 2)
    2441             :             {
    2442          28 :                 adfReal[i] = static_cast<double>(pSrc[i]);
    2443          28 :                 adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
    2444             :             }
    2445          28 :             break;
    2446             :         }
    2447             : 
    2448          28 :         case GDT_UInt64:
    2449             :         {
    2450          28 :             auto pSrc =
    2451          28 :                 reinterpret_cast<std::uint64_t *>(poWK->papabySrcImage[iBand]);
    2452          28 :             pSrc += iSrcOffset;
    2453          56 :             for (int i = 0; i < nSrcLen; i += 2)
    2454             :             {
    2455          28 :                 adfReal[i] = static_cast<double>(pSrc[i]);
    2456          28 :                 adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
    2457             :             }
    2458          28 :             break;
    2459             :         }
    2460             : 
    2461           0 :         case GDT_Float16:
    2462             :         {
    2463           0 :             GFloat16 *pSrc =
    2464           0 :                 reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
    2465           0 :             pSrc += iSrcOffset;
    2466           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2467             :             {
    2468           0 :                 adfReal[i] = pSrc[i];
    2469           0 :                 adfReal[i + 1] = pSrc[i + 1];
    2470             :             }
    2471           0 :             break;
    2472             :         }
    2473             : 
    2474       25102 :         case GDT_Float32:
    2475             :         {
    2476       25102 :             float *pSrc =
    2477       25102 :                 reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
    2478       25102 :             pSrc += iSrcOffset;
    2479      121403 :             for (int i = 0; i < nSrcLen; i += 2)
    2480             :             {
    2481       96301 :                 adfReal[i] = pSrc[i];
    2482       96301 :                 adfReal[i + 1] = pSrc[i + 1];
    2483             :             }
    2484       25102 :             break;
    2485             :         }
    2486             : 
    2487         778 :         case GDT_Float64:
    2488             :         {
    2489         778 :             double *pSrc =
    2490         778 :                 reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
    2491         778 :             pSrc += iSrcOffset;
    2492        2288 :             for (int i = 0; i < nSrcLen; i += 2)
    2493             :             {
    2494        1510 :                 adfReal[i] = pSrc[i];
    2495        1510 :                 adfReal[i + 1] = pSrc[i + 1];
    2496             :             }
    2497         778 :             break;
    2498             :         }
    2499             : 
                               // Complex types: each sample is an interleaved (real, imag)
                               // pair of scalars, hence the factor 2 on every index.
    2500     1169220 :         case GDT_CInt16:
    2501             :         {
    2502     1169220 :             GInt16 *pSrc =
    2503     1169220 :                 reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
    2504     1169220 :             pSrc += 2 * iSrcOffset;
    2505     4676020 :             for (int i = 0; i < nSrcLen; i += 2)
    2506             :             {
    2507     3506800 :                 adfReal[i] = pSrc[2 * i];
    2508     3506800 :                 padfImag[i] = pSrc[2 * i + 1];
    2509             : 
    2510     3506800 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2511     3506800 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2512             :             }
    2513     1169220 :             break;
    2514             :         }
    2515             : 
    2516         750 :         case GDT_CInt32:
    2517             :         {
    2518         750 :             GInt32 *pSrc =
    2519         750 :                 reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
    2520         750 :             pSrc += 2 * iSrcOffset;
    2521        2232 :             for (int i = 0; i < nSrcLen; i += 2)
    2522             :             {
    2523        1482 :                 adfReal[i] = pSrc[2 * i];
    2524        1482 :                 padfImag[i] = pSrc[2 * i + 1];
    2525             : 
    2526        1482 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2527        1482 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2528             :             }
    2529         750 :             break;
    2530             :         }
    2531             : 
    2532           0 :         case GDT_CFloat16:
    2533             :         {
    2534           0 :             GFloat16 *pSrc =
    2535           0 :                 reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
    2536           0 :             pSrc += 2 * iSrcOffset;
    2537           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2538             :             {
    2539           0 :                 adfReal[i] = pSrc[2 * i];
    2540           0 :                 padfImag[i] = pSrc[2 * i + 1];
    2541             : 
    2542           0 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2543           0 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2544             :             }
    2545           0 :             break;
    2546             :         }
    2547             : 
    2548         750 :         case GDT_CFloat32:
    2549             :         {
    2550         750 :             float *pSrc =
    2551         750 :                 reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
    2552         750 :             pSrc += 2 * iSrcOffset;
    2553        2232 :             for (int i = 0; i < nSrcLen; i += 2)
    2554             :             {
    2555        1482 :                 adfReal[i] = pSrc[2 * i];
    2556        1482 :                 padfImag[i] = pSrc[2 * i + 1];
    2557             : 
    2558        1482 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2559        1482 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2560             :             }
    2561         750 :             break;
    2562             :         }
    2563             : 
    2564         750 :         case GDT_CFloat64:
    2565             :         {
    2566         750 :             double *pSrc =
    2567         750 :                 reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
    2568         750 :             pSrc += 2 * iSrcOffset;
    2569        2232 :             for (int i = 0; i < nSrcLen; i += 2)
    2570             :             {
    2571        1482 :                 adfReal[i] = pSrc[2 * i];
    2572        1482 :                 padfImag[i] = pSrc[2 * i + 1];
    2573             : 
    2574        1482 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2575        1482 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2576             :             }
    2577         750 :             break;
    2578             :         }
    2579             : 
                               // Unsupported type: report every sample as empty (all-zero
                               // bytes, i.e. 0.0 densities) and fail.
    2580           0 :         case GDT_Unknown:
    2581             :         case GDT_TypeCount:
    2582           0 :             CPLAssert(false);
    2583             :             if (padfDensity)
    2584             :                 memset(padfDensity, 0, nSrcLen * sizeof(double));
    2585             :             return false;
    2586             :     }
    2587             : 
                           // Without a density output there is nothing left to evaluate.
    2588     2329800 :     if (padfDensity == nullptr)
    2589     1197590 :         return true;
    2590             : 
                           // Final pass: densities still above the threshold survived the
                           // masks. They become 1.0, or the per-pixel unified density when
                           // that buffer exists; masked samples keep their 0.0.
    2591     1132210 :     if (poWK->pafUnifiedSrcDensity == nullptr)
    2592             :     {
    2593     3231720 :         for (int i = 0; i < nSrcLen; i += 2)
    2594             :         {
    2595             :             // Take into account earlier calcs.
    2596     2111610 :             if (padfDensity[i] > SRC_DENSITY_THRESHOLD)
    2597             :             {
    2598     2071710 :                 padfDensity[i] = 1.0;
    2599     2071710 :                 bHasValid = true;
    2600             :             }
    2601             : 
    2602     2111610 :             if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD)
    2603             :             {
    2604     2072360 :                 padfDensity[i + 1] = 1.0;
    2605     2072360 :                 bHasValid = true;
    2606             :             }
    2607             :         }
    2608             :     }
    2609             :     else
    2610             :     {
    2611       54348 :         for (int i = 0; i < nSrcLen; i += 2)
    2612             :         {
                                   // The threshold is tested twice on purpose: once on the
                                   // mask result, then again on the density just loaded.
    2613       42243 :             if (padfDensity[i] > SRC_DENSITY_THRESHOLD)
    2614       42243 :                 padfDensity[i] = poWK->pafUnifiedSrcDensity[iSrcOffset + i];
    2615       42243 :             if (padfDensity[i] > SRC_DENSITY_THRESHOLD)
    2616       41704 :                 bHasValid = true;
    2617             : 
    2618       42243 :             if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD)
    2619       42243 :                 padfDensity[i + 1] =
    2620       42243 :                     poWK->pafUnifiedSrcDensity[iSrcOffset + i + 1];
    2621       42243 :             if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD)
    2622       41594 :                 bHasValid = true;
    2623             :         }
    2624             :     }
    2625             : 
    2626     1132210 :     return bHasValid;
    2627             : }
    2628             : 
    2629             : /************************************************************************/
    2630             : /*                          GWKGetPixelT()                              */
    2631             : /************************************************************************/
    2632             : 
                       // Typed single-sample fetch: the band buffer is viewed as an array of T
                       // and element iSrcOffset is copied to *pValue without any conversion.
                       // poWK->eWorkingDataType is not inspected here, so the instantiation
                       // type T decides how the buffer is interpreted.
                       //
                       // When the unified mask or the per-band mask rejects the sample,
                       // *pdfDensity is set to 0.0, *pValue is left untouched and false is
                       // returned. Otherwise *pdfDensity is pafUnifiedSrcDensity[iSrcOffset]
                       // (1.0 when that buffer is absent) and the function returns whether
                       // that density is non-zero.
    2633             : template <class T>
    2634     8159117 : static bool GWKGetPixelT(const GDALWarpKernel *poWK, int iBand,
    2635             :                          GPtrDiff_t iSrcOffset, double *pdfDensity, T *pValue)
    2636             : 
    2637             : {
    2638     8159117 :     T *pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
    2639             : 
                           // Both masks are optional; a sample is rejected as soon as one
                           // of the masks that is present reports it as not set.
    2640    18455895 :     if ((poWK->panUnifiedSrcValid != nullptr &&
    2641    16318274 :          !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset)) ||
    2642     8159117 :         (poWK->papanBandSrcValid != nullptr &&
    2643          21 :          poWK->papanBandSrcValid[iBand] != nullptr &&
    2644          21 :          !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset)))
    2645             :     {
    2646           9 :         *pdfDensity = 0.0;
    2647           9 :         return false;
    2648             :     }
    2649             : 
    2650     8159107 :     *pValue = pSrc[iSrcOffset];
    2651             : 
    2652     8159107 :     if (poWK->pafUnifiedSrcDensity == nullptr)
    2653     7974694 :         *pdfDensity = 1.0;
    2654             :     else
    2655      184414 :         *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    2656             : 
    2657     8159107 :     return *pdfDensity != 0.0;
    2658             : }
    2659             : 
    2660             : /************************************************************************/
    2661             : /*                        GWKBilinearResample()                         */
    2662             : /*     Set of bilinear interpolators                                    */
    2663             : /************************************************************************/
    2664             : 
                       // Bilinear interpolation of band iBand at source position
                       // (dfSrcX, dfSrcY) from the 2x2 neighbourhood of samples, honouring the
                       // validity masks and densities reported by GWKGetPixelRow().
                       //
                       // Samples that fall outside the image or whose density is not above
                       // SRC_DENSITY_THRESHOLD are skipped, and the result is renormalised by
                       // the sum of the weights that were actually used.
                       //
                       // Outputs: *pdfReal / *pdfImag receive the interpolated value and
                       // *pdfDensity the interpolated density; all three are zeroed when the
                       // usable weight is below 0.00001.
                       //
                       // Returns true only on the renormalising path (weights neither
                       // summing to exactly 1.0 nor below 0.00001).
                       // NOTE(review): when the weights sum to exactly 1.0 the outputs are
                       // filled in but false is returned -- confirm whether callers rely on
                       // the return value before changing this.
    2665       72024 : static bool GWKBilinearResample4Sample(const GDALWarpKernel *poWK, int iBand,
    2666             :                                        double dfSrcX, double dfSrcY,
    2667             :                                        double *pdfDensity, double *pdfReal,
    2668             :                                        double *pdfImag)
    2669             : 
    2670             : {
    2671             :     // Save as local variables to avoid following pointers.
    2672       72024 :     const int nSrcXSize = poWK->nSrcXSize;
    2673       72024 :     const int nSrcYSize = poWK->nSrcYSize;
    2674             : 
                           // (iSrcX, iSrcY) is the upper-left sample of the neighbourhood.
                           // dfRatioX / dfRatioY are the weights given to that column / row;
                           // the right column / lower row get (1 - ratio), see dfMult1 and
                           // dfMult2 below.
    2675       72024 :     int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    2676       72024 :     int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    2677       72024 :     double dfRatioX = 1.5 - (dfSrcX - iSrcX);
    2678       72024 :     double dfRatioY = 1.5 - (dfSrcY - iSrcY);
    2679       72024 :     bool bShifted = false;
    2680             : 
                           // An index of -1 is clamped to the first column / row, which then
                           // receives the whole weight along that axis.
    2681       72024 :     if (iSrcX == -1)
    2682             :     {
    2683         292 :         iSrcX = 0;
    2684         292 :         dfRatioX = 1;
    2685             :     }
    2686       72024 :     if (iSrcY == -1)
    2687             :     {
    2688        7686 :         iSrcY = 0;
    2689        7686 :         dfRatioY = 1;
    2690             :     }
    2691       72024 :     GPtrDiff_t iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    2692             : 
    2693             :     // Shift so we don't overrun the array.
                           // Each row fetch below reads 2 samples. If the first of them is
                           // the very last pixel of the image, in this row or in the next
                           // one, start one sample earlier; element [1] is then copied over
                           // element [0] after each fetch ("Shifting corrected").
    2694       72024 :     if (static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize == iSrcOffset + 1 ||
    2695       71972 :         static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize ==
    2696       71972 :             iSrcOffset + nSrcXSize + 1)
    2697             :     {
    2698         104 :         bShifted = true;
    2699         104 :         --iSrcOffset;
    2700             :     }
    2701             : 
                           // adfImag is zero-initialised here because GWKGetPixelRow() only
                           // writes the imaginary parts for complex working types.
    2702       72024 :     double adfDensity[2] = {0.0, 0.0};
    2703       72024 :     double adfReal[2] = {0.0, 0.0};
    2704       72024 :     double adfImag[2] = {0.0, 0.0};
    2705       72024 :     double dfAccumulatorReal = 0.0;
    2706       72024 :     double dfAccumulatorImag = 0.0;
    2707       72024 :     double dfAccumulatorDensity = 0.0;
    2708       72024 :     double dfAccumulatorDivisor = 0.0;
    2709             : 
    2710       72024 :     const GPtrDiff_t nSrcPixels =
    2711       72024 :         static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize;
    2712             :     // Get pixel row.
                           // Upper row: fetched only when the row index and the offset are
                           // inside the image; nHalfSrcLen == 1 requests 2 samples.
    2713       72024 :     if (iSrcY >= 0 && iSrcY < nSrcYSize && iSrcOffset >= 0 &&
    2714      144048 :         iSrcOffset < nSrcPixels &&
    2715       72024 :         GWKGetPixelRow(poWK, iBand, iSrcOffset, 1, adfDensity, adfReal,
    2716             :                        adfImag))
    2717             :     {
    2718       66368 :         double dfMult1 = dfRatioX * dfRatioY;
    2719       66368 :         double dfMult2 = (1.0 - dfRatioX) * dfRatioY;
    2720             : 
    2721             :         // Shifting corrected.
    2722       66368 :         if (bShifted)
    2723             :         {
    2724         104 :             adfReal[0] = adfReal[1];
    2725         104 :             adfImag[0] = adfImag[1];
    2726         104 :             adfDensity[0] = adfDensity[1];
    2727             :         }
    2728             : 
    2729             :         // Upper Left Pixel.
    2730       66368 :         if (iSrcX >= 0 && iSrcX < nSrcXSize &&
    2731       66368 :             adfDensity[0] > SRC_DENSITY_THRESHOLD)
    2732             :         {
    2733       60938 :             dfAccumulatorDivisor += dfMult1;
    2734             : 
    2735       60938 :             dfAccumulatorReal += adfReal[0] * dfMult1;
    2736       60938 :             dfAccumulatorImag += adfImag[0] * dfMult1;
    2737       60938 :             dfAccumulatorDensity += adfDensity[0] * dfMult1;
    2738             :         }
    2739             : 
    2740             :         // Upper Right Pixel.
    2741       66368 :         if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
    2742       65827 :             adfDensity[1] > SRC_DENSITY_THRESHOLD)
    2743             :         {
    2744       60553 :             dfAccumulatorDivisor += dfMult2;
    2745             : 
    2746       60553 :             dfAccumulatorReal += adfReal[1] * dfMult2;
    2747       60553 :             dfAccumulatorImag += adfImag[1] * dfMult2;
    2748       60553 :             dfAccumulatorDensity += adfDensity[1] * dfMult2;
    2749             :         }
    2750             :     }
    2751             : 
    2752             :     // Get pixel row.
                           // Lower row: same procedure one image row further, i.e. at
                           // iSrcOffset + nSrcXSize, weighted by (1 - dfRatioY).
    2753       72024 :     if (iSrcY + 1 >= 0 && iSrcY + 1 < nSrcYSize &&
    2754      212030 :         iSrcOffset + nSrcXSize >= 0 && iSrcOffset + nSrcXSize < nSrcPixels &&
    2755       67982 :         GWKGetPixelRow(poWK, iBand, iSrcOffset + nSrcXSize, 1, adfDensity,
    2756             :                        adfReal, adfImag))
    2757             :     {
    2758       62423 :         double dfMult1 = dfRatioX * (1.0 - dfRatioY);
    2759       62423 :         double dfMult2 = (1.0 - dfRatioX) * (1.0 - dfRatioY);
    2760             : 
    2761             :         // Shifting corrected
    2762       62423 :         if (bShifted)
    2763             :         {
    2764          52 :             adfReal[0] = adfReal[1];
    2765          52 :             adfImag[0] = adfImag[1];
    2766          52 :             adfDensity[0] = adfDensity[1];
    2767             :         }
    2768             : 
    2769             :         // Lower Left Pixel
    2770       62423 :         if (iSrcX >= 0 && iSrcX < nSrcXSize &&
    2771       62423 :             adfDensity[0] > SRC_DENSITY_THRESHOLD)
    2772             :         {
    2773       57144 :             dfAccumulatorDivisor += dfMult1;
    2774             : 
    2775       57144 :             dfAccumulatorReal += adfReal[0] * dfMult1;
    2776       57144 :             dfAccumulatorImag += adfImag[0] * dfMult1;
    2777       57144 :             dfAccumulatorDensity += adfDensity[0] * dfMult1;
    2778             :         }
    2779             : 
    2780             :         // Lower Right Pixel.
    2781       62423 :         if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
    2782       61934 :             adfDensity[1] > SRC_DENSITY_THRESHOLD)
    2783             :         {
    2784       56957 :             dfAccumulatorDivisor += dfMult2;
    2785             : 
    2786       56957 :             dfAccumulatorReal += adfReal[1] * dfMult2;
    2787       56957 :             dfAccumulatorImag += adfImag[1] * dfMult2;
    2788       56957 :             dfAccumulatorDensity += adfDensity[1] * dfMult2;
    2789             :         }
    2790             :     }
    2791             : 
    2792             :     /* -------------------------------------------------------------------- */
    2793             :     /*      Return result.                                                  */
    2794             :     /* -------------------------------------------------------------------- */
                           // The used weights already sum to exactly 1: the accumulators are
                           // the result and no normalisation is needed.
                           // NOTE(review): this path returns false although valid outputs
                           // are produced -- verify against the callers.
    2795       72024 :     if (dfAccumulatorDivisor == 1.0)
    2796             :     {
    2797       40967 :         *pdfReal = dfAccumulatorReal;
    2798       40967 :         *pdfImag = dfAccumulatorImag;
    2799       40967 :         *pdfDensity = dfAccumulatorDensity;
    2800       40967 :         return false;
    2801             :     }
                           // (Almost) no usable sample contributed: report an empty pixel.
    2802       31057 :     else if (dfAccumulatorDivisor < 0.00001)
    2803             :     {
    2804           0 :         *pdfReal = 0.0;
    2805           0 :         *pdfImag = 0.0;
    2806           0 :         *pdfDensity = 0.0;
    2807           0 :         return false;
    2808             :     }
                           // Some samples were skipped: renormalise by the weight in use.
    2809             :     else
    2810             :     {
    2811       31057 :         *pdfReal = dfAccumulatorReal / dfAccumulatorDivisor;
    2812       31057 :         *pdfImag = dfAccumulatorImag / dfAccumulatorDivisor;
    2813       31057 :         *pdfDensity = dfAccumulatorDensity / dfAccumulatorDivisor;
    2814       31057 :         return true;
    2815             :     }
    2816             : }
    2817             : 
    2818             : template <class T>
    2819     3672314 : static bool GWKBilinearResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
    2820             :                                                int iBand, double dfSrcX,
    2821             :                                                double dfSrcY, T *pValue)
    2822             : 
    2823             : {
    2824             : 
    2825     3672314 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    2826     3672314 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    2827     3672314 :     GPtrDiff_t iSrcOffset =
    2828     3672314 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    2829     3672314 :     const double dfRatioX = 1.5 - (dfSrcX - iSrcX);
    2830     3672314 :     const double dfRatioY = 1.5 - (dfSrcY - iSrcY);
    2831             : 
    2832     3672314 :     const T *const pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
    2833             : 
    2834     3672314 :     if (iSrcX >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
    2835     3570478 :         iSrcY + 1 < poWK->nSrcYSize)
    2836             :     {
    2837     3547620 :         const double dfAccumulator =
    2838     3547620 :             (pSrc[iSrcOffset] * dfRatioX +
    2839     3547620 :              pSrc[iSrcOffset + 1] * (1.0 - dfRatioX)) *
    2840             :                 dfRatioY +
    2841     3547620 :             (pSrc[iSrcOffset + poWK->nSrcXSize] * dfRatioX +
    2842     3547620 :              pSrc[iSrcOffset + 1 + poWK->nSrcXSize] * (1.0 - dfRatioX)) *
    2843     3547620 :                 (1.0 - dfRatioY);
    2844             : 
    2845     3547620 :         *pValue = GWKRoundValueT<T>(dfAccumulator);
    2846             : 
    2847     3547620 :         return true;
    2848             :     }
    2849             : 
    2850      124701 :     double dfAccumulatorDivisor = 0.0;
    2851      124701 :     double dfAccumulator = 0.0;
    2852             : 
    2853             :     // Upper Left Pixel.
    2854      124701 :     if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY >= 0 &&
    2855       51993 :         iSrcY < poWK->nSrcYSize)
    2856             :     {
    2857       51993 :         const double dfMult = dfRatioX * dfRatioY;
    2858             : 
    2859       51993 :         dfAccumulatorDivisor += dfMult;
    2860             : 
    2861       51993 :         dfAccumulator += pSrc[iSrcOffset] * dfMult;
    2862             :     }
    2863             : 
    2864             :     // Upper Right Pixel.
    2865      124701 :     if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
    2866       58836 :         iSrcY < poWK->nSrcYSize)
    2867             :     {
    2868       58836 :         const double dfMult = (1.0 - dfRatioX) * dfRatioY;
    2869             : 
    2870       58836 :         dfAccumulatorDivisor += dfMult;
    2871             : 
    2872       58836 :         dfAccumulator += pSrc[iSrcOffset + 1] * dfMult;
    2873             :     }
    2874             : 
    2875             :     // Lower Right Pixel.
    2876      124701 :     if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
    2877       94953 :         iSrcY + 1 < poWK->nSrcYSize)
    2878             :     {
    2879       71702 :         const double dfMult = (1.0 - dfRatioX) * (1.0 - dfRatioY);
    2880             : 
    2881       71702 :         dfAccumulatorDivisor += dfMult;
    2882             : 
    2883       71702 :         dfAccumulator += pSrc[iSrcOffset + 1 + poWK->nSrcXSize] * dfMult;
    2884             :     }
    2885             : 
    2886             :     // Lower Left Pixel.
    2887      124701 :     if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
    2888       88088 :         iSrcY + 1 < poWK->nSrcYSize)
    2889             :     {
    2890       64641 :         const double dfMult = dfRatioX * (1.0 - dfRatioY);
    2891             : 
    2892       64641 :         dfAccumulatorDivisor += dfMult;
    2893             : 
    2894       64641 :         dfAccumulator += pSrc[iSrcOffset + poWK->nSrcXSize] * dfMult;
    2895             :     }
    2896             : 
    2897             :     /* -------------------------------------------------------------------- */
    2898             :     /*      Return result.                                                  */
    2899             :     /* -------------------------------------------------------------------- */
    2900      124701 :     double dfValue = 0.0;
    2901             : 
    2902      124701 :     if (dfAccumulatorDivisor < 0.00001)
    2903             :     {
    2904           0 :         *pValue = 0;
    2905           0 :         return false;
    2906             :     }
    2907      124701 :     else if (dfAccumulatorDivisor == 1.0)
    2908             :     {
    2909        7320 :         dfValue = dfAccumulator;
    2910             :     }
    2911             :     else
    2912             :     {
    2913      117381 :         dfValue = dfAccumulator / dfAccumulatorDivisor;
    2914             :     }
    2915             : 
    2916      124701 :     *pValue = GWKRoundValueT<T>(dfValue);
    2917             : 
    2918      124701 :     return true;
    2919             : }
    2920             : 
    2921             : /************************************************************************/
    2922             : /*                        GWKCubicResample()                            */
    2923             : /*     Set of bicubic interpolators using cubic convolution.            */
    2924             : /************************************************************************/
    2925             : 
    2926             : // http://verona.fi-p.unam.mx/boris/practicas/CubConvInterp.pdf Formula 18
    2927             : // or http://en.wikipedia.org/wiki/Cubic_Hermite_spline : CINTx(p_1,p0,p1,p2)
    2928             : // http://en.wikipedia.org/wiki/Bicubic_interpolation: matrix notation
    2929             : 
    /**
     * One-dimensional cubic convolution of four consecutive samples.
     *
     * @param distance1 multiplier of the (f2 - f0) term.
     * @param distance2 multiplier of the (2*f0 - 5*f1 + 4*f2 - f3) term.
     * @param distance3 multiplier of the (3*(f1 - f2) + f3 - f0) term.
     * @param f0,f1,f2,f3 the four samples.
     * @return f1 plus half of the weighted sum of the three terms. With all
     *         three distances equal to 0 the result is f1, with all equal
     *         to 1 it is f2.
     */
    template <typename T>
    static inline T CubicConvolution(T distance1, T distance2, T distance3, T f0,
                                     T f1, T f2, T f3)
    {
        const T firstOrder = f2 - f0;
        const T secondOrder = 2 * f0 - 5 * f1 + 4 * f2 - f3;
        const T thirdOrder = 3 * (f1 - f2) + f3 - f0;

        return f1 + T(0.5) * (distance1 * firstOrder + distance2 * secondOrder +
                              distance3 * thirdOrder);
    }
    2938             : 
    2939             : /************************************************************************/
    2940             : /*                       GWKCubicComputeWeights()                       */
    2941             : /************************************************************************/
    2942             : 
    2943             : // adfCoeffs[2] = 1.0 - (adfCoeffs[0] + adfCoeffs[1] + adfCoeffs[3]);
    2944             : 
    /**
     * Fill coeffs[0..3] with the four cubic convolution weights for the
     * fractional position x.
     *
     * For x == 0 the weights are (0, 1, 0, 0) and for x == 1 they are
     * (0, 0, 1, 0).
     *
     * @param x       fractional offset.
     * @param coeffs  output array of four weights.
     */
    template <typename T>
    static inline void GWKCubicComputeWeights(T x, T coeffs[4])
    {
        const T xHalf = T(0.5) * x;
        const T xTriple = T(3.0) * x;
        const T xSquaredHalf = xHalf * x;

        coeffs[0] = xHalf * (-1 + x * (2 - x));
        coeffs[1] = 1 + xSquaredHalf * (-5 + xTriple);
        coeffs[2] = xHalf * (1 + x * (4 - xTriple));
        coeffs[3] = xSquaredHalf * (-1 + x);
    }
    2957             : 
    // Dot product of the first four elements of two indexable operands
    // (arrays or pointers). The products are summed from index 0 to index 3.
    // TODO(schwehr): Use an inline function.
    #define CONVOL4(lhs, rhs)                                                      \
        ((lhs)[0] * (rhs)[0] + (lhs)[1] * (rhs)[1] + (lhs)[2] * (rhs)[2] +         \
         (lhs)[3] * (rhs)[3])
    2962             : 
    #if 0
    // Disabled experiment. Variant of the weight computation that factors
    // 0.5 * x out of the weights so it is applied once per convolution.
    // Original note: optimal (in theory...) for max 2 convolutions:
    // 14 multiplications instead of 17.
    // TODO(schwehr): Use an inline function.
    #define GWKCubicComputeWeights_Optim2MAX(dfX_, adfCoeffs, dfHalfX)             \
        {                                                                          \
            const double dfXValue = dfX_;                                          \
            dfHalfX = 0.5 * dfXValue;                                              \
            const double dfTripleX = 3.0 * dfXValue;                               \
            const double dfXLessOne = dfXValue - 1;                                \
                                                                                   \
            adfCoeffs[0] = -1 + dfXValue * (2 - dfXValue);                         \
            adfCoeffs[1] = dfXValue * (-5 + dfTripleX);                            \
            /* Equals 1 + x * (4 - 3 * x), without a multiplication. */            \
            adfCoeffs[2] = -dfXLessOne - adfCoeffs[1];                             \
            /* Equals x * (-1 + x), without a multiplication. */                   \
            adfCoeffs[3] = dfXLessOne - adfCoeffs[0];                              \
        }

    // Convolution matching the factored weights above: the centre sample
    // (v)[1] plus dfHalfX times the weighted sum of the four samples.
    // TODO(schwehr): Use an inline function.
    #define CONVOL4_Optim2MAX(adfWeights, v, dfHalfX)                              \
        ((v)[1] +                                                                  \
         (dfHalfX) * ((adfWeights)[0] * (v)[0] + (adfWeights)[1] * (v)[1] +        \
                      (adfWeights)[2] * (v)[2] + (adfWeights)[3] * (v)[3]))
    #endif
    2987             : 
    2988      299879 : static bool GWKCubicResample4Sample(const GDALWarpKernel *poWK, int iBand,
    2989             :                                     double dfSrcX, double dfSrcY,
    2990             :                                     double *pdfDensity, double *pdfReal,
    2991             :                                     double *pdfImag)
    2992             : 
    2993             : {
    2994      299879 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    2995      299879 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    2996      299879 :     GPtrDiff_t iSrcOffset =
    2997      299879 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    2998      299879 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    2999      299879 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3000      299879 :     double adfDensity[4] = {};
    3001      299879 :     double adfReal[4] = {};
    3002      299879 :     double adfImag[4] = {};
    3003             : 
    3004             :     // Get the bilinear interpolation at the image borders.
    3005      299879 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3006      284412 :         iSrcY + 2 >= poWK->nSrcYSize)
    3007       24136 :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3008       24136 :                                           pdfDensity, pdfReal, pdfImag);
    3009             : 
    3010      275743 :     double adfValueDens[4] = {};
    3011      275743 :     double adfValueReal[4] = {};
    3012      275743 :     double adfValueImag[4] = {};
    3013             : 
    3014      275743 :     double adfCoeffsX[4] = {};
    3015      275743 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
    3016             : 
    3017     1232410 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3018             :     {
    3019     1003120 :         if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
    3020      991507 :                             2, adfDensity, adfReal, adfImag) ||
    3021      991507 :             adfDensity[0] < SRC_DENSITY_THRESHOLD ||
    3022      973867 :             adfDensity[1] < SRC_DENSITY_THRESHOLD ||
    3023     2960190 :             adfDensity[2] < SRC_DENSITY_THRESHOLD ||
    3024      965566 :             adfDensity[3] < SRC_DENSITY_THRESHOLD)
    3025             :         {
    3026       46449 :             return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3027       46449 :                                               pdfDensity, pdfReal, pdfImag);
    3028             :         }
    3029             : 
    3030      956668 :         adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
    3031      956668 :         adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
    3032      956668 :         adfValueImag[i + 1] = CONVOL4(adfCoeffsX, adfImag);
    3033             :     }
    3034             : 
    3035             :     /* -------------------------------------------------------------------- */
    3036             :     /*      For now, if we have any pixels missing in the kernel area,      */
    3037             :     /*      we fallback on using bilinear interpolation.  Ideally we        */
    3038             :     /*      should do "weight adjustment" of our results similarly to       */
    3039             :     /*      what is done for the cubic spline and lanc. interpolators.      */
    3040             :     /* -------------------------------------------------------------------- */
    3041             : 
    3042      229294 :     double adfCoeffsY[4] = {};
    3043      229294 :     GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
    3044             : 
    3045      229294 :     *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
    3046      229294 :     *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
    3047      229294 :     *pdfImag = CONVOL4(adfCoeffsY, adfValueImag);
    3048             : 
    3049      229294 :     return true;
    3050             : }
    3051             : 
    3052             : #ifdef USE_SSE2
    3053             : 
    3054             : /************************************************************************/
    3055             : /*                           XMMLoad4Values()                           */
    3056             : /*                                                                      */
    3057             : /*  Load 4 packed byte or uint16, cast them to float and put them in a  */
    3058             : /*  m128 register.                                                      */
    3059             : /************************************************************************/
    3060             : 
    3061      949092 : static CPL_INLINE __m128 XMMLoad4Values(const GByte *ptr)
    3062             : {
    3063             :     unsigned int i;
    3064      949092 :     memcpy(&i, ptr, 4);
    3065     1898180 :     __m128i xmm_i = _mm_cvtsi32_si128(i);
    3066             :     // Zero extend 4 packed unsigned 8-bit integers in a to packed
    3067             :     // 32-bit integers.
    3068             : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
    3069             :     xmm_i = _mm_cvtepu8_epi32(xmm_i);
    3070             : #else
    3071     1898180 :     xmm_i = _mm_unpacklo_epi8(xmm_i, _mm_setzero_si128());
    3072     1898180 :     xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
    3073             : #endif
    3074     1898180 :     return _mm_cvtepi32_ps(xmm_i);
    3075             : }
    3076             : 
    3077        5292 : static CPL_INLINE __m128 XMMLoad4Values(const GUInt16 *ptr)
    3078             : {
    3079             :     GUInt64 i;
    3080        5292 :     memcpy(&i, ptr, 8);
    3081       10584 :     __m128i xmm_i = _mm_cvtsi64_si128(i);
    3082             :     // Zero extend 4 packed unsigned 16-bit integers in a to packed
    3083             :     // 32-bit integers.
    3084             : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
    3085             :     xmm_i = _mm_cvtepu16_epi32(xmm_i);
    3086             : #else
    3087       10584 :     xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
    3088             : #endif
    3089       10584 :     return _mm_cvtepi32_ps(xmm_i);
    3090             : }
    3091             : 
    3092             : /************************************************************************/
    3093             : /*                           XMMHorizontalAdd()                         */
    3094             : /*                                                                      */
    3095             : /*  Return the sum of the 4 floating points of the register.            */
    3096             : /************************************************************************/
    3097             : 
    3098             : #if defined(__SSE3__) || defined(USE_NEON_OPTIMIZATIONS)
    3099             : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
    3100             : {
    3101             :     __m128 shuf = _mm_movehdup_ps(v);   // (v3   , v3   , v1   , v1)
    3102             :     __m128 sums = _mm_add_ps(v, shuf);  // (v3+v3, v3+v2, v1+v1, v1+v0)
    3103             :     shuf = _mm_movehl_ps(shuf, sums);   // (v3   , v3   , v3+v3, v3+v2)
    3104             :     sums = _mm_add_ss(sums, shuf);      // (v1+v0)+(v3+v2)
    3105             :     return _mm_cvtss_f32(sums);
    3106             : }
    3107             : #else
    3108      238596 : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
    3109             : {
    3110      238596 :     __m128 shuf = _mm_movehl_ps(v, v);     // (v3   , v2   , v3   , v2)
    3111      238596 :     __m128 sums = _mm_add_ps(v, shuf);     // (v3+v3, v2+v2, v3+v1, v2+v0)
    3112      238596 :     shuf = _mm_shuffle_ps(sums, sums, 1);  // (v2+v0, v2+v0, v2+v0, v3+v1)
    3113      238596 :     sums = _mm_add_ss(sums, shuf);         // (v2+v0)+(v3+v1)
    3114      238596 :     return _mm_cvtss_f32(sums);
    3115             : }
    3116             : #endif
    3117             : 
    3118             : #endif  // define USE_SSE2
    3119             : 
    3120             : /************************************************************************/
    3121             : /*            GWKCubicResampleSrcMaskIsDensity4SampleRealT()            */
    3122             : /************************************************************************/
    3123             : 
    3124             : // Note: if USE_SSE_CUBIC_IMPL, only instantiate that for Byte and UInt16,
    3125             : // because there are a few assumptions about those types.
    3126             : // We do not define USE_SSE_CUBIC_IMPL since in practice, it gives zero
    3127             : // perf benefit.
    3128             : 
    3129             : template <class T>
    3130         361 : static CPL_INLINE bool GWKCubicResampleSrcMaskIsDensity4SampleRealT(
    3131             :     const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
    3132             :     double *pdfDensity, double *pdfReal)
    3133             : {
    3134         361 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3135         361 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3136         361 :     const GPtrDiff_t iSrcOffset =
    3137         361 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3138             : 
    3139             :     // Get the bilinear interpolation at the image borders.
    3140         361 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3141         361 :         iSrcY + 2 >= poWK->nSrcYSize)
    3142             :     {
    3143           0 :         double adfImagIgnored[4] = {};
    3144           0 :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3145           0 :                                           pdfDensity, pdfReal, adfImagIgnored);
    3146             :     }
    3147             : 
    3148             : #if defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
    3149             :     const float fDeltaX = static_cast<float>(dfSrcX) - 0.5f - iSrcX;
    3150             :     const float fDeltaY = static_cast<float>(dfSrcY) - 0.5f - iSrcY;
    3151             : 
    3152             :     // TODO(schwehr): Explain the magic numbers.
    3153             :     float afTemp[4 + 4 + 4 + 1];
    3154             :     float *pafAligned =
    3155             :         reinterpret_cast<float *>(afTemp + ((size_t)afTemp & 0xf));
    3156             :     float *pafCoeffs = pafAligned;
    3157             :     float *pafDensity = pafAligned + 4;
    3158             :     float *pafValue = pafAligned + 8;
    3159             : 
    3160             :     const float fHalfDeltaX = 0.5f * fDeltaX;
    3161             :     const float fThreeDeltaX = 3.0f * fDeltaX;
    3162             :     const float fHalfDeltaX2 = fHalfDeltaX * fDeltaX;
    3163             : 
    3164             :     pafCoeffs[0] = fHalfDeltaX * (-1 + fDeltaX * (2 - fDeltaX));
    3165             :     pafCoeffs[1] = 1 + fHalfDeltaX2 * (-5 + fThreeDeltaX);
    3166             :     pafCoeffs[2] = fHalfDeltaX * (1 + fDeltaX * (4 - fThreeDeltaX));
    3167             :     pafCoeffs[3] = fHalfDeltaX2 * (-1 + fDeltaX);
    3168             :     __m128 xmmCoeffs = _mm_load_ps(pafCoeffs);
    3169             :     const __m128 xmmThreshold = _mm_load1_ps(&SRC_DENSITY_THRESHOLD);
    3170             : 
    3171             :     __m128 xmmMaskLowDensity = _mm_setzero_ps();
    3172             :     for (GPtrDiff_t i = -1, iOffset = iSrcOffset - poWK->nSrcXSize - 1; i < 3;
    3173             :          i++, iOffset += poWK->nSrcXSize)
    3174             :     {
    3175             :         const __m128 xmmDensity =
    3176             :             _mm_loadu_ps(poWK->pafUnifiedSrcDensity + iOffset);
    3177             :         xmmMaskLowDensity = _mm_or_ps(xmmMaskLowDensity,
    3178             :                                       _mm_cmplt_ps(xmmDensity, xmmThreshold));
    3179             :         pafDensity[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
    3180             : 
    3181             :         const __m128 xmmValues =
    3182             :             XMMLoad4Values(((T *)poWK->papabySrcImage[iBand]) + iOffset);
    3183             :         pafValue[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValues));
    3184             :     }
    3185             :     if (_mm_movemask_ps(xmmMaskLowDensity))
    3186             :     {
    3187             :         double adfImagIgnored[4] = {};
    3188             :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3189             :                                           pdfDensity, pdfReal, adfImagIgnored);
    3190             :     }
    3191             : 
    3192             :     const float fHalfDeltaY = 0.5f * fDeltaY;
    3193             :     const float fThreeDeltaY = 3.0f * fDeltaY;
    3194             :     const float fHalfDeltaY2 = fHalfDeltaY * fDeltaY;
    3195             : 
    3196             :     pafCoeffs[0] = fHalfDeltaY * (-1 + fDeltaY * (2 - fDeltaY));
    3197             :     pafCoeffs[1] = 1 + fHalfDeltaY2 * (-5 + fThreeDeltaY);
    3198             :     pafCoeffs[2] = fHalfDeltaY * (1 + fDeltaY * (4 - fThreeDeltaY));
    3199             :     pafCoeffs[3] = fHalfDeltaY2 * (-1 + fDeltaY);
    3200             : 
    3201             :     xmmCoeffs = _mm_load_ps(pafCoeffs);
    3202             : 
    3203             :     const __m128 xmmDensity = _mm_load_ps(pafDensity);
    3204             :     const __m128 xmmValue = _mm_load_ps(pafValue);
    3205             :     *pdfDensity = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
    3206             :     *pdfReal = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValue));
    3207             : 
    3208             :     // We did all above computations on float32 whereas the general case is
    3209             :     // float64. Not sure if one is fundamentally more correct than the other
    3210             :     // one, but we want our optimization to give the same result as the
    3211             :     // general case as much as possible, so if the resulting value is
    3212             :     // close to some_int_value + 0.5, redo the computation with the general
    3213             :     // case.
    3214             :     // Note: If other types than Byte or UInt16, will need changes.
    3215             :     if (fabs(*pdfReal - static_cast<int>(*pdfReal) - 0.5) > .007)
    3216             :         return true;
    3217             : 
    3218             : #endif  // defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
    3219             : 
    3220         361 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3221         361 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3222             : 
    3223         361 :     double adfValueDens[4] = {};
    3224         361 :     double adfValueReal[4] = {};
    3225             : 
    3226         361 :     double adfCoeffsX[4] = {};
    3227         361 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
    3228             : 
    3229         361 :     double adfCoeffsY[4] = {};
    3230         361 :     GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
    3231             : 
    3232        1433 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3233             :     {
    3234        1177 :         const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
    3235             : #if !(defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2))
    3236        1177 :         if (poWK->pafUnifiedSrcDensity[iOffset + 0] < SRC_DENSITY_THRESHOLD ||
    3237        1089 :             poWK->pafUnifiedSrcDensity[iOffset + 1] < SRC_DENSITY_THRESHOLD ||
    3238        1089 :             poWK->pafUnifiedSrcDensity[iOffset + 2] < SRC_DENSITY_THRESHOLD ||
    3239        1089 :             poWK->pafUnifiedSrcDensity[iOffset + 3] < SRC_DENSITY_THRESHOLD)
    3240             :         {
    3241         105 :             double adfImagIgnored[4] = {};
    3242         105 :             return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3243             :                                               pdfDensity, pdfReal,
    3244         105 :                                               adfImagIgnored);
    3245             :         }
    3246             : #endif
    3247             : 
    3248        1072 :         adfValueDens[i + 1] =
    3249        1072 :             CONVOL4(adfCoeffsX, poWK->pafUnifiedSrcDensity + iOffset);
    3250             : 
    3251        1072 :         adfValueReal[i + 1] = CONVOL4(
    3252             :             adfCoeffsX,
    3253             :             reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
    3254             :     }
    3255             : 
    3256         256 :     *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
    3257         256 :     *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
    3258             : 
    3259         256 :     return true;
    3260             : }
    3261             : 
    3262             : /************************************************************************/
    3263             : /*              GWKCubicResampleSrcMaskIsDensity4SampleReal()             */
    3264             : /*     Bi-cubic when source has and only has pafUnifiedSrcDensity.      */
    3265             : /************************************************************************/
    3266             : 
    3267           0 : static bool GWKCubicResampleSrcMaskIsDensity4SampleReal(
    3268             :     const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
    3269             :     double *pdfDensity, double *pdfReal)
    3270             : 
    3271             : {
    3272           0 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3273           0 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3274           0 :     const GPtrDiff_t iSrcOffset =
    3275           0 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3276           0 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3277           0 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3278             : 
    3279             :     // Get the bilinear interpolation at the image borders.
    3280           0 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3281           0 :         iSrcY + 2 >= poWK->nSrcYSize)
    3282             :     {
    3283           0 :         double adfImagIgnored[4] = {};
    3284           0 :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3285           0 :                                           pdfDensity, pdfReal, adfImagIgnored);
    3286             :     }
    3287             : 
    3288           0 :     double adfCoeffsX[4] = {};
    3289           0 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
    3290             : 
    3291           0 :     double adfCoeffsY[4] = {};
    3292           0 :     GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
    3293             : 
    3294           0 :     double adfValueDens[4] = {};
    3295           0 :     double adfValueReal[4] = {};
    3296           0 :     double adfDensity[4] = {};
    3297           0 :     double adfReal[4] = {};
    3298           0 :     double adfImagIgnored[4] = {};
    3299             : 
    3300           0 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3301             :     {
    3302           0 :         if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
    3303           0 :                             2, adfDensity, adfReal, adfImagIgnored) ||
    3304           0 :             adfDensity[0] < SRC_DENSITY_THRESHOLD ||
    3305           0 :             adfDensity[1] < SRC_DENSITY_THRESHOLD ||
    3306           0 :             adfDensity[2] < SRC_DENSITY_THRESHOLD ||
    3307           0 :             adfDensity[3] < SRC_DENSITY_THRESHOLD)
    3308             :         {
    3309           0 :             return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3310             :                                               pdfDensity, pdfReal,
    3311           0 :                                               adfImagIgnored);
    3312             :         }
    3313             : 
    3314           0 :         adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
    3315           0 :         adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
    3316             :     }
    3317             : 
    3318           0 :     *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
    3319           0 :     *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
    3320             : 
    3321           0 :     return true;
    3322             : }
    3323             : 
    3324             : template <class T>
    3325     1906603 : static bool GWKCubicResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
    3326             :                                             int iBand, double dfSrcX,
    3327             :                                             double dfSrcY, T *pValue)
    3328             : 
    3329             : {
    3330     1906603 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3331     1906603 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3332     1906603 :     const GPtrDiff_t iSrcOffset =
    3333     1906603 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3334     1906603 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3335     1906603 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3336     1906603 :     const double dfDeltaY2 = dfDeltaY * dfDeltaY;
    3337     1906603 :     const double dfDeltaY3 = dfDeltaY2 * dfDeltaY;
    3338             : 
    3339             :     // Get the bilinear interpolation at the image borders.
    3340     1906603 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3341     1662527 :         iSrcY + 2 >= poWK->nSrcYSize)
    3342      303751 :         return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    3343      303751 :                                                   pValue);
    3344             : 
    3345     1602852 :     double adfCoeffs[4] = {};
    3346     1602852 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffs);
    3347             : 
    3348     1602852 :     double adfValue[4] = {};
    3349             : 
    3350     8014250 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3351             :     {
    3352     6411406 :         const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
    3353             : 
    3354     6411406 :         adfValue[i + 1] = CONVOL4(
    3355             :             adfCoeffs,
    3356             :             reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
    3357             :     }
    3358             : 
    3359             :     const double dfValue =
    3360     1602852 :         CubicConvolution(dfDeltaY, dfDeltaY2, dfDeltaY3, adfValue[0],
    3361             :                          adfValue[1], adfValue[2], adfValue[3]);
    3362             : 
    3363     1602852 :     *pValue = GWKClampValueT<T>(dfValue);
    3364             : 
    3365     1602852 :     return true;
    3366             : }
    3367             : 
    3368             : /************************************************************************/
    3369             : /*                          GWKLanczosSinc()                            */
    3370             : /************************************************************************/
    3371             : 
    3372             : /*
    3373             :  * Lanczos windowed sinc interpolation kernel with radius r.
    3374             :  *        /
    3375             :  *        | sinc(x) * sinc(x/r), if |x| < r
    3376             :  * L(x) = | 1, if x = 0                     ,
    3377             :  *        | 0, otherwise
    3378             :  *        \
    3379             :  *
    3380             :  * where sinc(x) = sin(PI * x) / (PI * x).
    3381             :  */
    3382             : 
    // Value of sinc(x) * sinc(x / 3), i.e. the Lanczos kernel with radius 3,
    // at dfX. Returns exactly 1.0 for dfX == 0.0. No test against the radius
    // is made here.
    static double GWKLanczosSinc(double dfX)
    {
        if (dfX != 0.0)
        {
            const double dfPiX = M_PI * dfX;
            const double dfPiXOver3 = dfPiX / 3;

            // With s = sin(PI x / 3), sin(3a) = 3 sin(a) - 4 sin^3(a) gives
            // sin(PI x) * sin(PI x / 3) = (3 - 4 s^2) * s^2, so a single
            // sin() evaluation is enough.
            const double dfSinThird = sin(dfPiXOver3);
            const double dfSinThirdSq = dfSinThird * dfSinThird;
            const double dfNumerator = (3 - 4 * dfSinThirdSq) * dfSinThirdSq;

            return dfNumerator / (dfPiX * dfPiXOver3);
        }

        return 1.0;
    }
    3399             : 
    // Replaces each of the four entries of padfValues by the radius-3 Lanczos
    // kernel value sinc(x) * sinc(x / 3) of that entry (1.0 for an entry equal
    // to 0.0) and returns the sum of the four resulting weights.
    static double GWKLanczosSinc4Values(double *padfValues)
    {
        for (double *pdfValue = padfValues; pdfValue != padfValues + 4;
             ++pdfValue)
        {
            if (*pdfValue == 0.0)
            {
                *pdfValue = 1.0;
                continue;
            }

            const double dfPiX = M_PI * (*pdfValue);
            const double dfPiXOver3 = dfPiX / 3;

            // With s = sin(PI x / 3), sin(3a) = 3 sin(a) - 4 sin^3(a) gives
            // sin(PI x) * sin(PI x / 3) = (3 - 4 s^2) * s^2.
            const double dfSinThird = sin(dfPiXOver3);
            const double dfSinThirdSq = dfSinThird * dfSinThird;
            const double dfNumerator = (3 - 4 * dfSinThirdSq) * dfSinThirdSq;

            *pdfValue = dfNumerator / (dfPiX * dfPiXOver3);
        }

        return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
    }
    3424             : 
    3425             : /************************************************************************/
    3426             : /*                           GWKBilinear()                              */
    3427             : /************************************************************************/
    3428             : 
    // Triangle (bilinear) filter: 1 - |x| for |x| <= 1, and 0 elsewhere.
    static double GWKBilinear(double dfX)
    {
        const double dfDistance = fabs(dfX);
        return (dfDistance <= 1.0) ? 1 - dfDistance : 0.0;
    }
    3437             : 
    // Applies the triangle (bilinear) filter, 1 - |x| for |x| <= 1 and 0
    // elsewhere, in place to the four entries of padfValues and returns the
    // sum of the four resulting weights.
    static double GWKBilinear4Values(double *padfValues)
    {
        for (int iValue = 0; iValue < 4; ++iValue)
        {
            const double dfDistance = fabs(padfValues[iValue]);
            padfValues[iValue] = (dfDistance <= 1.0) ? 1 - dfDistance : 0.0;
        }

        return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
    }
    3462             : 
    3463             : /************************************************************************/
    3464             : /*                            GWKCubic()                                */
    3465             : /************************************************************************/
    3466             : 
    3467     4350250 : static double GWKCubic(double dfX)
    3468             : {
    3469     4350250 :     return CubicKernel(dfX);
    3470             : }
    3471             : 
    // Evaluates the piecewise cubic filter in place on the four entries of
    // padfValues and returns the sum of the four resulting weights.
    //
    //   |x| <= 1      : x^2 * (1.5 |x| - 2.5) + 1
    //   1 < |x| <= 2  : x^2 * (-0.5 |x| + 2.5) - 4 |x| + 2
    //   otherwise     : 0
    static double GWKCubic4Values(double *padfValues)
    {
        const auto EvalKernel = [](double dfX) -> double
        {
            const double dfAbsX = fabs(dfX);
            const double dfX2 = dfX * dfX;
            if (dfAbsX <= 1.0)
                return dfX2 * (1.5 * dfAbsX - 2.5) + 1.0;
            if (dfAbsX <= 2.0)
                return dfX2 * (-0.5 * dfAbsX + 2.5) - 4.0 * dfAbsX + 2.0;
            return 0.0;
        };

        // Kept as four explicit evaluations rather than a loop.
        const double dfWeight0 = EvalKernel(padfValues[0]);
        const double dfWeight1 = EvalKernel(padfValues[1]);
        const double dfWeight2 = EvalKernel(padfValues[2]);
        const double dfWeight3 = EvalKernel(padfValues[3]);

        padfValues[0] = dfWeight0;
        padfValues[1] = dfWeight1;
        padfValues[2] = dfWeight2;
        padfValues[3] = dfWeight3;

        return dfWeight0 + dfWeight1 + dfWeight2 + dfWeight3;
    }
    3513             : 
    3514             : /************************************************************************/
    3515             : /*                           GWKBSpline()                               */
    3516             : /************************************************************************/
    3517             : 
    3518             : // https://www.cs.utexas.edu/~fussell/courses/cs384g-fall2013/lectures/mitchell/Mitchell.pdf
    3519             : // Equation 8 with (B,C)=(1,0)
    3520             : // 1/6 * ( 3 * |x|^3 -  6 * |x|^2 + 4) |x| < 1
    3521             : // 1/6 * ( -|x|^3 + 6 |x|^2  - 12|x| + 8) |x| >= 1 and |x| < 2
    3522             : 
    // Cubic B-spline weight at x, built as a sum of truncated cubes:
    //   (x+2)^3 - 4 (x+1)^3 + 6 x^3 - 4 (x-1)^3
    // where a term (and every term after it) only contributes while its base
    // is positive. The 1/6 normalisation factor is not applied here.
    static double GWKBSpline(double x)
    {
        const double xp2 = x + 2.0;
        const double xp1 = x + 1.0;
        const double xm1 = x - 1.0;

        // Needed by every non-zero result, so evaluate it unconditionally.
        const double xp2c = xp2 * xp2 * xp2;

        if (xp2 > 0.0)
        {
            double dfInner = 0.0;
            if (xp1 > 0.0)
            {
                double dfCore = 0.0;
                if (x > 0.0)
                {
                    const double dfTail =
                        (xm1 > 0.0) ? -4.0 * xm1 * xm1 * xm1 : 0.0;
                    dfCore = dfTail + 6.0 * x * x * x;
                }
                dfInner = dfCore + -4.0 * xp1 * xp1 * xp1;
            }
            return dfInner + xp2c;  // * 0.166666666666666666666
        }

        return 0.0;
    }
    3546             : 
    // Replaces each of the four entries of padfValues by its cubic B-spline
    // weight, built as the truncated-cube sum
    //   (x+2)^3 - 4 (x+1)^3 + 6 x^3 - 4 (x-1)^3
    // (no 1/6 normalisation applied), and returns the sum of the four weights.
    static double GWKBSpline4Values(double *padfValues)
    {
        for (double *pdfValue = padfValues; pdfValue != padfValues + 4;
             ++pdfValue)
        {
            const double x = *pdfValue;
            const double xp2 = x + 2.0;
            const double xp1 = x + 1.0;
            const double xm1 = x - 1.0;

            // Needed by every non-zero result, so evaluate it unconditionally.
            const double xp2c = xp2 * xp2 * xp2;

            double dfWeight = 0.0;
            if (xp2 > 0.0)
            {
                double dfInner = 0.0;
                if (xp1 > 0.0)
                {
                    double dfCore = 0.0;
                    if (x > 0.0)
                    {
                        const double dfTail =
                            (xm1 > 0.0) ? -4.0 * xm1 * xm1 * xm1 : 0.0;
                        dfCore = dfTail + 6.0 * x * x * x;
                    }
                    dfInner = dfCore + -4.0 * xp1 * xp1 * xp1;
                }
                dfWeight = dfInner + xp2c;  // * 0.166666666666666666666
            }

            *pdfValue = dfWeight;
        }

        return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
    }
    3576             : /************************************************************************/
    3577             : /*                       GWKResampleWrkStruct                           */
    3578             : /************************************************************************/
    3579             : 
    3580             : typedef struct _GWKResampleWrkStruct GWKResampleWrkStruct;
    3581             : 
    3582             : typedef bool (*pfnGWKResampleType)(const GDALWarpKernel *poWK, int iBand,
    3583             :                                    double dfSrcX, double dfSrcY,
    3584             :                                    double *pdfDensity, double *pdfReal,
    3585             :                                    double *pdfImag,
    3586             :                                    GWKResampleWrkStruct *psWrkStruct);
    3587             : 
    3588             : struct _GWKResampleWrkStruct
    3589             : {
    3590             :     pfnGWKResampleType pfnGWKResample;
    3591             : 
    3592             :     // Space for saved X weights.
    3593             :     double *padfWeightsX;
    3594             :     bool *pabCalcX;
    3595             : 
    3596             :     double *padfWeightsY;       // Only used by GWKResampleOptimizedLanczos.
    3597             :     int iLastSrcX;              // Only used by GWKResampleOptimizedLanczos.
    3598             :     int iLastSrcY;              // Only used by GWKResampleOptimizedLanczos.
    3599             :     double dfLastDeltaX;        // Only used by GWKResampleOptimizedLanczos.
    3600             :     double dfLastDeltaY;        // Only used by GWKResampleOptimizedLanczos.
    3601             :     double dfCosPiXScale;       // Only used by GWKResampleOptimizedLanczos.
    3602             :     double dfSinPiXScale;       // Only used by GWKResampleOptimizedLanczos.
    3603             :     double dfCosPiXScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3604             :     double dfSinPiXScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3605             :     double dfCosPiYScale;       // Only used by GWKResampleOptimizedLanczos.
    3606             :     double dfSinPiYScale;       // Only used by GWKResampleOptimizedLanczos.
    3607             :     double dfCosPiYScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3608             :     double dfSinPiYScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3609             : 
    3610             :     // Space for saving a row of pixels.
    3611             :     double *padfRowDensity;
    3612             :     double *padfRowReal;
    3613             :     double *padfRowImag;
    3614             : };
    3615             : 
    3616             : /************************************************************************/
    3617             : /*                    GWKResampleCreateWrkStruct()                      */
    3618             : /************************************************************************/
    3619             : 
    3620             : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
    3621             :                         double dfSrcY, double *pdfDensity, double *pdfReal,
    3622             :                         double *pdfImag, GWKResampleWrkStruct *psWrkStruct);
    3623             : 
    3624             : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
    3625             :                                         double dfSrcX, double dfSrcY,
    3626             :                                         double *pdfDensity, double *pdfReal,
    3627             :                                         double *pdfImag,
    3628             :                                         GWKResampleWrkStruct *psWrkStruct);
    3629             : 
    3630         344 : static GWKResampleWrkStruct *GWKResampleCreateWrkStruct(GDALWarpKernel *poWK)
    3631             : {
    3632         344 :     const int nXDist = (poWK->nXRadius + 1) * 2;
    3633         344 :     const int nYDist = (poWK->nYRadius + 1) * 2;
    3634             : 
    3635             :     GWKResampleWrkStruct *psWrkStruct = static_cast<GWKResampleWrkStruct *>(
    3636         344 :         CPLCalloc(1, sizeof(GWKResampleWrkStruct)));
    3637             : 
    3638             :     // Alloc space for saved X weights.
    3639         344 :     psWrkStruct->padfWeightsX =
    3640         344 :         static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3641         344 :     psWrkStruct->pabCalcX =
    3642         344 :         static_cast<bool *>(CPLMalloc(nXDist * sizeof(bool)));
    3643             : 
    3644         344 :     psWrkStruct->padfWeightsY =
    3645         344 :         static_cast<double *>(CPLCalloc(nYDist, sizeof(double)));
    3646         344 :     psWrkStruct->iLastSrcX = -10;
    3647         344 :     psWrkStruct->iLastSrcY = -10;
    3648         344 :     psWrkStruct->dfLastDeltaX = -10;
    3649         344 :     psWrkStruct->dfLastDeltaY = -10;
    3650             : 
    3651             :     // Alloc space for saving a row of pixels.
    3652         344 :     if (poWK->pafUnifiedSrcDensity == nullptr &&
    3653         316 :         poWK->panUnifiedSrcValid == nullptr &&
    3654         304 :         poWK->papanBandSrcValid == nullptr)
    3655             :     {
    3656         304 :         psWrkStruct->padfRowDensity = nullptr;
    3657             :     }
    3658             :     else
    3659             :     {
    3660          40 :         psWrkStruct->padfRowDensity =
    3661          40 :             static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3662             :     }
    3663         344 :     psWrkStruct->padfRowReal =
    3664         344 :         static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3665         344 :     psWrkStruct->padfRowImag =
    3666         344 :         static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3667             : 
    3668         344 :     if (poWK->eResample == GRA_Lanczos)
    3669             :     {
    3670          63 :         psWrkStruct->pfnGWKResample = GWKResampleOptimizedLanczos;
    3671             : 
    3672          63 :         if (poWK->dfXScale < 1)
    3673             :         {
    3674           4 :             psWrkStruct->dfCosPiXScaleOver3 = cos(M_PI / 3 * poWK->dfXScale);
    3675           4 :             psWrkStruct->dfSinPiXScaleOver3 =
    3676           4 :                 sqrt(1 - psWrkStruct->dfCosPiXScaleOver3 *
    3677           4 :                              psWrkStruct->dfCosPiXScaleOver3);
    3678             :             // "Naive":
    3679             :             // const double dfCosPiXScale = cos(  M_PI * dfXScale );
    3680             :             // const double dfSinPiXScale = sin(  M_PI * dfXScale );
    3681             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
    3682           4 :             psWrkStruct->dfCosPiXScale = (4 * psWrkStruct->dfCosPiXScaleOver3 *
    3683           4 :                                               psWrkStruct->dfCosPiXScaleOver3 -
    3684           4 :                                           3) *
    3685           4 :                                          psWrkStruct->dfCosPiXScaleOver3;
    3686           4 :             psWrkStruct->dfSinPiXScale = sqrt(
    3687           4 :                 1 - psWrkStruct->dfCosPiXScale * psWrkStruct->dfCosPiXScale);
    3688             :         }
    3689             : 
    3690          63 :         if (poWK->dfYScale < 1)
    3691             :         {
    3692          11 :             psWrkStruct->dfCosPiYScaleOver3 = cos(M_PI / 3 * poWK->dfYScale);
    3693          11 :             psWrkStruct->dfSinPiYScaleOver3 =
    3694          11 :                 sqrt(1 - psWrkStruct->dfCosPiYScaleOver3 *
    3695          11 :                              psWrkStruct->dfCosPiYScaleOver3);
    3696             :             // "Naive":
    3697             :             // const double dfCosPiYScale = cos(  M_PI * dfYScale );
    3698             :             // const double dfSinPiYScale = sin(  M_PI * dfYScale );
    3699             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
    3700          11 :             psWrkStruct->dfCosPiYScale = (4 * psWrkStruct->dfCosPiYScaleOver3 *
    3701          11 :                                               psWrkStruct->dfCosPiYScaleOver3 -
    3702          11 :                                           3) *
    3703          11 :                                          psWrkStruct->dfCosPiYScaleOver3;
    3704          11 :             psWrkStruct->dfSinPiYScale = sqrt(
    3705          11 :                 1 - psWrkStruct->dfCosPiYScale * psWrkStruct->dfCosPiYScale);
    3706             :         }
    3707             :     }
    3708             :     else
    3709         281 :         psWrkStruct->pfnGWKResample = GWKResample;
    3710             : 
    3711         344 :     return psWrkStruct;
    3712             : }
    3713             : 
    3714             : /************************************************************************/
    3715             : /*                    GWKResampleDeleteWrkStruct()                      */
    3716             : /************************************************************************/
    3717             : 
    3718         344 : static void GWKResampleDeleteWrkStruct(GWKResampleWrkStruct *psWrkStruct)
    3719             : {
    3720         344 :     CPLFree(psWrkStruct->padfWeightsX);
    3721         344 :     CPLFree(psWrkStruct->padfWeightsY);
    3722         344 :     CPLFree(psWrkStruct->pabCalcX);
    3723         344 :     CPLFree(psWrkStruct->padfRowDensity);
    3724         344 :     CPLFree(psWrkStruct->padfRowReal);
    3725         344 :     CPLFree(psWrkStruct->padfRowImag);
    3726         344 :     CPLFree(psWrkStruct);
    3727         344 : }
    3728             : 
    3729             : /************************************************************************/
    3730             : /*                           GWKResample()                              */
    3731             : /************************************************************************/
    3732             : 
    3733      239383 : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
    3734             :                         double dfSrcY, double *pdfDensity, double *pdfReal,
    3735             :                         double *pdfImag, GWKResampleWrkStruct *psWrkStruct)
    3736             : 
    3737             : {
    3738             :     // Save as local variables to avoid following pointers in loops.
    3739      239383 :     const int nSrcXSize = poWK->nSrcXSize;
    3740      239383 :     const int nSrcYSize = poWK->nSrcYSize;
    3741             : 
    3742      239383 :     double dfAccumulatorReal = 0.0;
    3743      239383 :     double dfAccumulatorImag = 0.0;
    3744      239383 :     double dfAccumulatorDensity = 0.0;
    3745      239383 :     double dfAccumulatorWeight = 0.0;
    3746      239383 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    3747      239383 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    3748      239383 :     const GPtrDiff_t iSrcOffset =
    3749      239383 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    3750      239383 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3751      239383 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3752             : 
    3753      239383 :     const double dfXScale = poWK->dfXScale;
    3754      239383 :     const double dfYScale = poWK->dfYScale;
    3755             : 
    3756      239383 :     const int nXDist = (poWK->nXRadius + 1) * 2;
    3757             : 
    3758             :     // Space for saved X weights.
    3759      239383 :     double *padfWeightsX = psWrkStruct->padfWeightsX;
    3760      239383 :     bool *pabCalcX = psWrkStruct->pabCalcX;
    3761             : 
    3762             :     // Space for saving a row of pixels.
    3763      239383 :     double *padfRowDensity = psWrkStruct->padfRowDensity;
    3764      239383 :     double *padfRowReal = psWrkStruct->padfRowReal;
    3765      239383 :     double *padfRowImag = psWrkStruct->padfRowImag;
    3766             : 
    3767             :     // Mark as needing calculation (don't calculate the weights yet,
    3768             :     // because a mask may render it unnecessary).
    3769      239383 :     memset(pabCalcX, false, nXDist * sizeof(bool));
    3770             : 
    3771      239383 :     FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample];
    3772      239383 :     CPLAssert(pfnGetWeight);
    3773             : 
    3774             :     // Skip sampling over edge of image.
    3775      239383 :     int j = poWK->nFiltInitY;
    3776      239383 :     int jMax = poWK->nYRadius;
    3777      239383 :     if (iSrcY + j < 0)
    3778         566 :         j = -iSrcY;
    3779      239383 :     if (iSrcY + jMax >= nSrcYSize)
    3780         662 :         jMax = nSrcYSize - iSrcY - 1;
    3781             : 
    3782      239383 :     int iMin = poWK->nFiltInitX;
    3783      239383 :     int iMax = poWK->nXRadius;
    3784      239383 :     if (iSrcX + iMin < 0)
    3785         566 :         iMin = -iSrcX;
    3786      239383 :     if (iSrcX + iMax >= nSrcXSize)
    3787         659 :         iMax = nSrcXSize - iSrcX - 1;
    3788             : 
    3789      239383 :     const int bXScaleBelow1 = (dfXScale < 1.0);
    3790      239383 :     const int bYScaleBelow1 = (dfYScale < 1.0);
    3791             : 
    3792      239383 :     GPtrDiff_t iRowOffset =
    3793      239383 :         iSrcOffset + static_cast<GPtrDiff_t>(j - 1) * nSrcXSize + iMin;
    3794             : 
    3795             :     // Loop over pixel rows in the kernel.
    3796     1445930 :     for (; j <= jMax; ++j)
    3797             :     {
    3798     1206540 :         iRowOffset += nSrcXSize;
    3799             : 
    3800             :         // Get pixel values.
    3801             :         // We can potentially read extra elements after the "normal" end of the
    3802             :         // source arrays, but the contract of papabySrcImage[iBand],
    3803             :         // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
    3804             :         // is to have WARP_EXTRA_ELTS reserved at their end.
    3805     1206540 :         if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
    3806             :                             padfRowDensity, padfRowReal, padfRowImag))
    3807          72 :             continue;
    3808             : 
    3809             :         // Calculate the Y weight.
    3810             :         double dfWeight1 = (bYScaleBelow1)
    3811     1206470 :                                ? pfnGetWeight((j - dfDeltaY) * dfYScale)
    3812        1600 :                                : pfnGetWeight(j - dfDeltaY);
    3813             : 
    3814             :         // Iterate over pixels in row.
    3815     1206470 :         double dfAccumulatorRealLocal = 0.0;
    3816     1206470 :         double dfAccumulatorImagLocal = 0.0;
    3817     1206470 :         double dfAccumulatorDensityLocal = 0.0;
    3818     1206470 :         double dfAccumulatorWeightLocal = 0.0;
    3819             : 
    3820     7317420 :         for (int i = iMin; i <= iMax; ++i)
    3821             :         {
    3822             :             // Skip sampling if pixel has zero density.
    3823     6110940 :             if (padfRowDensity != nullptr &&
    3824       77277 :                 padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD)
    3825         546 :                 continue;
    3826             : 
    3827     6110400 :             double dfWeight2 = 0.0;
    3828             : 
    3829             :             // Make or use a cached set of weights for this row.
    3830     6110400 :             if (pabCalcX[i - iMin])
    3831             :             {
    3832             :                 // Use saved weight value instead of recomputing it.
    3833     4903920 :                 dfWeight2 = padfWeightsX[i - iMin];
    3834             :             }
    3835             :             else
    3836             :             {
    3837             :                 // Calculate & save the X weight.
    3838     1206480 :                 padfWeightsX[i - iMin] = dfWeight2 =
    3839     1206480 :                     (bXScaleBelow1) ? pfnGetWeight((i - dfDeltaX) * dfXScale)
    3840        1600 :                                     : pfnGetWeight(i - dfDeltaX);
    3841             : 
    3842     1206480 :                 pabCalcX[i - iMin] = true;
    3843             :             }
    3844             : 
    3845             :             // Accumulate!
    3846     6110400 :             dfAccumulatorRealLocal += padfRowReal[i - iMin] * dfWeight2;
    3847     6110400 :             dfAccumulatorImagLocal += padfRowImag[i - iMin] * dfWeight2;
    3848     6110400 :             if (padfRowDensity != nullptr)
    3849       76731 :                 dfAccumulatorDensityLocal +=
    3850       76731 :                     padfRowDensity[i - iMin] * dfWeight2;
    3851     6110400 :             dfAccumulatorWeightLocal += dfWeight2;
    3852             :         }
    3853             : 
    3854     1206470 :         dfAccumulatorReal += dfAccumulatorRealLocal * dfWeight1;
    3855     1206470 :         dfAccumulatorImag += dfAccumulatorImagLocal * dfWeight1;
    3856     1206470 :         dfAccumulatorDensity += dfAccumulatorDensityLocal * dfWeight1;
    3857     1206470 :         dfAccumulatorWeight += dfAccumulatorWeightLocal * dfWeight1;
    3858             :     }
    3859             : 
    3860      239383 :     if (dfAccumulatorWeight < 0.000001 ||
    3861        1887 :         (padfRowDensity != nullptr && dfAccumulatorDensity < 0.000001))
    3862             :     {
    3863           0 :         *pdfDensity = 0.0;
    3864           0 :         return false;
    3865             :     }
    3866             : 
    3867             :     // Calculate the output taking into account weighting.
    3868      239383 :     if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
    3869             :     {
    3870      239380 :         *pdfReal = dfAccumulatorReal / dfAccumulatorWeight;
    3871      239380 :         *pdfImag = dfAccumulatorImag / dfAccumulatorWeight;
    3872      239380 :         if (padfRowDensity != nullptr)
    3873        1884 :             *pdfDensity = dfAccumulatorDensity / dfAccumulatorWeight;
    3874             :         else
    3875      237496 :             *pdfDensity = 1.0;
    3876             :     }
    3877             :     else
    3878             :     {
    3879           3 :         *pdfReal = dfAccumulatorReal;
    3880           3 :         *pdfImag = dfAccumulatorImag;
    3881           3 :         if (padfRowDensity != nullptr)
    3882           3 :             *pdfDensity = dfAccumulatorDensity;
    3883             :         else
    3884           0 :             *pdfDensity = 1.0;
    3885             :     }
    3886             : 
    3887      239383 :     return true;
    3888             : }
    3889             : 
    3890             : /************************************************************************/
    3891             : /*                      GWKResampleOptimizedLanczos()                   */
    3892             : /************************************************************************/
    3893             : 
    3894      617144 : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
    3895             :                                         double dfSrcX, double dfSrcY,
    3896             :                                         double *pdfDensity, double *pdfReal,
    3897             :                                         double *pdfImag,
    3898             :                                         GWKResampleWrkStruct *psWrkStruct)
    3899             : 
    3900             : {
    3901             :     // Save as local variables to avoid following pointers in loops.
    3902      617144 :     const int nSrcXSize = poWK->nSrcXSize;
    3903      617144 :     const int nSrcYSize = poWK->nSrcYSize;
    3904             : 
    3905      617144 :     double dfAccumulatorReal = 0.0;
    3906      617144 :     double dfAccumulatorImag = 0.0;
    3907      617144 :     double dfAccumulatorDensity = 0.0;
    3908      617144 :     double dfAccumulatorWeight = 0.0;
    3909      617144 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    3910      617144 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    3911      617144 :     const GPtrDiff_t iSrcOffset =
    3912      617144 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    3913      617144 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3914      617144 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3915             : 
    3916      617144 :     const double dfXScale = poWK->dfXScale;
    3917      617144 :     const double dfYScale = poWK->dfYScale;
    3918             : 
    3919             :     // Space for saved X weights.
    3920      617144 :     double *const padfWeightsXShifted =
    3921      617144 :         psWrkStruct->padfWeightsX - poWK->nFiltInitX;
    3922      617144 :     double *const padfWeightsYShifted =
    3923      617144 :         psWrkStruct->padfWeightsY - poWK->nFiltInitY;
    3924             : 
    3925             :     // Space for saving a row of pixels.
    3926      617144 :     double *const padfRowDensity = psWrkStruct->padfRowDensity;
    3927      617144 :     double *const padfRowReal = psWrkStruct->padfRowReal;
    3928      617144 :     double *const padfRowImag = psWrkStruct->padfRowImag;
    3929             : 
    3930             :     // Skip sampling over edge of image.
    3931      617144 :     int jMin = poWK->nFiltInitY;
    3932      617144 :     int jMax = poWK->nYRadius;
    3933      617144 :     if (iSrcY + jMin < 0)
    3934       16522 :         jMin = -iSrcY;
    3935      617144 :     if (iSrcY + jMax >= nSrcYSize)
    3936        5782 :         jMax = nSrcYSize - iSrcY - 1;
    3937             : 
    3938      617144 :     int iMin = poWK->nFiltInitX;
    3939      617144 :     int iMax = poWK->nXRadius;
    3940      617144 :     if (iSrcX + iMin < 0)
    3941       15797 :         iMin = -iSrcX;
    3942      617144 :     if (iSrcX + iMax >= nSrcXSize)
    3943        4657 :         iMax = nSrcXSize - iSrcX - 1;
    3944             : 
    3945      617144 :     if (dfXScale < 1.0)
    3946             :     {
    3947      403041 :         while ((iMin - dfDeltaX) * dfXScale < -3.0)
    3948      200179 :             iMin++;
    3949      202862 :         while ((iMax - dfDeltaX) * dfXScale > 3.0)
    3950           0 :             iMax--;
    3951             : 
    3952             :         // clang-format off
    3953             :         /*
    3954             :         Naive version:
    3955             :         for (int i = iMin; i <= iMax; ++i)
    3956             :         {
    3957             :             padfWeightsXShifted[i] =
    3958             :                 GWKLanczosSinc((i - dfDeltaX) * dfXScale);
    3959             :         }
    3960             : 
    3961             :         but given that:
    3962             : 
    3963             :         GWKLanczosSinc(dfX):
    3964             :             if (dfX == 0.0)
    3965             :                 return 1.0;
    3966             : 
    3967             :             const double dfPIX = M_PI * dfX;
    3968             :             const double dfPIXoverR = dfPIX / 3;
    3969             :             const double dfPIX2overR = dfPIX * dfPIXoverR;
    3970             :             return sin(dfPIX) * sin(dfPIXoverR) / dfPIX2overR;
    3971             : 
    3972             :         and
    3973             :             sin (a + b) = sin a cos b + cos a sin b.
    3974             :             cos (a + b) = cos a cos b - sin a sin b.
    3975             : 
    3976             :         we can skip any sin() computation within the loop
    3977             :         */
    3978             :         // clang-format on
    3979             : 
    3980      202862 :         if (iSrcX != psWrkStruct->iLastSrcX ||
    3981      131072 :             dfDeltaX != psWrkStruct->dfLastDeltaX)
    3982             :         {
    3983       71790 :             double dfX = (iMin - dfDeltaX) * dfXScale;
    3984             : 
    3985       71790 :             double dfPIXover3 = M_PI / 3 * dfX;
    3986       71790 :             double dfCosOver3 = cos(dfPIXover3);
    3987       71790 :             double dfSinOver3 = sin(dfPIXover3);
    3988             : 
    3989             :             // "Naive":
    3990             :             // double dfSin = sin( M_PI * dfX );
    3991             :             // double dfCos = cos( M_PI * dfX );
    3992             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
    3993       71790 :             double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
    3994       71790 :             double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
    3995             : 
    3996       71790 :             const double dfCosPiXScaleOver3 = psWrkStruct->dfCosPiXScaleOver3;
    3997       71790 :             const double dfSinPiXScaleOver3 = psWrkStruct->dfSinPiXScaleOver3;
    3998       71790 :             const double dfCosPiXScale = psWrkStruct->dfCosPiXScale;
    3999       71790 :             const double dfSinPiXScale = psWrkStruct->dfSinPiXScale;
    4000       71790 :             constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
    4001       71790 :             padfWeightsXShifted[iMin] =
    4002       71790 :                 dfX == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfX * dfX);
    4003     1636480 :             for (int i = iMin + 1; i <= iMax; ++i)
    4004             :             {
    4005     1564690 :                 dfX += dfXScale;
    4006     1564690 :                 const double dfNewSin =
    4007     1564690 :                     dfSin * dfCosPiXScale + dfCos * dfSinPiXScale;
    4008     1564690 :                 const double dfNewSinOver3 = dfSinOver3 * dfCosPiXScaleOver3 +
    4009     1564690 :                                              dfCosOver3 * dfSinPiXScaleOver3;
    4010     1564690 :                 padfWeightsXShifted[i] =
    4011             :                     dfX == 0
    4012     1564690 :                         ? 1.0
    4013     1564690 :                         : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfX * dfX);
    4014     1564690 :                 const double dfNewCos =
    4015     1564690 :                     dfCos * dfCosPiXScale - dfSin * dfSinPiXScale;
    4016     1564690 :                 const double dfNewCosOver3 = dfCosOver3 * dfCosPiXScaleOver3 -
    4017     1564690 :                                              dfSinOver3 * dfSinPiXScaleOver3;
    4018     1564690 :                 dfSin = dfNewSin;
    4019     1564690 :                 dfCos = dfNewCos;
    4020     1564690 :                 dfSinOver3 = dfNewSinOver3;
    4021     1564690 :                 dfCosOver3 = dfNewCosOver3;
    4022             :             }
    4023             : 
    4024       71790 :             psWrkStruct->iLastSrcX = iSrcX;
    4025       71790 :             psWrkStruct->dfLastDeltaX = dfDeltaX;
    4026             :         }
    4027             :     }
    4028             :     else
    4029             :     {
    4030      757542 :         while (iMin - dfDeltaX < -3.0)
    4031      343260 :             iMin++;
    4032      414282 :         while (iMax - dfDeltaX > 3.0)
    4033           0 :             iMax--;
    4034             : 
    4035      414282 :         if (iSrcX != psWrkStruct->iLastSrcX ||
    4036      209580 :             dfDeltaX != psWrkStruct->dfLastDeltaX)
    4037             :         {
    4038             :             // Optimisation of GWKLanczosSinc(i - dfDeltaX) based on the
    4039             :             // following trigonometric formulas.
    4040             : 
    4041             :             // TODO(schwehr): Move this somewhere where it can be rendered as
    4042             :             // LaTeX.
    4043             :             // clang-format off
    4044             :             // sin(M_PI * (dfBase + k)) = sin(M_PI * dfBase) * cos(M_PI * k) +
    4045             :             //                            cos(M_PI * dfBase) * sin(M_PI * k)
    4046             :             // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k) + dfCosPIBase * sin(M_PI * k)
    4047             :             // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k)
    4048             :             // sin(M_PI * (dfBase + k)) = dfSinPIBase * (((k % 2) == 0) ? 1 : -1)
    4049             : 
    4050             :             // sin(M_PI / dfR * (dfBase + k)) = sin(M_PI / dfR * dfBase) * cos(M_PI / dfR * k) +
    4051             :             //                                  cos(M_PI / dfR * dfBase) * sin(M_PI / dfR * k)
    4052             :             // sin(M_PI / dfR * (dfBase + k)) = dfSinPIBaseOverR * cos(M_PI / dfR * k) + dfCosPIBaseOverR * sin(M_PI / dfR * k)
    4053             :             // clang-format on
    4054             : 
    4055      414282 :             const double dfSinPIDeltaXOver3 = sin((-M_PI / 3.0) * dfDeltaX);
    4056      414282 :             const double dfSin2PIDeltaXOver3 =
    4057             :                 dfSinPIDeltaXOver3 * dfSinPIDeltaXOver3;
    4058             :             // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaX < PI/2.
    4059      414282 :             const double dfCosPIDeltaXOver3 = sqrt(1.0 - dfSin2PIDeltaXOver3);
    4060      414282 :             const double dfSinPIDeltaX =
    4061      414282 :                 (3.0 - 4 * dfSin2PIDeltaXOver3) * dfSinPIDeltaXOver3;
    4062      414282 :             const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
    4063      414282 :             const double dfInvPI2Over3xSinPIDeltaX =
    4064             :                 dfInvPI2Over3 * dfSinPIDeltaX;
    4065      414282 :             const double dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 =
    4066      414282 :                 -0.5 * dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3;
    4067      414282 :             const double dfSinPIOver3 = 0.8660254037844386;
    4068      414282 :             const double dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3 =
    4069      414282 :                 dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaX * dfCosPIDeltaXOver3;
    4070             :             const double padfCst[] = {
    4071      414282 :                 dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3,
    4072      414282 :                 dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 -
    4073             :                     dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3,
    4074      414282 :                 dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 +
    4075      414282 :                     dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3};
    4076             : 
    4077     2936860 :             for (int i = iMin; i <= iMax; ++i)
    4078             :             {
    4079     2522570 :                 const double dfX = i - dfDeltaX;
    4080     2522570 :                 if (dfX == 0.0)
    4081       58282 :                     padfWeightsXShifted[i] = 1.0;
    4082             :                 else
    4083     2464290 :                     padfWeightsXShifted[i] = padfCst[(i + 3) % 3] / (dfX * dfX);
    4084             : #if DEBUG_VERBOSE
    4085             :                     // TODO(schwehr): AlmostEqual.
    4086             :                     // CPLAssert(fabs(padfWeightsX[i-poWK->nFiltInitX] -
    4087             :                     //               GWKLanczosSinc(dfX, 3.0)) < 1e-10);
    4088             : #endif
    4089             :             }
    4090             : 
    4091      414282 :             psWrkStruct->iLastSrcX = iSrcX;
    4092      414282 :             psWrkStruct->dfLastDeltaX = dfDeltaX;
    4093             :         }
    4094             :     }
    4095             : 
    4096      617144 :     if (dfYScale < 1.0)
    4097             :     {
    4098      403116 :         while ((jMin - dfDeltaY) * dfYScale < -3.0)
    4099      200254 :             jMin++;
    4100      202862 :         while ((jMax - dfDeltaY) * dfYScale > 3.0)
    4101           0 :             jMax--;
    4102             : 
    4103             :         // clang-format off
    4104             :         /*
    4105             :         Naive version:
    4106             :         for (int j = jMin; j <= jMax; ++j)
    4107             :         {
    4108             :             padfWeightsYShifted[j] =
    4109             :                 GWKLanczosSinc((j - dfDeltaY) * dfYScale);
    4110             :         }
    4111             :         */
    4112             :         // clang-format on
    4113             : 
    4114      202862 :         if (iSrcY != psWrkStruct->iLastSrcY ||
    4115      202479 :             dfDeltaY != psWrkStruct->dfLastDeltaY)
    4116             :         {
    4117         383 :             double dfY = (jMin - dfDeltaY) * dfYScale;
    4118             : 
    4119         383 :             double dfPIYover3 = M_PI / 3 * dfY;
    4120         383 :             double dfCosOver3 = cos(dfPIYover3);
    4121         383 :             double dfSinOver3 = sin(dfPIYover3);
    4122             : 
    4123             :             // "Naive":
    4124             :             // double dfSin = sin( M_PI * dfY );
    4125             :             // double dfCos = cos( M_PI * dfY );
    4126             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
    4127         383 :             double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
    4128         383 :             double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
    4129             : 
    4130         383 :             const double dfCosPiYScaleOver3 = psWrkStruct->dfCosPiYScaleOver3;
    4131         383 :             const double dfSinPiYScaleOver3 = psWrkStruct->dfSinPiYScaleOver3;
    4132         383 :             const double dfCosPiYScale = psWrkStruct->dfCosPiYScale;
    4133         383 :             const double dfSinPiYScale = psWrkStruct->dfSinPiYScale;
    4134         383 :             constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
    4135         383 :             padfWeightsYShifted[jMin] =
    4136         383 :                 dfY == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfY * dfY);
    4137        7318 :             for (int j = jMin + 1; j <= jMax; ++j)
    4138             :             {
    4139        6935 :                 dfY += dfYScale;
    4140        6935 :                 const double dfNewSin =
    4141        6935 :                     dfSin * dfCosPiYScale + dfCos * dfSinPiYScale;
    4142        6935 :                 const double dfNewSinOver3 = dfSinOver3 * dfCosPiYScaleOver3 +
    4143        6935 :                                              dfCosOver3 * dfSinPiYScaleOver3;
    4144        6935 :                 padfWeightsYShifted[j] =
    4145             :                     dfY == 0
    4146        6935 :                         ? 1.0
    4147        6935 :                         : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfY * dfY);
    4148        6935 :                 const double dfNewCos =
    4149        6935 :                     dfCos * dfCosPiYScale - dfSin * dfSinPiYScale;
    4150        6935 :                 const double dfNewCosOver3 = dfCosOver3 * dfCosPiYScaleOver3 -
    4151        6935 :                                              dfSinOver3 * dfSinPiYScaleOver3;
    4152        6935 :                 dfSin = dfNewSin;
    4153        6935 :                 dfCos = dfNewCos;
    4154        6935 :                 dfSinOver3 = dfNewSinOver3;
    4155        6935 :                 dfCosOver3 = dfNewCosOver3;
    4156             :             }
    4157             : 
    4158         383 :             psWrkStruct->iLastSrcY = iSrcY;
    4159         383 :             psWrkStruct->dfLastDeltaY = dfDeltaY;
    4160             :         }
    4161             :     }
    4162             :     else
    4163             :     {
    4164      684742 :         while (jMin - dfDeltaY < -3.0)
    4165      270460 :             jMin++;
    4166      414282 :         while (jMax - dfDeltaY > 3.0)
    4167           0 :             jMax--;
    4168             : 
    4169      414282 :         if (iSrcY != psWrkStruct->iLastSrcY ||
    4170      413663 :             dfDeltaY != psWrkStruct->dfLastDeltaY)
    4171             :         {
    4172        1132 :             const double dfSinPIDeltaYOver3 = sin((-M_PI / 3.0) * dfDeltaY);
    4173        1132 :             const double dfSin2PIDeltaYOver3 =
    4174             :                 dfSinPIDeltaYOver3 * dfSinPIDeltaYOver3;
    4175             :             // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaY < PI/2.
    4176        1132 :             const double dfCosPIDeltaYOver3 = sqrt(1.0 - dfSin2PIDeltaYOver3);
    4177        1132 :             const double dfSinPIDeltaY =
    4178        1132 :                 (3.0 - 4.0 * dfSin2PIDeltaYOver3) * dfSinPIDeltaYOver3;
    4179        1132 :             const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
    4180        1132 :             const double dfInvPI2Over3xSinPIDeltaY =
    4181             :                 dfInvPI2Over3 * dfSinPIDeltaY;
    4182        1132 :             const double dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 =
    4183        1132 :                 -0.5 * dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3;
    4184        1132 :             const double dfSinPIOver3 = 0.8660254037844386;
    4185        1132 :             const double dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3 =
    4186        1132 :                 dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaY * dfCosPIDeltaYOver3;
    4187             :             const double padfCst[] = {
    4188        1132 :                 dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3,
    4189        1132 :                 dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 -
    4190             :                     dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3,
    4191        1132 :                 dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 +
    4192        1132 :                     dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3};
    4193             : 
    4194        7917 :             for (int j = jMin; j <= jMax; ++j)
    4195             :             {
    4196        6785 :                 const double dfY = j - dfDeltaY;
    4197        6785 :                 if (dfY == 0.0)
    4198         460 :                     padfWeightsYShifted[j] = 1.0;
    4199             :                 else
    4200        6325 :                     padfWeightsYShifted[j] = padfCst[(j + 3) % 3] / (dfY * dfY);
    4201             : #if DEBUG_VERBOSE
    4202             :                     // TODO(schwehr): AlmostEqual.
    4203             :                     // CPLAssert(fabs(padfWeightsYShifted[j] -
    4204             :                     //               GWKLanczosSinc(dfY, 3.0)) < 1e-10);
    4205             : #endif
    4206             :             }
    4207             : 
    4208        1132 :             psWrkStruct->iLastSrcY = iSrcY;
    4209        1132 :             psWrkStruct->dfLastDeltaY = dfDeltaY;
    4210             :         }
    4211             :     }
    4212             : 
    4213             :     // If we have no density information, we can simply compute the
    4214             :     // accumulated weight.
    4215      617144 :     if (padfRowDensity == nullptr)
    4216             :     {
    4217      617144 :         double dfRowAccWeight = 0.0;
    4218     7903490 :         for (int i = iMin; i <= iMax; ++i)
    4219             :         {
    4220     7286350 :             dfRowAccWeight += padfWeightsXShifted[i];
    4221             :         }
    4222      617144 :         double dfColAccWeight = 0.0;
    4223     7958040 :         for (int j = jMin; j <= jMax; ++j)
    4224             :         {
    4225     7340900 :             dfColAccWeight += padfWeightsYShifted[j];
    4226             :         }
    4227      617144 :         dfAccumulatorWeight = dfRowAccWeight * dfColAccWeight;
    4228             :     }
    4229             : 
    4230             :     // Loop over pixel rows in the kernel.
    4231             : 
    4232      617144 :     if (poWK->eWorkingDataType == GDT_Byte && !poWK->panUnifiedSrcValid &&
    4233      616524 :         !poWK->papanBandSrcValid && !poWK->pafUnifiedSrcDensity &&
    4234             :         !padfRowDensity)
    4235             :     {
    4236             :         // Optimization for Byte case without any masking/alpha
    4237             : 
    4238      616524 :         if (dfAccumulatorWeight < 0.000001)
    4239             :         {
    4240           0 :             *pdfDensity = 0.0;
    4241           0 :             return false;
    4242             :         }
    4243             : 
    4244      616524 :         const GByte *pSrc =
    4245      616524 :             reinterpret_cast<const GByte *>(poWK->papabySrcImage[iBand]);
    4246      616524 :         pSrc += iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
    4247             : 
    4248             : #if defined(USE_SSE2)
    4249      616524 :         if (iMax - iMin + 1 == 6)
    4250             :         {
    4251             :             // This is just an optimized version of the general case in
    4252             :             // the else clause.
    4253             : 
    4254      346854 :             pSrc += iMin;
    4255      346854 :             int j = jMin;
    4256             :             const auto fourXWeights =
    4257      346854 :                 XMMReg4Double::Load4Val(padfWeightsXShifted + iMin);
    4258             : 
    4259             :             // Process 2 lines at the same time.
    4260     1375860 :             for (; j < jMax; j += 2)
    4261             :             {
    4262             :                 const XMMReg4Double v_acc =
    4263     1029000 :                     XMMReg4Double::Load4Val(pSrc) * fourXWeights;
    4264             :                 const XMMReg4Double v_acc2 =
    4265     1029000 :                     XMMReg4Double::Load4Val(pSrc + nSrcXSize) * fourXWeights;
    4266     1029000 :                 const double dfRowAcc = v_acc.GetHorizSum();
    4267     1029000 :                 const double dfRowAccEnd =
    4268     1029000 :                     pSrc[4] * padfWeightsXShifted[iMin + 4] +
    4269     1029000 :                     pSrc[5] * padfWeightsXShifted[iMin + 5];
    4270     1029000 :                 dfAccumulatorReal +=
    4271     1029000 :                     (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
    4272     1029000 :                 const double dfRowAcc2 = v_acc2.GetHorizSum();
    4273     1029000 :                 const double dfRowAcc2End =
    4274     1029000 :                     pSrc[nSrcXSize + 4] * padfWeightsXShifted[iMin + 4] +
    4275     1029000 :                     pSrc[nSrcXSize + 5] * padfWeightsXShifted[iMin + 5];
    4276     1029000 :                 dfAccumulatorReal +=
    4277     1029000 :                     (dfRowAcc2 + dfRowAcc2End) * padfWeightsYShifted[j + 1];
    4278     1029000 :                 pSrc += 2 * nSrcXSize;
    4279             :             }
    4280      346854 :             if (j == jMax)
    4281             :             {
    4282             :                 // Process last line if there's an odd number of them.
    4283             : 
    4284             :                 const XMMReg4Double v_acc =
    4285       86045 :                     XMMReg4Double::Load4Val(pSrc) * fourXWeights;
    4286       86045 :                 const double dfRowAcc = v_acc.GetHorizSum();
    4287       86045 :                 const double dfRowAccEnd =
    4288       86045 :                     pSrc[4] * padfWeightsXShifted[iMin + 4] +
    4289       86045 :                     pSrc[5] * padfWeightsXShifted[iMin + 5];
    4290       86045 :                 dfAccumulatorReal +=
    4291       86045 :                     (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
    4292             :             }
    4293             :         }
    4294             :         else
    4295             : #endif
    4296             :         {
    4297     5463580 :             for (int j = jMin; j <= jMax; ++j)
    4298             :             {
    4299     5193900 :                 int i = iMin;
    4300     5193900 :                 double dfRowAcc1 = 0.0;
    4301     5193900 :                 double dfRowAcc2 = 0.0;
    4302             :                 // A bit of loop unrolling
    4303    62750600 :                 for (; i < iMax; i += 2)
    4304             :                 {
    4305    57556700 :                     dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
    4306    57556700 :                     dfRowAcc2 += pSrc[i + 1] * padfWeightsXShifted[i + 1];
    4307             :                 }
    4308     5193900 :                 if (i == iMax)
    4309             :                 {
    4310             :                     // Process last column if there's an odd number of them.
    4311      426183 :                     dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
    4312             :                 }
    4313             : 
    4314     5193900 :                 dfAccumulatorReal +=
    4315     5193900 :                     (dfRowAcc1 + dfRowAcc2) * padfWeightsYShifted[j];
    4316     5193900 :                 pSrc += nSrcXSize;
    4317             :             }
    4318             :         }
    4319             : 
    4320             :         // Calculate the output taking into account weighting.
    4321      616524 :         if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
    4322             :         {
    4323      569230 :             const double dfInvAcc = 1.0 / dfAccumulatorWeight;
    4324      569230 :             *pdfReal = dfAccumulatorReal * dfInvAcc;
    4325      569230 :             *pdfDensity = 1.0;
    4326             :         }
    4327             :         else
    4328             :         {
    4329       47294 :             *pdfReal = dfAccumulatorReal;
    4330       47294 :             *pdfDensity = 1.0;
    4331             :         }
    4332             : 
    4333      616524 :         return true;
    4334             :     }
    4335             : 
    4336         620 :     GPtrDiff_t iRowOffset =
    4337         620 :         iSrcOffset + static_cast<GPtrDiff_t>(jMin - 1) * nSrcXSize + iMin;
    4338             : 
    4339         620 :     int nCountValid = 0;
    4340         620 :     const bool bIsNonComplex = !GDALDataTypeIsComplex(poWK->eWorkingDataType);
    4341             : 
    4342        3560 :     for (int j = jMin; j <= jMax; ++j)
    4343             :     {
    4344        2940 :         iRowOffset += nSrcXSize;
    4345             : 
    4346             :         // Get pixel values.
    4347             :         // We can potentially read extra elements after the "normal" end of the
    4348             :         // source arrays, but the contract of papabySrcImage[iBand],
    4349             :         // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
    4350             :         // is to have WARP_EXTRA_ELTS reserved at their end.
    4351        2940 :         if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
    4352             :                             padfRowDensity, padfRowReal, padfRowImag))
    4353           0 :             continue;
    4354             : 
    4355        2940 :         const double dfWeight1 = padfWeightsYShifted[j];
    4356             : 
    4357             :         // Iterate over pixels in row.
    4358        2940 :         if (padfRowDensity != nullptr)
    4359             :         {
    4360           0 :             for (int i = iMin; i <= iMax; ++i)
    4361             :             {
    4362             :                 // Skip sampling if pixel has zero density.
    4363           0 :                 if (padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD)
    4364           0 :                     continue;
    4365             : 
    4366           0 :                 nCountValid++;
    4367             : 
    4368             :                 //  Use a cached set of weights for this row.
    4369           0 :                 const double dfWeight2 = dfWeight1 * padfWeightsXShifted[i];
    4370             : 
    4371             :                 // Accumulate!
    4372           0 :                 dfAccumulatorReal += padfRowReal[i - iMin] * dfWeight2;
    4373           0 :                 dfAccumulatorImag += padfRowImag[i - iMin] * dfWeight2;
    4374           0 :                 dfAccumulatorDensity += padfRowDensity[i - iMin] * dfWeight2;
    4375           0 :                 dfAccumulatorWeight += dfWeight2;
    4376             :             }
    4377             :         }
    4378        2940 :         else if (bIsNonComplex)
    4379             :         {
    4380        1764 :             double dfRowAccReal = 0.0;
    4381       10560 :             for (int i = iMin; i <= iMax; ++i)
    4382             :             {
    4383        8796 :                 const double dfWeight2 = padfWeightsXShifted[i];
    4384             : 
    4385             :                 // Accumulate!
    4386        8796 :                 dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
    4387             :             }
    4388             : 
    4389        1764 :             dfAccumulatorReal += dfRowAccReal * dfWeight1;
    4390             :         }
    4391             :         else
    4392             :         {
    4393        1176 :             double dfRowAccReal = 0.0;
    4394        1176 :             double dfRowAccImag = 0.0;
    4395        7040 :             for (int i = iMin; i <= iMax; ++i)
    4396             :             {
    4397        5864 :                 const double dfWeight2 = padfWeightsXShifted[i];
    4398             : 
    4399             :                 // Accumulate!
    4400        5864 :                 dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
    4401        5864 :                 dfRowAccImag += padfRowImag[i - iMin] * dfWeight2;
    4402             :             }
    4403             : 
    4404        1176 :             dfAccumulatorReal += dfRowAccReal * dfWeight1;
    4405        1176 :             dfAccumulatorImag += dfRowAccImag * dfWeight1;
    4406             :         }
    4407             :     }
    4408             : 
    4409         620 :     if (dfAccumulatorWeight < 0.000001 ||
    4410           0 :         (padfRowDensity != nullptr &&
    4411           0 :          (dfAccumulatorDensity < 0.000001 ||
    4412           0 :           nCountValid < (jMax - jMin + 1) * (iMax - iMin + 1) / 2)))
    4413             :     {
    4414           0 :         *pdfDensity = 0.0;
    4415           0 :         return false;
    4416             :     }
    4417             : 
    4418             :     // Calculate the output taking into account weighting.
    4419         620 :     if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
    4420             :     {
    4421           0 :         const double dfInvAcc = 1.0 / dfAccumulatorWeight;
    4422           0 :         *pdfReal = dfAccumulatorReal * dfInvAcc;
    4423           0 :         *pdfImag = dfAccumulatorImag * dfInvAcc;
    4424           0 :         if (padfRowDensity != nullptr)
    4425           0 :             *pdfDensity = dfAccumulatorDensity * dfInvAcc;
    4426             :         else
    4427           0 :             *pdfDensity = 1.0;
    4428             :     }
    4429             :     else
    4430             :     {
    4431         620 :         *pdfReal = dfAccumulatorReal;
    4432         620 :         *pdfImag = dfAccumulatorImag;
    4433         620 :         if (padfRowDensity != nullptr)
    4434           0 :             *pdfDensity = dfAccumulatorDensity;
    4435             :         else
    4436         620 :             *pdfDensity = 1.0;
    4437             :     }
    4438             : 
    4439         620 :     return true;
    4440             : }
    4441             : 
    4442             : /************************************************************************/
    4443             : /*                        GWKComputeWeights()                           */
    4444             : /************************************************************************/
    4445             : 
    4446     3744970 : static void GWKComputeWeights(GDALResampleAlg eResample, int iMin, int iMax,
    4447             :                               double dfDeltaX, double dfXScale, int jMin,
    4448             :                               int jMax, double dfDeltaY, double dfYScale,
    4449             :                               double *padfWeightsHorizontal,
    4450             :                               double *padfWeightsVertical, double &dfInvWeights)
    4451             : {
    4452             : 
    4453     3744970 :     const FilterFuncType pfnGetWeight = apfGWKFilter[eResample];
    4454     3744970 :     CPLAssert(pfnGetWeight);
    4455     3744970 :     const FilterFunc4ValuesType pfnGetWeight4Values =
    4456     3744970 :         apfGWKFilter4Values[eResample];
    4457     3744970 :     CPLAssert(pfnGetWeight4Values);
    4458             : 
    4459     3744970 :     int i = iMin;  // Used after for.
    4460     3744970 :     int iC = 0;    // Used after for.
    4461             :     // Not zero, but as close as possible to it, to avoid potential division by
    4462             :     // zero at end of function
    4463     3744970 :     double dfAccumulatorWeightHorizontal = cpl::NumericLimits<double>::min();
    4464     8311770 :     for (; i + 2 < iMax; i += 4, iC += 4)
    4465             :     {
    4466     4568590 :         padfWeightsHorizontal[iC] = (i - dfDeltaX) * dfXScale;
    4467     4568590 :         padfWeightsHorizontal[iC + 1] = padfWeightsHorizontal[iC] + dfXScale;
    4468     4568590 :         padfWeightsHorizontal[iC + 2] =
    4469     4568590 :             padfWeightsHorizontal[iC + 1] + dfXScale;
    4470     4568590 :         padfWeightsHorizontal[iC + 3] =
    4471     4568590 :             padfWeightsHorizontal[iC + 2] + dfXScale;
    4472     4566800 :         dfAccumulatorWeightHorizontal +=
    4473     4568590 :             pfnGetWeight4Values(padfWeightsHorizontal + iC);
    4474             :     }
    4475     3964100 :     for (; i <= iMax; ++i, ++iC)
    4476             :     {
    4477      219566 :         const double dfWeight = pfnGetWeight((i - dfDeltaX) * dfXScale);
    4478      220922 :         padfWeightsHorizontal[iC] = dfWeight;
    4479      220922 :         dfAccumulatorWeightHorizontal += dfWeight;
    4480             :     }
    4481             : 
    4482     3744530 :     int j = jMin;  // Used after for.
    4483     3744530 :     int jC = 0;    // Used after for.
    4484             :     // Not zero, but as close as possible to it, to avoid potential division by
    4485             :     // zero at end of function
    4486     3744530 :     double dfAccumulatorWeightVertical = cpl::NumericLimits<double>::min();
    4487     7885720 :     for (; j + 2 < jMax; j += 4, jC += 4)
    4488             :     {
    4489     4148740 :         padfWeightsVertical[jC] = (j - dfDeltaY) * dfYScale;
    4490     4148740 :         padfWeightsVertical[jC + 1] = padfWeightsVertical[jC] + dfYScale;
    4491     4148740 :         padfWeightsVertical[jC + 2] = padfWeightsVertical[jC + 1] + dfYScale;
    4492     4148740 :         padfWeightsVertical[jC + 3] = padfWeightsVertical[jC + 2] + dfYScale;
    4493     4141190 :         dfAccumulatorWeightVertical +=
    4494     4148740 :             pfnGetWeight4Values(padfWeightsVertical + jC);
    4495             :     }
    4496     8263440 :     for (; j <= jMax; ++j, ++jC)
    4497             :     {
    4498     4517490 :         const double dfWeight = pfnGetWeight((j - dfDeltaY) * dfYScale);
    4499     4526450 :         padfWeightsVertical[jC] = dfWeight;
    4500     4526450 :         dfAccumulatorWeightVertical += dfWeight;
    4501             :     }
    4502             : 
    4503     3745940 :     dfInvWeights =
    4504     3745940 :         1. / (dfAccumulatorWeightHorizontal * dfAccumulatorWeightVertical);
    4505     3745940 : }
    4506             : 
    4507             : /************************************************************************/
    4508             : /*                        GWKResampleNoMasksT()                         */
    4509             : /************************************************************************/
    4510             : 
    4511             : template <class T>
    4512             : static bool
    4513             : GWKResampleNoMasksT(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
    4514             :                     double dfSrcY, T *pValue, double *padfWeightsHorizontal,
    4515             :                     double *padfWeightsVertical, double &dfInvWeights)
    4516             : 
    4517             : {
    4518             :     // Commonly used; save locally.
    4519             :     const int nSrcXSize = poWK->nSrcXSize;
    4520             :     const int nSrcYSize = poWK->nSrcYSize;
    4521             : 
    4522             :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    4523             :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    4524             :     const GPtrDiff_t iSrcOffset =
    4525             :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    4526             : 
    4527             :     const int nXRadius = poWK->nXRadius;
    4528             :     const int nYRadius = poWK->nYRadius;
    4529             : 
    4530             :     // Politely refuse to process invalid coordinates or obscenely small image.
    4531             :     if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
    4532             :         nYRadius > nSrcYSize)
    4533             :         return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    4534             :                                                   pValue);
    4535             : 
    4536             :     T *pSrcBand = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
    4537             :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    4538             :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    4539             : 
    4540             :     const double dfXScale = std::min(poWK->dfXScale, 1.0);
    4541             :     const double dfYScale = std::min(poWK->dfYScale, 1.0);
    4542             : 
    4543             :     int iMin = 1 - nXRadius;
    4544             :     if (iSrcX + iMin < 0)
    4545             :         iMin = -iSrcX;
    4546             :     int iMax = nXRadius;
    4547             :     if (iSrcX + iMax >= nSrcXSize - 1)
    4548             :         iMax = nSrcXSize - 1 - iSrcX;
    4549             : 
    4550             :     int jMin = 1 - nYRadius;
    4551             :     if (iSrcY + jMin < 0)
    4552             :         jMin = -iSrcY;
    4553             :     int jMax = nYRadius;
    4554             :     if (iSrcY + jMax >= nSrcYSize - 1)
    4555             :         jMax = nSrcYSize - 1 - iSrcY;
    4556             : 
    4557             :     if (iBand == 0)
    4558             :     {
    4559             :         GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
    4560             :                           jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
    4561             :                           padfWeightsVertical, dfInvWeights);
    4562             :     }
    4563             : 
    4564             :     // Loop over all rows in the kernel.
    4565             :     double dfAccumulator = 0.0;
    4566             :     for (int jC = 0, j = jMin; j <= jMax; ++j, ++jC)
    4567             :     {
    4568             :         const GPtrDiff_t iSampJ =
    4569             :             iSrcOffset + static_cast<GPtrDiff_t>(j) * nSrcXSize;
    4570             : 
    4571             :         // Loop over all pixels in the row.
    4572             :         double dfAccumulatorLocal = 0.0;
    4573             :         double dfAccumulatorLocal2 = 0.0;
    4574             :         int iC = 0;
    4575             :         int i = iMin;
    4576             :         // Process by chunk of 4 cols.
    4577             :         for (; i + 2 < iMax; i += 4, iC += 4)
    4578             :         {
    4579             :             // Retrieve the pixel & accumulate.
    4580             :             dfAccumulatorLocal +=
    4581             :                 pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
    4582             :             dfAccumulatorLocal +=
    4583             :                 pSrcBand[i + 1 + iSampJ] * padfWeightsHorizontal[iC + 1];
    4584             :             dfAccumulatorLocal2 +=
    4585             :                 pSrcBand[i + 2 + iSampJ] * padfWeightsHorizontal[iC + 2];
    4586             :             dfAccumulatorLocal2 +=
    4587             :                 pSrcBand[i + 3 + iSampJ] * padfWeightsHorizontal[iC + 3];
    4588             :         }
    4589             :         dfAccumulatorLocal += dfAccumulatorLocal2;
    4590             :         if (i < iMax)
    4591             :         {
    4592             :             dfAccumulatorLocal +=
    4593             :                 pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
    4594             :             dfAccumulatorLocal +=
    4595             :                 pSrcBand[i + 1 + iSampJ] * padfWeightsHorizontal[iC + 1];
    4596             :             i += 2;
    4597             :             iC += 2;
    4598             :         }
    4599             :         if (i == iMax)
    4600             :         {
    4601             :             dfAccumulatorLocal +=
    4602             :                 pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
    4603             :         }
    4604             : 
    4605             :         dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
    4606             :     }
    4607             : 
    4608             :     *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
    4609             : 
    4610             :     return true;
    4611             : }
    4612             : 
    4613             : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
    4614             : /* Could possibly be used too on 32bit, but we would need to check at runtime */
    4615             : #if defined(USE_SSE2)
    4616             : 
    4617             : /************************************************************************/
    4618             : /*                    GWKResampleNoMasks_SSE2_T()                       */
    4619             : /************************************************************************/
    4620             : 
    4621             : template <class T>
    4622     9141413 : static bool GWKResampleNoMasks_SSE2_T(const GDALWarpKernel *poWK, int iBand,
    4623             :                                       double dfSrcX, double dfSrcY, T *pValue,
    4624             :                                       double *padfWeightsHorizontal,
    4625             :                                       double *padfWeightsVertical,
    4626             :                                       double &dfInvWeights)
    4627             : {
    4628             :     // Commonly used; save locally.
    4629     9141413 :     const int nSrcXSize = poWK->nSrcXSize;
    4630     9141413 :     const int nSrcYSize = poWK->nSrcYSize;
    4631             : 
    4632     9141413 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    4633     9141413 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    4634     9141413 :     const GPtrDiff_t iSrcOffset =
    4635     9141413 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    4636     9141413 :     const int nXRadius = poWK->nXRadius;
    4637     9141413 :     const int nYRadius = poWK->nYRadius;
    4638             : 
    4639             :     // Politely refuse to process invalid coordinates or obscenely small image.
    4640     9141413 :     if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
    4641             :         nYRadius > nSrcYSize)
    4642           2 :         return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    4643           3 :                                                   pValue);
    4644             : 
    4645     9161211 :     const T *pSrcBand =
    4646     9161211 :         reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
    4647             : 
    4648     9161211 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    4649     9161211 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    4650     9161211 :     const double dfXScale = std::min(poWK->dfXScale, 1.0);
    4651     9164311 :     const double dfYScale = std::min(poWK->dfYScale, 1.0);
    4652             : 
    4653     9170271 :     int iMin = 1 - nXRadius;
    4654     9170271 :     if (iSrcX + iMin < 0)
    4655       43130 :         iMin = -iSrcX;
    4656     9170271 :     int iMax = nXRadius;
    4657     9170271 :     if (iSrcX + iMax >= nSrcXSize - 1)
    4658       38080 :         iMax = nSrcXSize - 1 - iSrcX;
    4659             : 
    4660     9170271 :     int jMin = 1 - nYRadius;
    4661     9170271 :     if (iSrcY + jMin < 0)
    4662       49554 :         jMin = -iSrcY;
    4663     9170271 :     int jMax = nYRadius;
    4664     9170271 :     if (iSrcY + jMax >= nSrcYSize - 1)
    4665       35984 :         jMax = nSrcYSize - 1 - iSrcY;
    4666             : 
    4667     9170271 :     if (iBand == 0)
    4668             :     {
    4669     3743281 :         GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
    4670             :                           jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
    4671             :                           padfWeightsVertical, dfInvWeights);
    4672             :     }
    4673             : 
    4674     9159941 :     GPtrDiff_t iSampJ = iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
    4675             :     // Process by chunk of 4 rows.
    4676     9159941 :     int jC = 0;
    4677     9159941 :     int j = jMin;
    4678     9159941 :     double dfAccumulator = 0.0;
    4679    19409893 :     for (; j + 2 < jMax; j += 4, iSampJ += 4 * nSrcXSize, jC += 4)
    4680             :     {
    4681             :         // Loop over all pixels in the row.
    4682    10269922 :         int iC = 0;
    4683    10269922 :         int i = iMin;
    4684             :         // Process by chunk of 4 cols.
    4685    10269922 :         XMMReg4Double v_acc_1 = XMMReg4Double::Zero();
    4686    10249512 :         XMMReg4Double v_acc_2 = XMMReg4Double::Zero();
    4687    10248082 :         XMMReg4Double v_acc_3 = XMMReg4Double::Zero();
    4688    10255552 :         XMMReg4Double v_acc_4 = XMMReg4Double::Zero();
    4689    26752280 :         for (; i + 2 < iMax; i += 4, iC += 4)
    4690             :         {
    4691             :             // Retrieve the pixel & accumulate.
    4692    16505288 :             XMMReg4Double v_pixels_1 =
    4693    16505288 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
    4694    16481488 :             XMMReg4Double v_pixels_2 =
    4695    16481488 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + nSrcXSize);
    4696    16513188 :             XMMReg4Double v_pixels_3 =
    4697    16513188 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
    4698    16509988 :             XMMReg4Double v_pixels_4 =
    4699    16509988 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
    4700             : 
    4701    16518988 :             XMMReg4Double v_padfWeight =
    4702    16518988 :                 XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
    4703             : 
    4704    16500588 :             v_acc_1 += v_pixels_1 * v_padfWeight;
    4705    16509188 :             v_acc_2 += v_pixels_2 * v_padfWeight;
    4706    16513988 :             v_acc_3 += v_pixels_3 * v_padfWeight;
    4707    16516788 :             v_acc_4 += v_pixels_4 * v_padfWeight;
    4708             :         }
    4709             : 
    4710    10246972 :         if (i < iMax)
    4711             :         {
    4712      142670 :             XMMReg2Double v_pixels_1 =
    4713      142670 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ);
    4714      142670 :             XMMReg2Double v_pixels_2 =
    4715      142670 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + nSrcXSize);
    4716      142670 :             XMMReg2Double v_pixels_3 =
    4717      142670 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
    4718      142669 :             XMMReg2Double v_pixels_4 =
    4719      142669 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
    4720             : 
    4721      142670 :             XMMReg2Double v_padfWeight =
    4722      142670 :                 XMMReg2Double::Load2Val(padfWeightsHorizontal + iC);
    4723             : 
    4724      142670 :             v_acc_1.AddToLow(v_pixels_1 * v_padfWeight);
    4725      142670 :             v_acc_2.AddToLow(v_pixels_2 * v_padfWeight);
    4726      142670 :             v_acc_3.AddToLow(v_pixels_3 * v_padfWeight);
    4727      142670 :             v_acc_4.AddToLow(v_pixels_4 * v_padfWeight);
    4728             : 
    4729      142670 :             i += 2;
    4730      142670 :             iC += 2;
    4731             :         }
    4732             : 
    4733    10246972 :         double dfAccumulatorLocal_1 = v_acc_1.GetHorizSum();
    4734    10246832 :         double dfAccumulatorLocal_2 = v_acc_2.GetHorizSum();
    4735    10264132 :         double dfAccumulatorLocal_3 = v_acc_3.GetHorizSum();
    4736    10255742 :         double dfAccumulatorLocal_4 = v_acc_4.GetHorizSum();
    4737             : 
    4738    10249992 :         if (i == iMax)
    4739             :         {
    4740       49171 :             dfAccumulatorLocal_1 += static_cast<double>(pSrcBand[i + iSampJ]) *
    4741       49171 :                                     padfWeightsHorizontal[iC];
    4742       49171 :             dfAccumulatorLocal_2 +=
    4743       49171 :                 static_cast<double>(pSrcBand[i + iSampJ + nSrcXSize]) *
    4744       49171 :                 padfWeightsHorizontal[iC];
    4745       49171 :             dfAccumulatorLocal_3 +=
    4746       49171 :                 static_cast<double>(pSrcBand[i + iSampJ + 2 * nSrcXSize]) *
    4747       49171 :                 padfWeightsHorizontal[iC];
    4748       49171 :             dfAccumulatorLocal_4 +=
    4749       49171 :                 static_cast<double>(pSrcBand[i + iSampJ + 3 * nSrcXSize]) *
    4750       49171 :                 padfWeightsHorizontal[iC];
    4751             :         }
    4752             : 
    4753    10249992 :         dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal_1;
    4754    10249992 :         dfAccumulator += padfWeightsVertical[jC + 1] * dfAccumulatorLocal_2;
    4755    10249992 :         dfAccumulator += padfWeightsVertical[jC + 2] * dfAccumulatorLocal_3;
    4756    10249992 :         dfAccumulator += padfWeightsVertical[jC + 3] * dfAccumulatorLocal_4;
    4757             :     }
    4758    22245041 :     for (; j <= jMax; ++j, iSampJ += nSrcXSize, ++jC)
    4759             :     {
    4760             :         // Loop over all pixels in the row.
    4761    13076340 :         int iC = 0;
    4762    13076340 :         int i = iMin;
    4763             :         // Process by chunk of 4 cols.
    4764    13076340 :         XMMReg4Double v_acc = XMMReg4Double::Zero();
    4765    26177263 :         for (; i + 2 < iMax; i += 4, iC += 4)
    4766             :         {
    4767             :             // Retrieve the pixel & accumulate.
    4768    13104523 :             XMMReg4Double v_pixels =
    4769    13104523 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
    4770    13118923 :             XMMReg4Double v_padfWeight =
    4771    13118923 :                 XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
    4772             : 
    4773    13120823 :             v_acc += v_pixels * v_padfWeight;
    4774             :         }
    4775             : 
    4776    13072740 :         double dfAccumulatorLocal = v_acc.GetHorizSum();
    4777             : 
    4778    13105040 :         if (i < iMax)
    4779             :         {
    4780      173904 :             dfAccumulatorLocal +=
    4781      173904 :                 pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
    4782      173904 :             dfAccumulatorLocal +=
    4783      173904 :                 pSrcBand[i + 1 + iSampJ] * padfWeightsHorizontal[iC + 1];
    4784      173904 :             i += 2;
    4785      173904 :             iC += 2;
    4786             :         }
    4787    13105040 :         if (i == iMax)
    4788             :         {
    4789       33014 :             dfAccumulatorLocal += static_cast<double>(pSrcBand[i + iSampJ]) *
    4790       33014 :                                   padfWeightsHorizontal[iC];
    4791             :         }
    4792             : 
    4793    13105040 :         dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
    4794             :     }
    4795             : 
    4796     9168671 :     *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
    4797             : 
    4798     9173721 :     return true;
    4799             : }
    4800             : 
    4801             : /************************************************************************/
    4802             : /*                     GWKResampleNoMasksT<GByte>()                     */
    4803             : /************************************************************************/
    4804             : 
    4805             : template <>
    4806     8581550 : bool GWKResampleNoMasksT<GByte>(const GDALWarpKernel *poWK, int iBand,
    4807             :                                 double dfSrcX, double dfSrcY, GByte *pValue,
    4808             :                                 double *padfWeightsHorizontal,
    4809             :                                 double *padfWeightsVertical,
    4810             :                                 double &dfInvWeights)
    4811             : {
    4812     8581550 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    4813             :                                      padfWeightsHorizontal, padfWeightsVertical,
    4814     8547000 :                                      dfInvWeights);
    4815             : }
    4816             : 
    4817             : /************************************************************************/
    4818             : /*                     GWKResampleNoMasksT<GInt16>()                    */
    4819             : /************************************************************************/
    4820             : 
    4821             : template <>
    4822      252563 : bool GWKResampleNoMasksT<GInt16>(const GDALWarpKernel *poWK, int iBand,
    4823             :                                  double dfSrcX, double dfSrcY, GInt16 *pValue,
    4824             :                                  double *padfWeightsHorizontal,
    4825             :                                  double *padfWeightsVertical,
    4826             :                                  double &dfInvWeights)
    4827             : {
    4828      252563 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    4829             :                                      padfWeightsHorizontal, padfWeightsVertical,
    4830      252563 :                                      dfInvWeights);
    4831             : }
    4832             : 
    4833             : /************************************************************************/
    4834             : /*                     GWKResampleNoMasksT<GUInt16>()                   */
    4835             : /************************************************************************/
    4836             : 
    4837             : template <>
    4838      343440 : bool GWKResampleNoMasksT<GUInt16>(const GDALWarpKernel *poWK, int iBand,
    4839             :                                   double dfSrcX, double dfSrcY, GUInt16 *pValue,
    4840             :                                   double *padfWeightsHorizontal,
    4841             :                                   double *padfWeightsVertical,
    4842             :                                   double &dfInvWeights)
    4843             : {
    4844      343440 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    4845             :                                      padfWeightsHorizontal, padfWeightsVertical,
    4846      343440 :                                      dfInvWeights);
    4847             : }
    4848             : 
    4849             : /************************************************************************/
    4850             : /*                     GWKResampleNoMasksT<float>()                     */
    4851             : /************************************************************************/
    4852             : 
    4853             : template <>
    4854        2500 : bool GWKResampleNoMasksT<float>(const GDALWarpKernel *poWK, int iBand,
    4855             :                                 double dfSrcX, double dfSrcY, float *pValue,
    4856             :                                 double *padfWeightsHorizontal,
    4857             :                                 double *padfWeightsVertical,
    4858             :                                 double &dfInvWeights)
    4859             : {
    4860        2500 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    4861             :                                      padfWeightsHorizontal, padfWeightsVertical,
    4862        2500 :                                      dfInvWeights);
    4863             : }
    4864             : 
    4865             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    4866             : 
    4867             : /************************************************************************/
    4868             : /*                     GWKResampleNoMasksT<double>()                    */
    4869             : /************************************************************************/
    4870             : 
    4871             : template <>
    4872             : bool GWKResampleNoMasksT<double>(const GDALWarpKernel *poWK, int iBand,
    4873             :                                  double dfSrcX, double dfSrcY, double *pValue,
    4874             :                                  double *padfWeightsHorizontal,
    4875             :                                  double *padfWeightsVertical,
    4876             :                                  double &dfInvWeights)
    4877             : {
    4878             :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    4879             :                                      padfWeightsHorizontal, padfWeightsVertical,
    4880             :                                      dfInvWeights);
    4881             : }
    4882             : 
    4883             : #endif /* INSTANTIATE_FLOAT64_SSE2_IMPL */
    4884             : 
    4885             : #endif /* defined(USE_SSE2) */
    4886             : 
    4887             : /************************************************************************/
    4888             : /*                     GWKRoundSourceCoordinates()                      */
    4889             : /************************************************************************/
    4890             : 
    4891        1000 : static void GWKRoundSourceCoordinates(
    4892             :     int nDstXSize, double *padfX, double *padfY, double *padfZ, int *pabSuccess,
    4893             :     double dfSrcCoordPrecision, double dfErrorThreshold,
    4894             :     GDALTransformerFunc pfnTransformer, void *pTransformerArg, double dfDstXOff,
    4895             :     double dfDstY)
    4896             : {
    4897        1000 :     double dfPct = 0.8;
    4898        1000 :     if (dfErrorThreshold > 0 && dfSrcCoordPrecision / dfErrorThreshold >= 10.0)
    4899             :     {
    4900        1000 :         dfPct = 1.0 - 2 * 1.0 / (dfSrcCoordPrecision / dfErrorThreshold);
    4901             :     }
    4902        1000 :     const double dfExactTransformThreshold = 0.5 * dfPct * dfSrcCoordPrecision;
    4903             : 
    4904      501000 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    4905             :     {
    4906      500000 :         const double dfXBefore = padfX[iDstX];
    4907      500000 :         const double dfYBefore = padfY[iDstX];
    4908      500000 :         padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
    4909             :                        dfSrcCoordPrecision;
    4910      500000 :         padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
    4911             :                        dfSrcCoordPrecision;
    4912             : 
    4913             :         // If we are in an uncertainty zone, go to non-approximated
    4914             :         // transformation.
    4915             :         // Due to the 80% of half-precision threshold, dfSrcCoordPrecision must
    4916             :         // be at least 10 times greater than the approximation error.
    4917      500000 :         if (fabs(dfXBefore - padfX[iDstX]) > dfExactTransformThreshold ||
    4918      399914 :             fabs(dfYBefore - padfY[iDstX]) > dfExactTransformThreshold)
    4919             :         {
    4920      180090 :             padfX[iDstX] = iDstX + dfDstXOff;
    4921      180090 :             padfY[iDstX] = dfDstY;
    4922      180090 :             padfZ[iDstX] = 0.0;
    4923      180090 :             pfnTransformer(pTransformerArg, TRUE, 1, padfX + iDstX,
    4924      180090 :                            padfY + iDstX, padfZ + iDstX, pabSuccess + iDstX);
    4925      180090 :             padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
    4926             :                            dfSrcCoordPrecision;
    4927      180090 :             padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
    4928             :                            dfSrcCoordPrecision;
    4929             :         }
    4930             :     }
    4931        1000 : }
    4932             : 
    4933             : /************************************************************************/
    4934             : /*                     GWKCheckAndComputeSrcOffsets()                   */
    4935             : /************************************************************************/
    4936             : static CPL_INLINE bool
    4937   108372000 : GWKCheckAndComputeSrcOffsets(GWKJobStruct *psJob, int *_pabSuccess, int _iDstX,
    4938             :                              int _iDstY, double *_padfX, double *_padfY,
    4939             :                              int _nSrcXSize, int _nSrcYSize,
    4940             :                              GPtrDiff_t &iSrcOffset)
    4941             : {
    4942   108372000 :     const GDALWarpKernel *_poWK = psJob->poWK;
    4943   108504000 :     for (int iTry = 0; iTry < 2; ++iTry)
    4944             :     {
    4945   108416000 :         if (iTry == 1)
    4946             :         {
    4947             :             // If the source coordinate is slightly outside of the source raster
    4948             :             // retry to transform it alone, so that the exact coordinate
    4949             :             // transformer is used.
    4950             : 
    4951      131944 :             _padfX[_iDstX] = _iDstX + 0.5 + _poWK->nDstXOff;
    4952      131944 :             _padfY[_iDstX] = _iDstY + 0.5 + _poWK->nDstYOff;
    4953      131944 :             double dfZ = 0;
    4954      131944 :             _poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1,
    4955      131944 :                                   _padfX + _iDstX, _padfY + _iDstX, &dfZ,
    4956      131944 :                                   _pabSuccess + _iDstX);
    4957             :         }
    4958   108416000 :         if (!_pabSuccess[_iDstX])
    4959     3593470 :             return false;
    4960             : 
    4961             :         // If this happens this is likely the symptom of a bug somewhere.
    4962   104822000 :         if (std::isnan(_padfX[_iDstX]) || std::isnan(_padfY[_iDstX]))
    4963             :         {
    4964             :             static bool bNanCoordFound = false;
    4965           0 :             if (!bNanCoordFound)
    4966             :             {
    4967           0 :                 CPLDebug("WARP",
    4968             :                          "GWKCheckAndComputeSrcOffsets(): "
    4969             :                          "NaN coordinate found on point %d.",
    4970             :                          _iDstX);
    4971           0 :                 bNanCoordFound = true;
    4972             :             }
    4973           0 :             return false;
    4974             :         }
    4975             : 
    4976             :         /* --------------------------------------------------------------------
    4977             :          */
    4978             :         /*      Figure out what pixel we want in our source raster, and skip */
    4979             :         /*      further processing if it is well off the source image. */
    4980             :         /* --------------------------------------------------------------------
    4981             :          */
    4982             :         /* We test against the value before casting to avoid the */
    4983             :         /* problem of asymmetric truncation effects around zero.  That is */
    4984             :         /* -0.5 will be 0 when cast to an int. */
    4985   104937000 :         if (_padfX[_iDstX] < _poWK->nSrcXOff)
    4986             :         {
    4987             :             // If the source coordinate is slightly outside of the source raster
    4988             :             // retry to transform it alone, so that the exact coordinate
    4989             :             // transformer is used.
    4990     4137660 :             if (iTry == 0 && _padfX[_iDstX] > _poWK->nSrcXOff - 1)
    4991       21506 :                 continue;
    4992     4116150 :             return false;
    4993             :         }
    4994             : 
    4995   100800000 :         if (_padfY[_iDstX] < _poWK->nSrcYOff)
    4996             :         {
    4997             :             // If the source coordinate is slightly outside of the source raster
    4998             :             // retry to transform it alone, so that the exact coordinate
    4999             :             // transformer is used.
    5000     4892120 :             if (iTry == 0 && _padfY[_iDstX] > _poWK->nSrcYOff - 1)
    5001       41563 :                 continue;
    5002     4850560 :             return false;
    5003             :         }
    5004             : 
    5005             :         // Check for potential overflow when casting from float to int, (if
    5006             :         // operating outside natural projection area, padfX/Y can be a very huge
    5007             :         // positive number before doing the actual conversion), as such cast is
    5008             :         // undefined behavior that can trigger exception with some compilers
    5009             :         // (see #6753)
    5010    95907700 :         if (_padfX[_iDstX] + 1e-10 > _nSrcXSize + _poWK->nSrcXOff)
    5011             :         {
    5012             :             // If the source coordinate is slightly outside of the source raster
    5013             :             // retry to transform it alone, so that the exact coordinate
    5014             :             // transformer is used.
    5015     3503620 :             if (iTry == 0 && _padfX[_iDstX] < _nSrcXSize + _poWK->nSrcXOff + 1)
    5016       33316 :                 continue;
    5017     3470300 :             return false;
    5018             :         }
    5019    92404100 :         if (_padfY[_iDstX] + 1e-10 > _nSrcYSize + _poWK->nSrcYOff)
    5020             :         {
    5021             :             // If the source coordinate is slightly outside of the source raster
    5022             :             // retry to transform it alone, so that the exact coordinate
    5023             :             // transformer is used.
    5024     3827570 :             if (iTry == 0 && _padfY[_iDstX] < _nSrcYSize + _poWK->nSrcYOff + 1)
    5025       35559 :                 continue;
    5026     3792010 :             return false;
    5027             :         }
    5028             : 
    5029    88576500 :         break;
    5030             :     }
    5031             : 
    5032    88664400 :     int iSrcX = static_cast<int>(_padfX[_iDstX] + 1.0e-10) - _poWK->nSrcXOff;
    5033    88664400 :     int iSrcY = static_cast<int>(_padfY[_iDstX] + 1.0e-10) - _poWK->nSrcYOff;
    5034    88664400 :     if (iSrcX == _nSrcXSize)
    5035           0 :         iSrcX--;
    5036    88664400 :     if (iSrcY == _nSrcYSize)
    5037           0 :         iSrcY--;
    5038             : 
    5039             :     // Those checks should normally be OK given the previous ones.
    5040    88664400 :     CPLAssert(iSrcX >= 0);
    5041    88664400 :     CPLAssert(iSrcY >= 0);
    5042    88664400 :     CPLAssert(iSrcX < _nSrcXSize);
    5043    88664400 :     CPLAssert(iSrcY < _nSrcYSize);
    5044             : 
    5045    88664400 :     iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * _nSrcXSize;
    5046             : 
    5047    88664400 :     return true;
    5048             : }
    5049             : 
    5050             : /************************************************************************/
    5051             : /*                   GWKOneSourceCornerFailsToReproject()               */
    5052             : /************************************************************************/
    5053             : 
    5054         778 : static bool GWKOneSourceCornerFailsToReproject(GWKJobStruct *psJob)
    5055             : {
    5056         778 :     GDALWarpKernel *poWK = psJob->poWK;
    5057        2324 :     for (int iY = 0; iY <= 1; ++iY)
    5058             :     {
    5059        4644 :         for (int iX = 0; iX <= 1; ++iX)
    5060             :         {
    5061        3098 :             double dfXTmp = poWK->nSrcXOff + iX * poWK->nSrcXSize;
    5062        3098 :             double dfYTmp = poWK->nSrcYOff + iY * poWK->nSrcYSize;
    5063        3098 :             double dfZTmp = 0;
    5064        3098 :             int nSuccess = FALSE;
    5065        3098 :             poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp,
    5066             :                                  &dfYTmp, &dfZTmp, &nSuccess);
    5067        3098 :             if (!nSuccess)
    5068           6 :                 return true;
    5069             :         }
    5070             :     }
    5071         772 :     return false;
    5072             : }
    5073             : 
    5074             : /************************************************************************/
    5075             : /*                       GWKAdjustSrcOffsetOnEdge()                     */
    5076             : /************************************************************************/
    5077             : 
    5078        9714 : static bool GWKAdjustSrcOffsetOnEdge(GWKJobStruct *psJob,
    5079             :                                      GPtrDiff_t &iSrcOffset)
    5080             : {
    5081        9714 :     GDALWarpKernel *poWK = psJob->poWK;
    5082        9714 :     const int nSrcXSize = poWK->nSrcXSize;
    5083        9714 :     const int nSrcYSize = poWK->nSrcYSize;
    5084             : 
    5085             :     // Check if the computed source position slightly altered
    5086             :     // fails to reproject. If so, then we are at the edge of
    5087             :     // the validity area, and it is worth checking neighbour
    5088             :     // source pixels for validity.
    5089        9714 :     int nSuccess = FALSE;
    5090             :     {
    5091        9714 :         double dfXTmp =
    5092        9714 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5093        9714 :         double dfYTmp =
    5094        9714 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5095        9714 :         double dfZTmp = 0;
    5096        9714 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5097             :                              &dfZTmp, &nSuccess);
    5098             :     }
    5099        9714 :     if (nSuccess)
    5100             :     {
    5101        6996 :         double dfXTmp =
    5102        6996 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5103        6996 :         double dfYTmp =
    5104        6996 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
    5105        6996 :         double dfZTmp = 0;
    5106        6996 :         nSuccess = FALSE;
    5107        6996 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5108             :                              &dfZTmp, &nSuccess);
    5109             :     }
    5110        9714 :     if (nSuccess)
    5111             :     {
    5112        5624 :         double dfXTmp =
    5113        5624 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
    5114        5624 :         double dfYTmp =
    5115        5624 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5116        5624 :         double dfZTmp = 0;
    5117        5624 :         nSuccess = FALSE;
    5118        5624 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5119             :                              &dfZTmp, &nSuccess);
    5120             :     }
    5121             : 
    5122       14166 :     if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
    5123        4452 :         CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + 1))
    5124             :     {
    5125        1860 :         iSrcOffset++;
    5126        1860 :         return true;
    5127             :     }
    5128       10290 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
    5129        2436 :              CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + nSrcXSize))
    5130             :     {
    5131        1334 :         iSrcOffset += nSrcXSize;
    5132        1334 :         return true;
    5133             :     }
    5134        7838 :     else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
    5135        1318 :              CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - 1))
    5136             :     {
    5137         956 :         iSrcOffset--;
    5138         956 :         return true;
    5139             :     }
    5140        5924 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
    5141         360 :              CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - nSrcXSize))
    5142             :     {
    5143         340 :         iSrcOffset -= nSrcXSize;
    5144         340 :         return true;
    5145             :     }
    5146             : 
    5147        5224 :     return false;
    5148             : }
    5149             : 
    5150             : /************************************************************************/
    5151             : /*                 GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity()          */
    5152             : /************************************************************************/
    5153             : 
    5154           0 : static bool GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(GWKJobStruct *psJob,
    5155             :                                                       GPtrDiff_t &iSrcOffset)
    5156             : {
    5157           0 :     GDALWarpKernel *poWK = psJob->poWK;
    5158           0 :     const int nSrcXSize = poWK->nSrcXSize;
    5159           0 :     const int nSrcYSize = poWK->nSrcYSize;
    5160             : 
    5161             :     // Check if the computed source position slightly altered
    5162             :     // fails to reproject. If so, then we are at the edge of
    5163             :     // the validity area, and it is worth checking neighbour
    5164             :     // source pixels for validity.
    5165           0 :     int nSuccess = FALSE;
    5166             :     {
    5167           0 :         double dfXTmp =
    5168           0 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5169           0 :         double dfYTmp =
    5170           0 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5171           0 :         double dfZTmp = 0;
    5172           0 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5173             :                              &dfZTmp, &nSuccess);
    5174             :     }
    5175           0 :     if (nSuccess)
    5176             :     {
    5177           0 :         double dfXTmp =
    5178           0 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5179           0 :         double dfYTmp =
    5180           0 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
    5181           0 :         double dfZTmp = 0;
    5182           0 :         nSuccess = FALSE;
    5183           0 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5184             :                              &dfZTmp, &nSuccess);
    5185             :     }
    5186           0 :     if (nSuccess)
    5187             :     {
    5188           0 :         double dfXTmp =
    5189           0 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
    5190           0 :         double dfYTmp =
    5191           0 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5192           0 :         double dfZTmp = 0;
    5193           0 :         nSuccess = FALSE;
    5194           0 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5195             :                              &dfZTmp, &nSuccess);
    5196             :     }
    5197             : 
    5198           0 :     if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
    5199           0 :         poWK->pafUnifiedSrcDensity[iSrcOffset + 1] >= SRC_DENSITY_THRESHOLD)
    5200             :     {
    5201           0 :         iSrcOffset++;
    5202           0 :         return true;
    5203             :     }
    5204           0 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
    5205           0 :              poWK->pafUnifiedSrcDensity[iSrcOffset + nSrcXSize] >=
    5206             :                  SRC_DENSITY_THRESHOLD)
    5207             :     {
    5208           0 :         iSrcOffset += nSrcXSize;
    5209           0 :         return true;
    5210             :     }
    5211           0 :     else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
    5212           0 :              poWK->pafUnifiedSrcDensity[iSrcOffset - 1] >=
    5213             :                  SRC_DENSITY_THRESHOLD)
    5214             :     {
    5215           0 :         iSrcOffset--;
    5216           0 :         return true;
    5217             :     }
    5218           0 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
    5219           0 :              poWK->pafUnifiedSrcDensity[iSrcOffset - nSrcXSize] >=
    5220             :                  SRC_DENSITY_THRESHOLD)
    5221             :     {
    5222           0 :         iSrcOffset -= nSrcXSize;
    5223           0 :         return true;
    5224             :     }
    5225             : 
    5226           0 :     return false;
    5227             : }
    5228             : 
    5229             : /************************************************************************/
    5230             : /*                           GWKGeneralCase()                           */
    5231             : /*                                                                      */
    5232             : /*      This is the most general case.  It attempts to handle all       */
    5233             : /*      possible features with relatively little concern for            */
    5234             : /*      efficiency.                                                     */
    5235             : /************************************************************************/
    5236             : 
    5237         239 : static void GWKGeneralCaseThread(void *pData)
    5238             : {
    5239         239 :     GWKJobStruct *psJob = reinterpret_cast<GWKJobStruct *>(pData);
    5240         239 :     GDALWarpKernel *poWK = psJob->poWK;
    5241         239 :     const int iYMin = psJob->iYMin;
    5242         239 :     const int iYMax = psJob->iYMax;
    5243             :     const double dfMultFactorVerticalShiftPipeline =
    5244         239 :         poWK->bApplyVerticalShift
    5245         239 :             ? CPLAtof(CSLFetchNameValueDef(
    5246           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    5247             :                   "1.0"))
    5248         239 :             : 0.0;
    5249             : 
    5250         239 :     int nDstXSize = poWK->nDstXSize;
    5251         239 :     int nSrcXSize = poWK->nSrcXSize;
    5252         239 :     int nSrcYSize = poWK->nSrcYSize;
    5253             : 
    5254             :     /* -------------------------------------------------------------------- */
    5255             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    5256             :     /*      scanlines worth of positions.                                   */
    5257             :     /* -------------------------------------------------------------------- */
    5258             :     // For x, 2 *, because we cache the precomputed values at the end.
    5259             :     double *padfX =
    5260         239 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    5261             :     double *padfY =
    5262         239 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5263             :     double *padfZ =
    5264         239 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5265         239 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    5266             : 
    5267         239 :     const bool bUse4SamplesFormula =
    5268         239 :         poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
    5269             : 
    5270         239 :     GWKResampleWrkStruct *psWrkStruct = nullptr;
    5271         239 :     if (poWK->eResample != GRA_NearestNeighbour)
    5272             :     {
    5273         220 :         psWrkStruct = GWKResampleCreateWrkStruct(poWK);
    5274             :     }
    5275         239 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    5276         239 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    5277         239 :     const double dfErrorThreshold = CPLAtof(
    5278         239 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    5279             : 
    5280             :     const bool bOneSourceCornerFailsToReproject =
    5281         239 :         GWKOneSourceCornerFailsToReproject(psJob);
    5282             : 
    5283             :     // Precompute values.
    5284        6469 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5285        6230 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    5286             : 
    5287             :     /* ==================================================================== */
    5288             :     /*      Loop over output lines.                                         */
    5289             :     /* ==================================================================== */
    5290        6469 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    5291             :     {
    5292             :         /* --------------------------------------------------------------------
    5293             :          */
    5294             :         /*      Setup points to transform to source image space. */
    5295             :         /* --------------------------------------------------------------------
    5296             :          */
    5297        6230 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    5298        6230 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    5299      242390 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5300      236160 :             padfY[iDstX] = dfY;
    5301        6230 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    5302             : 
    5303             :         /* --------------------------------------------------------------------
    5304             :          */
    5305             :         /*      Transform the points from destination pixel/line coordinates */
    5306             :         /*      to source pixel/line coordinates. */
    5307             :         /* --------------------------------------------------------------------
    5308             :          */
    5309        6230 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    5310             :                              padfY, padfZ, pabSuccess);
    5311        6230 :         if (dfSrcCoordPrecision > 0.0)
    5312             :         {
    5313           0 :             GWKRoundSourceCoordinates(
    5314             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    5315             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    5316           0 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    5317             :         }
    5318             : 
    5319             :         /* ====================================================================
    5320             :          */
    5321             :         /*      Loop over pixels in output scanline. */
    5322             :         /* ====================================================================
    5323             :          */
    5324      242390 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5325             :         {
    5326      236160 :             GPtrDiff_t iSrcOffset = 0;
    5327      236160 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    5328             :                                               padfX, padfY, nSrcXSize,
    5329             :                                               nSrcYSize, iSrcOffset))
    5330           0 :                 continue;
    5331             : 
    5332             :             /* --------------------------------------------------------------------
    5333             :              */
    5334             :             /*      Do not try to apply transparent/invalid source pixels to the
    5335             :              */
    5336             :             /*      destination.  This currently ignores the multi-pixel input
    5337             :              */
    5338             :             /*      of bilinear and cubic resamples. */
    5339             :             /* --------------------------------------------------------------------
    5340             :              */
    5341      236160 :             double dfDensity = 1.0;
    5342             : 
    5343      236160 :             if (poWK->pafUnifiedSrcDensity != nullptr)
    5344             :             {
    5345        1200 :                 dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    5346        1200 :                 if (dfDensity < SRC_DENSITY_THRESHOLD)
    5347             :                 {
    5348           0 :                     if (!bOneSourceCornerFailsToReproject)
    5349             :                     {
    5350           0 :                         continue;
    5351             :                     }
    5352           0 :                     else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
    5353             :                                  psJob, iSrcOffset))
    5354             :                     {
    5355           0 :                         dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    5356             :                     }
    5357             :                     else
    5358             :                     {
    5359           0 :                         continue;
    5360             :                     }
    5361             :                 }
    5362             :             }
    5363             : 
    5364      236160 :             if (poWK->panUnifiedSrcValid != nullptr &&
    5365           0 :                 !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    5366             :             {
    5367           0 :                 if (!bOneSourceCornerFailsToReproject)
    5368             :                 {
    5369           0 :                     continue;
    5370             :                 }
    5371           0 :                 else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
    5372             :                 {
    5373           0 :                     continue;
    5374             :                 }
    5375             :             }
    5376             : 
    5377             :             /* ====================================================================
    5378             :              */
    5379             :             /*      Loop processing each band. */
    5380             :             /* ====================================================================
    5381             :              */
    5382      236160 :             bool bHasFoundDensity = false;
    5383             : 
    5384      236160 :             const GPtrDiff_t iDstOffset =
    5385      236160 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    5386      472320 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    5387             :             {
    5388      236160 :                 double dfBandDensity = 0.0;
    5389      236160 :                 double dfValueReal = 0.0;
    5390      236160 :                 double dfValueImag = 0.0;
    5391             : 
    5392             :                 /* --------------------------------------------------------------------
    5393             :                  */
    5394             :                 /*      Collect the source value. */
    5395             :                 /* --------------------------------------------------------------------
    5396             :                  */
    5397      236160 :                 if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
    5398             :                     nSrcYSize == 1)
    5399             :                 {
    5400             :                     // FALSE is returned if dfBandDensity == 0, which is
    5401             :                     // checked below.
    5402         568 :                     CPL_IGNORE_RET_VAL(GWKGetPixelValue(
    5403             :                         poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal,
    5404             :                         &dfValueImag));
    5405             :                 }
    5406      235592 :                 else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
    5407             :                 {
    5408         248 :                     GWKBilinearResample4Sample(
    5409         248 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5410         248 :                         padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5411             :                         &dfValueReal, &dfValueImag);
    5412             :                 }
    5413      235344 :                 else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
    5414             :                 {
    5415         248 :                     GWKCubicResample4Sample(
    5416         248 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5417         248 :                         padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5418             :                         &dfValueReal, &dfValueImag);
    5419             :                 }
    5420             :                 else
    5421             : #ifdef DEBUG
    5422             :                     // Only useful for clang static analyzer.
    5423      235096 :                     if (psWrkStruct != nullptr)
    5424             : #endif
    5425             :                     {
    5426      235096 :                         psWrkStruct->pfnGWKResample(
    5427      235096 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5428      235096 :                             padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5429             :                             &dfValueReal, &dfValueImag, psWrkStruct);
    5430             :                     }
    5431             : 
    5432             :                 // If we didn't find any valid inputs skip to next band.
    5433      236160 :                 if (dfBandDensity < BAND_DENSITY_THRESHOLD)
    5434           0 :                     continue;
    5435             : 
    5436      236160 :                 if (poWK->bApplyVerticalShift)
    5437             :                 {
    5438           0 :                     if (!std::isfinite(padfZ[iDstX]))
    5439           0 :                         continue;
    5440             :                     // Subtract padfZ[] since the coordinate transformation is
    5441             :                     // from target to source
    5442           0 :                     dfValueReal =
    5443           0 :                         dfValueReal * poWK->dfMultFactorVerticalShift -
    5444           0 :                         padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
    5445             :                 }
    5446             : 
    5447      236160 :                 bHasFoundDensity = true;
    5448             : 
    5449             :                 /* --------------------------------------------------------------------
    5450             :                  */
    5451             :                 /*      We have a computed value from the source.  Now apply it
    5452             :                  * to      */
    5453             :                 /*      the destination pixel. */
    5454             :                 /* --------------------------------------------------------------------
    5455             :                  */
    5456      236160 :                 GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    5457             :                                  dfValueReal, dfValueImag);
    5458             :             }
    5459             : 
    5460      236160 :             if (!bHasFoundDensity)
    5461           0 :                 continue;
    5462             : 
    5463             :             /* --------------------------------------------------------------------
    5464             :              */
    5465             :             /*      Update destination density/validity masks. */
    5466             :             /* --------------------------------------------------------------------
    5467             :              */
    5468      236160 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    5469             : 
    5470      236160 :             if (poWK->panDstValid != nullptr)
    5471             :             {
    5472           0 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    5473             :             }
    5474             :         } /* Next iDstX */
    5475             : 
    5476             :         /* --------------------------------------------------------------------
    5477             :          */
    5478             :         /*      Report progress to the user, and optionally cancel out. */
    5479             :         /* --------------------------------------------------------------------
    5480             :          */
    5481        6230 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    5482           0 :             break;
    5483             :     }
    5484             : 
    5485             :     /* -------------------------------------------------------------------- */
    5486             :     /*      Cleanup and return.                                             */
    5487             :     /* -------------------------------------------------------------------- */
    5488         239 :     CPLFree(padfX);
    5489         239 :     CPLFree(padfY);
    5490         239 :     CPLFree(padfZ);
    5491         239 :     CPLFree(pabSuccess);
    5492         239 :     if (psWrkStruct)
    5493         220 :         GWKResampleDeleteWrkStruct(psWrkStruct);
    5494         239 : }
    5495             : // GWKGeneralCase(): hands GWKGeneralCaseThread to GWKRun() and returns GWKRun()'s CPLErr unchanged.
    5496         239 : static CPLErr GWKGeneralCase(GDALWarpKernel *poWK)
    5497             : {
    5498         239 :     return GWKRun(poWK, "GWKGeneralCase", GWKGeneralCaseThread);  // The string is presumably a label used by GWKRun() - confirm there.
    5499             : }
    5500             : 
    5501             : /************************************************************************/
    5502             : /*                            GWKRealCase()                             */
    5503             : /*                                                                      */
    5504             : /*      General case for non-complex data types.                        */
    5505             : /************************************************************************/
    5506             : // GWKRealCaseThread(): job worker handed to GWKRun() by GWKRealCase(). For each destination row in [iYMin, iYMax) it transforms pixel centres to source coordinates and writes real-only resampled band values.
    5507         151 : static void GWKRealCaseThread(void *pData)
    5508             : 
    5509             : {
    5510         151 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);  // pData carries this job's GWKJobStruct.
    5511         151 :     GDALWarpKernel *poWK = psJob->poWK;
    5512         151 :     const int iYMin = psJob->iYMin;  // First destination row of this job (inclusive).
    5513         151 :     const int iYMax = psJob->iYMax;  // End of this job's destination rows (exclusive).
    5514             : 
    5515         151 :     const int nDstXSize = poWK->nDstXSize;
    5516         151 :     const int nSrcXSize = poWK->nSrcXSize;
    5517         151 :     const int nSrcYSize = poWK->nSrcYSize;
    5518             :     const double dfMultFactorVerticalShiftPipeline =
    5519         151 :         poWK->bApplyVerticalShift
    5520         151 :             ? CPLAtof(CSLFetchNameValueDef(
    5521           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    5522             :                   "1.0"))
    5523         151 :             : 0.0;  // Only read further down when bApplyVerticalShift is set.
    5524             : 
    5525             :     /* -------------------------------------------------------------------- */
    5526             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    5527             :     /*      scanlines worth of positions.                                   */
    5528             :     /* -------------------------------------------------------------------- */
    5529             : 
    5530             :     // padfX holds 2 * nDstXSize doubles: the second half caches the precomputed destination X coordinates, which are copied back into the first half for every row.
    5531             :     double *padfX =
    5532         151 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    5533             :     double *padfY =
    5534         151 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5535             :     double *padfZ =
    5536         151 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5537         151 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));  // Passed to pfnTransformer, then to GWKCheckAndComputeSrcOffsets().
    5538             : 
    5539         151 :     const bool bUse4SamplesFormula =
    5540         151 :         poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;  // Gates the bilinear/cubic 4-sample branches below.
    5541             : 
    5542         151 :     GWKResampleWrkStruct *psWrkStruct = nullptr;
    5543         151 :     if (poWK->eResample != GRA_NearestNeighbour)  // Stays null for nearest neighbour; released at the end when non-null.
    5544             :     {
    5545         124 :         psWrkStruct = GWKResampleCreateWrkStruct(poWK);
    5546             :     }
    5547         151 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    5548         151 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    5549         151 :     const double dfErrorThreshold = CPLAtof(
    5550         151 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    5551             : 
    5552         438 :     const bool bSrcMaskIsDensity = poWK->panUnifiedSrcValid == nullptr &&
    5553         287 :                                    poWK->papanBandSrcValid == nullptr &&
    5554         136 :                                    poWK->pafUnifiedSrcDensity != nullptr;  // True when the unified density array is the only source mask present.
    5555             : 
    5556             :     const bool bOneSourceCornerFailsToReproject =
    5557         151 :         GWKOneSourceCornerFailsToReproject(psJob);
    5558             : 
    5559             :     // Precompute the destination X pixel-centre coordinates into the second half of padfX.
    5560       19528 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5561       19377 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    5562             : 
    5563             :     /* ==================================================================== */
    5564             :     /*      Loop over output lines.                                         */
    5565             :     /* ==================================================================== */
    5566       22231 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    5567             :     {
    5568             :         /* --------------------------------------------------------------------
    5569             :          */
    5570             :         /*      Setup points to transform to source image space. */
    5571             :         /* --------------------------------------------------------------------
    5572             :          */
    5573       22080 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);  // Reload the cached X values; the first half is later read as source X.
    5574       22080 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    5575    43558400 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5576    43536300 :             padfY[iDstX] = dfY;
    5577       22080 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    5578             : 
    5579             :         /* --------------------------------------------------------------------
    5580             :          */
    5581             :         /*      Transform the points from destination pixel/line coordinates */
    5582             :         /*      to source pixel/line coordinates. */
    5583             :         /* --------------------------------------------------------------------
    5584             :          */
    5585       22080 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    5586             :                              padfY, padfZ, pabSuccess);
    5587       22080 :         if (dfSrcCoordPrecision > 0.0)  // Rounding only happens when SRC_COORD_PRECISION parses to a positive value.
    5588             :         {
    5589           0 :             GWKRoundSourceCoordinates(
    5590             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    5591             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    5592           0 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    5593             :         }
    5594             : 
    5595             :         /* ====================================================================
    5596             :          */
    5597             :         /*      Loop over pixels in output scanline. */
    5598             :         /* ====================================================================
    5599             :          */
    5600    43558400 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5601             :         {
    5602    43536300 :             GPtrDiff_t iSrcOffset = 0;  // Used below as the index into the source mask arrays.
    5603    43536300 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    5604             :                                               padfX, padfY, nSrcXSize,
    5605             :                                               nSrcYSize, iSrcOffset))
    5606    42894000 :                 continue;  // The helper reported failure for this pixel: leave the destination untouched.
    5607             : 
    5608             :             /* --------------------------------------------------------------------
    5609             :              */
    5610             :             /*      Do not try to apply transparent/invalid source pixels to the
    5611             :              */
    5612             :             /*      destination.  This currently ignores the multi-pixel input
    5613             :              */
    5614             :             /*      of bilinear and cubic resamples. */
    5615             :             /* --------------------------------------------------------------------
    5616             :              */
    5617    31479800 :             double dfDensity = 1.0;  // Value kept when there is no unified source density array.
    5618             : 
    5619    31479800 :             if (poWK->pafUnifiedSrcDensity != nullptr)
    5620             :             {
    5621     1360180 :                 dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    5622     1360180 :                 if (dfDensity < SRC_DENSITY_THRESHOLD)
    5623             :                 {
    5624     1308710 :                     if (!bOneSourceCornerFailsToReproject)
    5625             :                     {
    5626     1308710 :                         continue;
    5627             :                     }
    5628           0 :                     else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
    5629             :                                  psJob, iSrcOffset))
    5630             :                     {
    5631           0 :                         dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];  // Re-read the density at iSrcOffset after the helper succeeded.
    5632             :                     }
    5633             :                     else
    5634             :                     {
    5635           0 :                         continue;
    5636             :                     }
    5637             :                 }
    5638             :             }
    5639             : 
    5640    59799700 :             if (poWK->panUnifiedSrcValid != nullptr &&
    5641    29628600 :                 !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    5642             :             {
    5643    29531000 :                 if (!bOneSourceCornerFailsToReproject)
    5644             :                 {
    5645    29528700 :                     continue;
    5646             :                 }
    5647        2229 :                 else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
    5648             :                 {
    5649           0 :                     continue;
    5650             :                 }
    5651             :             }
    5652             : 
    5653             :             /* ====================================================================
    5654             :              */
    5655             :             /*      Loop processing each band. */
    5656             :             /* ====================================================================
    5657             :              */
    5658      642336 :             bool bHasFoundDensity = false;  // Set once any band produces a value for this pixel.
    5659             : 
    5660      642336 :             const GPtrDiff_t iDstOffset =
    5661      642336 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;  // Cast before multiplying so the product is computed in GPtrDiff_t.
    5662     1716290 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    5663             :             {
    5664     1073960 :                 double dfBandDensity = 0.0;
    5665     1073960 :                 double dfValueReal = 0.0;
    5666             : 
    5667             :                 /* --------------------------------------------------------------------
    5668             :                  */
    5669             :                 /*      Collect the source value. */
    5670             :                 /* --------------------------------------------------------------------
    5671             :                  */
    5672     1073960 :                 if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
    5673             :                     nSrcYSize == 1)
    5674             :                 {
    5675             :                     // FALSE is returned if dfBandDensity == 0, which is
    5676             :                     // checked below.
    5677      151448 :                     CPL_IGNORE_RET_VAL(GWKGetPixelValueReal(
    5678             :                         poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal));
    5679             :                 }
    5680      922509 :                 else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
    5681             :                 {
    5682        1086 :                     double dfValueImagIgnored = 0.0;  // The helper needs an imaginary output; it is never read here.
    5683        1086 :                     GWKBilinearResample4Sample(
    5684        1086 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5685        1086 :                         padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5686        1086 :                         &dfValueReal, &dfValueImagIgnored);
    5687             :                 }
    5688      921423 :                 else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
    5689             :                 {
    5690      299992 :                     if (bSrcMaskIsDensity)  // Density-only mask: dispatch on the working data type.
    5691             :                     {
    5692         361 :                         if (poWK->eWorkingDataType == GDT_Byte)
    5693             :                         {
    5694         361 :                             GWKCubicResampleSrcMaskIsDensity4SampleRealT<GByte>(
    5695         361 :                                 poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5696         361 :                                 padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5697             :                                 &dfValueReal);
    5698             :                         }
    5699           0 :                         else if (poWK->eWorkingDataType == GDT_UInt16)
    5700             :                         {
    5701             :                             GWKCubicResampleSrcMaskIsDensity4SampleRealT<
    5702           0 :                                 GUInt16>(poWK, iBand,
    5703           0 :                                          padfX[iDstX] - poWK->nSrcXOff,
    5704           0 :                                          padfY[iDstX] - poWK->nSrcYOff,
    5705             :                                          &dfBandDensity, &dfValueReal);
    5706             :                         }
    5707             :                         else
    5708             :                         {
    5709           0 :                             GWKCubicResampleSrcMaskIsDensity4SampleReal(
    5710           0 :                                 poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5711           0 :                                 padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5712             :                                 &dfValueReal);
    5713             :                         }
    5714             :                     }
    5715             :                     else
    5716             :                     {
    5717      299631 :                         double dfValueImagIgnored = 0.0;  // Discarded: only the real part is written by this function.
    5718      299631 :                         GWKCubicResample4Sample(
    5719      299631 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5720      299631 :                             padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5721             :                             &dfValueReal, &dfValueImagIgnored);
    5722      299992 :                     }
    5723             :                 }
    5724             :                 else
    5725             : #ifdef DEBUG
    5726             :                     // Only useful for clang static analyzer.
    5727      621431 :                     if (psWrkStruct != nullptr)
    5728             : #endif
    5729             :                     {
    5730      621431 :                         double dfValueImagIgnored = 0.0;  // Discarded: only the real part is written by this function.
    5731      621431 :                         psWrkStruct->pfnGWKResample(
    5732      621431 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5733      621431 :                             padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5734             :                             &dfValueReal, &dfValueImagIgnored, psWrkStruct);
    5735             :                     }
    5736             : 
    5737             :                 // If we didn't find any valid inputs skip to next band.
    5738     1073960 :                 if (dfBandDensity < BAND_DENSITY_THRESHOLD)
    5739           0 :                     continue;
    5740             : 
    5741     1073960 :                 if (poWK->bApplyVerticalShift)
    5742             :                 {
    5743           0 :                     if (!std::isfinite(padfZ[iDstX]))  // Non-finite Z: skip this band for this pixel.
    5744           0 :                         continue;
    5745             :                     // Subtract padfZ[] since the coordinate transformation is
    5746             :                     // from target to source
    5747           0 :                     dfValueReal =
    5748           0 :                         dfValueReal * poWK->dfMultFactorVerticalShift -
    5749           0 :                         padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
    5750             :                 }
    5751             : 
    5752     1073960 :                 bHasFoundDensity = true;
    5753             : 
    5754             :                 /* --------------------------------------------------------------------
    5755             :                  */
    5756             :                 /*      We have a computed value from the source.  Now apply it
    5757             :                  * to      */
    5758             :                 /*      the destination pixel. */
    5759             :                 /* --------------------------------------------------------------------
    5760             :                  */
    5761     1073960 :                 GWKSetPixelValueReal(poWK, iBand, iDstOffset, dfBandDensity,
    5762             :                                      dfValueReal);
    5763             :             }
    5764             : 
    5765      642336 :             if (!bHasFoundDensity)  // No band produced a value: leave the destination masks untouched.
    5766           0 :                 continue;
    5767             : 
    5768             :             /* --------------------------------------------------------------------
    5769             :              */
    5770             :             /*      Update destination density/validity masks. */
    5771             :             /* --------------------------------------------------------------------
    5772             :              */
    5773      642336 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    5774             : 
    5775      642336 :             if (poWK->panDstValid != nullptr)
    5776             :             {
    5777      101716 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    5778             :             }
    5779             :         }  // Next iDstX.
    5780             : 
    5781             :         /* --------------------------------------------------------------------
    5782             :          */
    5783             :         /*      Report progress to the user, and optionally cancel out. */
    5784             :         /* --------------------------------------------------------------------
    5785             :          */
    5786       22080 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))  // A non-zero return from the callback stops the row loop.
    5787           0 :             break;
    5788             :     }
    5789             : 
    5790             :     /* -------------------------------------------------------------------- */
    5791             :     /*      Cleanup and return.                                             */
    5792             :     /* -------------------------------------------------------------------- */
    5793         151 :     CPLFree(padfX);
    5794         151 :     CPLFree(padfY);
    5795         151 :     CPLFree(padfZ);
    5796         151 :     CPLFree(pabSuccess);
    5797         151 :     if (psWrkStruct)
    5798         124 :         GWKResampleDeleteWrkStruct(psWrkStruct);
    5799         151 : }
    5800             : // GWKRealCase(): hands GWKRealCaseThread to GWKRun() and returns GWKRun()'s CPLErr unchanged.
    5801         151 : static CPLErr GWKRealCase(GDALWarpKernel *poWK)
    5802             : {
    5803         151 :     return GWKRun(poWK, "GWKRealCase", GWKRealCaseThread);  // The string is presumably a label used by GWKRun() - confirm there.
    5804             : }
    5805             : 
    5806             : /************************************************************************/
    5807             : /*                 GWKCubicResampleNoMasks4MultiBandT()                 */
    5808             : /************************************************************************/
    5809             : 
    5810             : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
    5811             : /* and enough SSE registers */
    5812             : #if defined(USE_SSE2)
    5813             : // Convolute4x4(): multiplies each of the four rows element-wise by its matching weight vector, adds the four products, and returns XMMHorizontalAdd() of that sum.
    5814      238596 : static inline float Convolute4x4(const __m128 row0, const __m128 row1,
    5815             :                                  const __m128 row2, const __m128 row3,
    5816             :                                  const __m128 weightsXY0,
    5817             :                                  const __m128 weightsXY1,
    5818             :                                  const __m128 weightsXY2,
    5819             :                                  const __m128 weightsXY3)
    5820             : {
    5821     1670170 :     return XMMHorizontalAdd(_mm_add_ps(  // Presumably reduces the 4 lanes to one float - defined outside this view.
    5822             :         _mm_add_ps(_mm_mul_ps(row0, weightsXY0), _mm_mul_ps(row1, weightsXY1)),
    5823             :         _mm_add_ps(_mm_mul_ps(row2, weightsXY2),
    5824      238596 :                    _mm_mul_ps(row3, weightsXY3))));
    5825             : }
    5826             : 
    5827             : template <class T>
    5828       81323 : static void GWKCubicResampleNoMasks4MultiBandT(const GDALWarpKernel *poWK,
    5829             :                                                double dfSrcX, double dfSrcY,
    5830             :                                                const GPtrDiff_t iDstOffset)
    5831             : {
    5832       81323 :     const double dfSrcXShifted = dfSrcX - 0.5;
    5833       81323 :     const int iSrcX = static_cast<int>(dfSrcXShifted);
    5834       81323 :     const double dfSrcYShifted = dfSrcY - 0.5;
    5835       81323 :     const int iSrcY = static_cast<int>(dfSrcYShifted);
    5836       81323 :     const GPtrDiff_t iSrcOffset =
    5837       81323 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    5838             : 
    5839             :     // Get the bilinear interpolation at the image borders.
    5840       81323 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    5841       80326 :         iSrcY + 2 >= poWK->nSrcYSize)
    5842             :     {
    5843        7164 :         for (int iBand = 0; iBand < poWK->nBands; iBand++)
    5844             :         {
    5845             :             T value;
    5846        5373 :             GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    5847             :                                                &value);
    5848        5373 :             reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
    5849             :                 value;
    5850        1791 :         }
    5851             :     }
    5852             :     else
    5853             :     {
    5854       79532 :         const float fDeltaX = static_cast<float>(dfSrcXShifted) - iSrcX;
    5855       79532 :         const float fDeltaY = static_cast<float>(dfSrcYShifted) - iSrcY;
    5856             : 
    5857             :         float afCoeffsX[4];
    5858             :         float afCoeffsY[4];
    5859       79532 :         GWKCubicComputeWeights(fDeltaX, afCoeffsX);
    5860       79532 :         GWKCubicComputeWeights(fDeltaY, afCoeffsY);
    5861       79532 :         const auto weightsX = _mm_loadu_ps(afCoeffsX);
    5862             :         const auto weightsXY0 =
    5863      159064 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[0]), weightsX);
    5864             :         const auto weightsXY1 =
    5865      159064 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[1]), weightsX);
    5866             :         const auto weightsXY2 =
    5867      159064 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[2]), weightsX);
    5868             :         const auto weightsXY3 =
    5869       79532 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[3]), weightsX);
    5870             : 
    5871       79532 :         const GPtrDiff_t iOffset = iSrcOffset - poWK->nSrcXSize - 1;
    5872             : 
    5873       79532 :         int iBand = 0;
    5874             :         // Process 2 bands at a time
    5875      159064 :         for (; iBand + 1 < poWK->nBands; iBand += 2)
    5876             :         {
    5877       79532 :             const T *CPL_RESTRICT pBand0 =
    5878       79532 :                 reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
    5879       79532 :             const auto row0_0 = XMMLoad4Values(pBand0 + iOffset);
    5880             :             const auto row1_0 =
    5881       79532 :                 XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
    5882             :             const auto row2_0 =
    5883       79532 :                 XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
    5884             :             const auto row3_0 =
    5885       79532 :                 XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
    5886             : 
    5887       79532 :             const T *CPL_RESTRICT pBand1 =
    5888       79532 :                 reinterpret_cast<const T *>(poWK->papabySrcImage[iBand + 1]);
    5889       79532 :             const auto row0_1 = XMMLoad4Values(pBand1 + iOffset);
    5890             :             const auto row1_1 =
    5891       79532 :                 XMMLoad4Values(pBand1 + iOffset + poWK->nSrcXSize);
    5892             :             const auto row2_1 =
    5893       79532 :                 XMMLoad4Values(pBand1 + iOffset + 2 * poWK->nSrcXSize);
    5894             :             const auto row3_1 =
    5895       79532 :                 XMMLoad4Values(pBand1 + iOffset + 3 * poWK->nSrcXSize);
    5896             : 
    5897             :             const float fValue_0 =
    5898       79532 :                 Convolute4x4(row0_0, row1_0, row2_0, row3_0, weightsXY0,
    5899             :                              weightsXY1, weightsXY2, weightsXY3);
    5900             : 
    5901             :             const float fValue_1 =
    5902       79532 :                 Convolute4x4(row0_1, row1_1, row2_1, row3_1, weightsXY0,
    5903             :                              weightsXY1, weightsXY2, weightsXY3);
    5904             : 
    5905       79532 :             T *CPL_RESTRICT pDstBand0 =
    5906       79532 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
    5907       79532 :             pDstBand0[iDstOffset] = GWKClampValueT<T>(fValue_0);
    5908             : 
    5909       79532 :             T *CPL_RESTRICT pDstBand1 =
    5910       79532 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand + 1]);
    5911       79532 :             pDstBand1[iDstOffset] = GWKClampValueT<T>(fValue_1);
    5912             :         }
    5913       79532 :         if (iBand < poWK->nBands)
    5914             :         {
    5915       79532 :             const T *CPL_RESTRICT pBand0 =
    5916       79532 :                 reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
    5917       79532 :             const auto row0 = XMMLoad4Values(pBand0 + iOffset);
    5918             :             const auto row1 =
    5919       79532 :                 XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
    5920             :             const auto row2 =
    5921       79532 :                 XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
    5922             :             const auto row3 =
    5923       79532 :                 XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
    5924             : 
    5925             :             const float fValue =
    5926       79532 :                 Convolute4x4(row0, row1, row2, row3, weightsXY0, weightsXY1,
    5927             :                              weightsXY2, weightsXY3);
    5928             : 
    5929       79532 :             T *CPL_RESTRICT pDstBand =
    5930       79532 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
    5931       79532 :             pDstBand[iDstOffset] = GWKClampValueT<T>(fValue);
    5932             :         }
    5933             :     }
    5934             : 
    5935       81323 :     if (poWK->pafDstDensity)
    5936         441 :         poWK->pafDstDensity[iDstOffset] = 1.0f;
    5937       81323 : }
    5938             : 
    5939             : #endif  // defined(USE_SSE2)
    5940             : 
    5941             : /************************************************************************/
    5942             : /*                GWKResampleNoMasksOrDstDensityOnlyThreadInternal()    */
    5943             : /************************************************************************/
    5944             : 
    5945             : template <class T, GDALResampleAlg eResample, int bUse4SamplesFormula>
    5946        1189 : static void GWKResampleNoMasksOrDstDensityOnlyThreadInternal(void *pData)
    5947             : 
    5948             : {
    5949        1189 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    5950        1189 :     GDALWarpKernel *poWK = psJob->poWK;
    5951        1189 :     const int iYMin = psJob->iYMin;
    5952        1189 :     const int iYMax = psJob->iYMax;
    5953        1171 :     const double dfMultFactorVerticalShiftPipeline =
    5954        1189 :         poWK->bApplyVerticalShift
    5955          18 :             ? CPLAtof(CSLFetchNameValueDef(
    5956          18 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    5957             :                   "1.0"))
    5958             :             : 0.0;
    5959             : 
    5960        1189 :     const int nDstXSize = poWK->nDstXSize;
    5961        1189 :     const int nSrcXSize = poWK->nSrcXSize;
    5962        1189 :     const int nSrcYSize = poWK->nSrcYSize;
    5963             : 
    5964             :     /* -------------------------------------------------------------------- */
    5965             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    5966             :     /*      scanlines worth of positions.                                   */
    5967             :     /* -------------------------------------------------------------------- */
    5968             : 
    5969             :     // For x, 2 *, because we cache the precomputed values at the end.
    5970             :     double *padfX =
    5971        1189 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    5972             :     double *padfY =
    5973        1189 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5974             :     double *padfZ =
    5975        1189 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5976        1189 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    5977             : 
    5978        1189 :     const int nXRadius = poWK->nXRadius;
    5979             :     double *padfWeightsX =
    5980        1189 :         static_cast<double *>(CPLCalloc(1 + nXRadius * 2, sizeof(double)));
    5981             :     double *padfWeightsY = static_cast<double *>(
    5982        1189 :         CPLCalloc(1 + poWK->nYRadius * 2, sizeof(double)));
    5983        1189 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    5984        1189 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    5985        1189 :     const double dfErrorThreshold = CPLAtof(
    5986        1189 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    5987             : 
    5988             :     // Precompute values.
    5989      256069 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5990      254880 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    5991             : 
    5992             :     /* ==================================================================== */
    5993             :     /*      Loop over output lines.                                         */
    5994             :     /* ==================================================================== */
    5995      130279 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    5996             :     {
    5997             :         /* --------------------------------------------------------------------
    5998             :          */
    5999             :         /*      Setup points to transform to source image space. */
    6000             :         /* --------------------------------------------------------------------
    6001             :          */
    6002      129092 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    6003      129092 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    6004    56026240 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6005    55897120 :             padfY[iDstX] = dfY;
    6006      129092 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    6007             : 
    6008             :         /* --------------------------------------------------------------------
    6009             :          */
    6010             :         /*      Transform the points from destination pixel/line coordinates */
    6011             :         /*      to source pixel/line coordinates. */
    6012             :         /* --------------------------------------------------------------------
    6013             :          */
    6014      129092 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    6015             :                              padfY, padfZ, pabSuccess);
    6016      129092 :         if (dfSrcCoordPrecision > 0.0)
    6017             :         {
    6018        1000 :             GWKRoundSourceCoordinates(
    6019             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    6020             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    6021        1000 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    6022             :         }
    6023             : 
    6024             :         /* ====================================================================
    6025             :          */
    6026             :         /*      Loop over pixels in output scanline. */
    6027             :         /* ====================================================================
    6028             :          */
    6029    56028450 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6030             :         {
    6031    55899330 :             GPtrDiff_t iSrcOffset = 0;
    6032    55899330 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    6033             :                                               padfX, padfY, nSrcXSize,
    6034             :                                               nSrcYSize, iSrcOffset))
    6035     6608460 :                 continue;
    6036             : 
    6037             :             /* ====================================================================
    6038             :              */
    6039             :             /*      Loop processing each band. */
    6040             :             /* ====================================================================
    6041             :              */
    6042    49339138 :             const GPtrDiff_t iDstOffset =
    6043    49339138 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    6044             : 
    6045             : #if defined(USE_SSE2)
    6046             :             if constexpr (bUse4SamplesFormula && eResample == GRA_Cubic &&
    6047             :                           (std::is_same<T, GByte>::value ||
    6048             :                            std::is_same<T, GUInt16>::value))
    6049             :             {
    6050      752574 :                 if (poWK->nBands > 1 && !poWK->bApplyVerticalShift)
    6051             :                 {
    6052       81323 :                     GWKCubicResampleNoMasks4MultiBandT<T>(
    6053       81323 :                         poWK, padfX[iDstX] - poWK->nSrcXOff,
    6054       81323 :                         padfY[iDstX] - poWK->nSrcYOff, iDstOffset);
    6055             : 
    6056       81323 :                     continue;
    6057             :                 }
    6058             :             }
    6059             : #endif  // defined(USE_SSE2)
    6060             : 
    6061    49257815 :             [[maybe_unused]] double dfInvWeights = 0;
    6062   139782922 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6063             :             {
    6064    90492185 :                 T value = 0;
    6065             :                 if constexpr (eResample == GRA_NearestNeighbour)
    6066             :                 {
    6067    76037130 :                     value = reinterpret_cast<T *>(
    6068    76037130 :                         poWK->papabySrcImage[iBand])[iSrcOffset];
    6069             :                 }
    6070             :                 else if constexpr (bUse4SamplesFormula)
    6071             :                 {
    6072             :                     if constexpr (eResample == GRA_Bilinear)
    6073     3363189 :                         GWKBilinearResampleNoMasks4SampleT(
    6074     3363189 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6075     3363189 :                             padfY[iDstX] - poWK->nSrcYOff, &value);
    6076             :                     else
    6077     1906603 :                         GWKCubicResampleNoMasks4SampleT(
    6078     1906603 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6079     1906603 :                             padfY[iDstX] - poWK->nSrcYOff, &value);
    6080             :                 }
    6081             :                 else
    6082             :                 {
    6083     9185263 :                     GWKResampleNoMasksT(
    6084     9185263 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6085     9185263 :                         padfY[iDstX] - poWK->nSrcYOff, &value, padfWeightsX,
    6086             :                         padfWeightsY, dfInvWeights);
    6087             :                 }
    6088             : 
    6089    90486835 :                 if (poWK->bApplyVerticalShift)
    6090             :                 {
    6091         818 :                     if (!std::isfinite(padfZ[iDstX]))
    6092           0 :                         continue;
    6093             :                     // Subtract padfZ[] since the coordinate transformation is
    6094             :                     // from target to source
    6095       39547 :                     value = GWKClampValueT<T>(
    6096         818 :                         value * poWK->dfMultFactorVerticalShift -
    6097         818 :                         padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
    6098             :                 }
    6099             : 
    6100    90525305 :                 if (poWK->pafDstDensity)
    6101    11712309 :                     poWK->pafDstDensity[iDstOffset] = 1.0f;
    6102             : 
    6103    90525305 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
    6104             :                     value;
    6105             :             }
    6106             :         }
    6107             : 
    6108             :         /* --------------------------------------------------------------------
    6109             :          */
    6110             :         /*      Report progress to the user, and optionally cancel out. */
    6111             :         /* --------------------------------------------------------------------
    6112             :          */
    6113      129092 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    6114           2 :             break;
    6115             :     }
    6116             : 
    6117             :     /* -------------------------------------------------------------------- */
    6118             :     /*      Cleanup and return.                                             */
    6119             :     /* -------------------------------------------------------------------- */
    6120        1189 :     CPLFree(padfX);
    6121        1189 :     CPLFree(padfY);
    6122        1189 :     CPLFree(padfZ);
    6123        1189 :     CPLFree(pabSuccess);
    6124        1189 :     CPLFree(padfWeightsX);
    6125        1189 :     CPLFree(padfWeightsY);
    6126        1189 : }
    6127             : 
    6128             : template <class T, GDALResampleAlg eResample>
    6129         947 : static void GWKResampleNoMasksOrDstDensityOnlyThread(void *pData)
    6130             : {
    6131         947 :     GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
    6132             :         pData);
    6133         947 : }
    6134             : 
    6135             : template <class T, GDALResampleAlg eResample>
    6136         242 : static void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread(void *pData)
    6137             : 
    6138             : {
    6139         242 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    6140         242 :     GDALWarpKernel *poWK = psJob->poWK;
    6141             :     static_assert(eResample == GRA_Bilinear || eResample == GRA_Cubic);
    6142         242 :     const bool bUse4SamplesFormula =
    6143         242 :         poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
    6144         242 :     if (bUse4SamplesFormula)
    6145         142 :         GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, TRUE>(
    6146             :             pData);
    6147             :     else
    6148         100 :         GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
    6149             :             pData);
    6150         242 : }
    6151             : 
    6152         896 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6153             : {
    6154         896 :     return GWKRun(
    6155             :         poWK, "GWKNearestNoMasksOrDstDensityOnlyByte",
    6156         896 :         GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_NearestNeighbour>);
    6157             : }
    6158             : 
    6159         126 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6160             : {
    6161         126 :     return GWKRun(
    6162             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyByte",
    6163             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte,
    6164         126 :                                                            GRA_Bilinear>);
    6165             : }
    6166             : 
    6167          72 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6168             : {
    6169          72 :     return GWKRun(
    6170             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyByte",
    6171          72 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte, GRA_Cubic>);
    6172             : }
    6173             : 
    6174           9 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
    6175             : {
    6176           9 :     return GWKRun(
    6177             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyFloat",
    6178           9 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, GRA_Cubic>);
    6179             : }
    6180             : 
    6181             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    6182             : 
    6183             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
    6184             : {
    6185             :     return GWKRun(
    6186             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyDouble",
    6187             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double, GRA_Cubic>);
    6188             : }
    6189             : #endif
    6190             : 
    6191          12 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6192             : {
    6193          12 :     return GWKRun(
    6194             :         poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyByte",
    6195          12 :         GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_CubicSpline>);
    6196             : }
    6197             : 
    6198             : /************************************************************************/
    6199             : /*                          GWKNearestThread()                          */
    6200             : /*                                                                      */
    6201             : /*      Case for nearest neighbour resampling of data of type T,        */
    6202             : /*      using valid flags. Should be as fast as possible for this       */
    6203             : /*      particular transformation type.                                 */
    6204             : /************************************************************************/
    6205             : 
    6206         388 : template <class T> static void GWKNearestThread(void *pData)
    6207             : 
    6208             : {
    6209         388 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    6210         388 :     GDALWarpKernel *poWK = psJob->poWK;
    6211         388 :     const int iYMin = psJob->iYMin;
    6212         388 :     const int iYMax = psJob->iYMax;
    6213         388 :     const double dfMultFactorVerticalShiftPipeline =
    6214         388 :         poWK->bApplyVerticalShift
    6215           0 :             ? CPLAtof(CSLFetchNameValueDef(
    6216           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    6217             :                   "1.0"))
    6218             :             : 0.0;
    6219             : 
    6220         388 :     const int nDstXSize = poWK->nDstXSize;
    6221         388 :     const int nSrcXSize = poWK->nSrcXSize;
    6222         388 :     const int nSrcYSize = poWK->nSrcYSize;
    6223             : 
    6224             :     /* -------------------------------------------------------------------- */
    6225             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    6226             :     /*      scanlines worth of positions.                                   */
    6227             :     /* -------------------------------------------------------------------- */
    6228             : 
    6229             :     // For x, 2 *, because we cache the precomputed values at the end.
    6230             :     double *padfX =
    6231         388 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    6232             :     double *padfY =
    6233         388 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6234             :     double *padfZ =
    6235         388 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6236         388 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    6237             : 
    6238         388 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    6239         388 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    6240         388 :     const double dfErrorThreshold = CPLAtof(
    6241         388 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    6242             : 
    6243             :     const bool bOneSourceCornerFailsToReproject =
    6244         388 :         GWKOneSourceCornerFailsToReproject(psJob);
    6245             : 
    6246             :     // Precompute values.
    6247       56961 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6248       56573 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    6249             : 
    6250             :     /* ==================================================================== */
    6251             :     /*      Loop over output lines.                                         */
    6252             :     /* ==================================================================== */
    6253       41998 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    6254             :     {
    6255             : 
    6256             :         /* --------------------------------------------------------------------
    6257             :          */
    6258             :         /*      Setup points to transform to source image space. */
    6259             :         /* --------------------------------------------------------------------
    6260             :          */
    6261       41610 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    6262       41610 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    6263     8768225 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6264     8726610 :             padfY[iDstX] = dfY;
    6265       41610 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    6266             : 
    6267             :         /* --------------------------------------------------------------------
    6268             :          */
    6269             :         /*      Transform the points from destination pixel/line coordinates */
    6270             :         /*      to source pixel/line coordinates. */
    6271             :         /* --------------------------------------------------------------------
    6272             :          */
    6273       41610 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    6274             :                              padfY, padfZ, pabSuccess);
    6275       41610 :         if (dfSrcCoordPrecision > 0.0)
    6276             :         {
    6277           0 :             GWKRoundSourceCoordinates(
    6278             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    6279             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    6280           0 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    6281             :         }
    6282             :         /* ====================================================================
    6283             :          */
    6284             :         /*      Loop over pixels in output scanline. */
    6285             :         /* ====================================================================
    6286             :          */
    6287     8768225 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6288             :         {
    6289     8726610 :             GPtrDiff_t iSrcOffset = 0;
    6290     8726610 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    6291             :                                               padfX, padfY, nSrcXSize,
    6292             :                                               nSrcYSize, iSrcOffset))
    6293     2242803 :                 continue;
    6294             : 
    6295             :             /* --------------------------------------------------------------------
    6296             :              */
    6297             :             /*      Do not try to apply invalid source pixels to the dest. */
    6298             :             /* --------------------------------------------------------------------
    6299             :              */
    6300     8418056 :             if (poWK->panUnifiedSrcValid != nullptr &&
    6301      930304 :                 !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    6302             :             {
    6303       48956 :                 if (!bOneSourceCornerFailsToReproject)
    6304             :                 {
    6305       41471 :                     continue;
    6306             :                 }
    6307        7485 :                 else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
    6308             :                 {
    6309        5224 :                     continue;
    6310             :                 }
    6311             :             }
    6312             : 
    6313             :             /* --------------------------------------------------------------------
    6314             :              */
    6315             :             /*      Do not try to apply transparent source pixels to the
    6316             :              * destination.*/
    6317             :             /* --------------------------------------------------------------------
    6318             :              */
    6319     7441059 :             double dfDensity = 1.0;
    6320             : 
    6321     7441059 :             if (poWK->pafUnifiedSrcDensity != nullptr)
    6322             :             {
    6323     1064945 :                 dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    6324     1064945 :                 if (dfDensity < SRC_DENSITY_THRESHOLD)
    6325      957251 :                     continue;
    6326             :             }
    6327             : 
    6328             :             /* ====================================================================
    6329             :              */
    6330             :             /*      Loop processing each band. */
    6331             :             /* ====================================================================
    6332             :              */
    6333             : 
    6334     6483807 :             const GPtrDiff_t iDstOffset =
    6335     6483807 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    6336             : 
    6337    14642964 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6338             :             {
    6339     8159117 :                 T value = 0;
    6340     8159117 :                 double dfBandDensity = 0.0;
    6341             : 
    6342             :                 /* --------------------------------------------------------------------
    6343             :                  */
    6344             :                 /*      Collect the source value. */
    6345             :                 /* --------------------------------------------------------------------
    6346             :                  */
    6347     8159117 :                 if (GWKGetPixelT(poWK, iBand, iSrcOffset, &dfBandDensity,
    6348             :                                  &value))
    6349             :                 {
    6350             : 
    6351     8159107 :                     if (poWK->bApplyVerticalShift)
    6352             :                     {
    6353           0 :                         if (!std::isfinite(padfZ[iDstX]))
    6354           0 :                             continue;
    6355             :                         // Subtract padfZ[] since the coordinate transformation
    6356             :                         // is from target to source
    6357           0 :                         value = GWKClampValueT<T>(
    6358           0 :                             value * poWK->dfMultFactorVerticalShift -
    6359           0 :                             padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
    6360             :                     }
    6361             : 
    6362     8159107 :                     GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
    6363             :                                           dfBandDensity, value);
    6364             :                 }
    6365             :             }
    6366             : 
    6367             :             /* --------------------------------------------------------------------
    6368             :              */
    6369             :             /*      Mark this pixel valid/opaque in the output. */
    6370             :             /* --------------------------------------------------------------------
    6371             :              */
    6372     6483807 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    6373             : 
    6374     6483807 :             if (poWK->panDstValid != nullptr)
    6375             :             {
    6376     5854774 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    6377             :             }
    6378             :         } /* Next iDstX */
    6379             : 
    6380             :         /* --------------------------------------------------------------------
    6381             :          */
    6382             :         /*      Report progress to the user, and optionally cancel out. */
    6383             :         /* --------------------------------------------------------------------
    6384             :          */
    6385       41610 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    6386           0 :             break;
    6387             :     }
    6388             : 
    6389             :     /* -------------------------------------------------------------------- */
    6390             :     /*      Cleanup and return.                                             */
    6391             :     /* -------------------------------------------------------------------- */
    6392         388 :     CPLFree(padfX);
    6393         388 :     CPLFree(padfY);
    6394         388 :     CPLFree(padfZ);
    6395         388 :     CPLFree(pabSuccess);
    6396         388 : }
    6397             : 
    6398         324 : static CPLErr GWKNearestByte(GDALWarpKernel *poWK)
    6399             : {  // Hands GWKNearestThread<GByte> to GWKRun() and returns GWKRun()'s CPLErr unchanged.
    6400         324 :     return GWKRun(poWK, "GWKNearestByte", GWKNearestThread<GByte>);
    6401             : }
    6402             : 
    6403          14 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6404             : {  // Hands the GInt16 / GRA_NearestNeighbour instantiation of the no-masks worker to GWKRun().
    6405          14 :     return GWKRun(
    6406             :         poWK, "GWKNearestNoMasksOrDstDensityOnlyShort",
    6407          14 :         GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_NearestNeighbour>);
    6408             : }
    6409             : 
    6410           5 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6411             : {  // Hands the GInt16 / GRA_Bilinear instantiation of the "Has4Sample" worker to GWKRun().
    6412           5 :     return GWKRun(
    6413             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyShort",
    6414             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16,
    6415           5 :                                                            GRA_Bilinear>);
    6416             : }
    6417             : 
    6418           6 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
    6419             : {  // Hands the GUInt16 / GRA_Bilinear instantiation of the "Has4Sample" worker to GWKRun().
    6420           6 :     return GWKRun(
    6421             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyUShort",
    6422             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16,
    6423           6 :                                                            GRA_Bilinear>);
    6424             : }
    6425             : 
    6426           4 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
    6427             : {  // Hands the float / GRA_Bilinear instantiation of the "Has4Sample" worker to GWKRun().
    6428           4 :     return GWKRun(
    6429             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyFloat",
    6430             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float,
    6431           4 :                                                            GRA_Bilinear>);
    6432             : }
    6433             : 
    6434             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    6435             : 
    6436             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
    6437             : {  // double / GRA_Bilinear variant; only compiled when INSTANTIATE_FLOAT64_SSE2_IMPL is defined.
    6438             :     return GWKRun(
    6439             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyDouble",
    6440             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double,
    6441             :                                                            GRA_Bilinear>);
    6442             : }
    6443             : #endif
    6444             : 
    6445           5 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6446             : {  // Hands the GInt16 / GRA_Cubic instantiation of the "Has4Sample" worker to GWKRun().
    6447           5 :     return GWKRun(
    6448             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyShort",
    6449           5 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16, GRA_Cubic>);
    6450             : }
    6451             : 
    6452          12 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
    6453             : {  // Hands the GUInt16 / GRA_Cubic instantiation of the "Has4Sample" worker to GWKRun().
    6454          12 :     return GWKRun(
    6455             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyUShort",
    6456          12 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16, GRA_Cubic>);
    6457             : }
    6458             : 
    6459           6 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6460             : {  // Hands the GInt16 / GRA_CubicSpline instantiation of the no-masks worker to GWKRun().
    6461           6 :     return GWKRun(
    6462             :         poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyShort",
    6463           6 :         GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_CubicSpline>);
    6464             : }
    6465             : 
    6466           5 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
    6467             : {  // Hands the GUInt16 / GRA_CubicSpline instantiation of the no-masks worker to GWKRun().
    6468           5 :     return GWKRun(
    6469             :         poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyUShort",
    6470           5 :         GWKResampleNoMasksOrDstDensityOnlyThread<GUInt16, GRA_CubicSpline>);
    6471             : }
    6472             : 
    6473          23 : static CPLErr GWKNearestShort(GDALWarpKernel *poWK)
    6474             : {  // Hands GWKNearestThread<GInt16> to GWKRun() and returns GWKRun()'s CPLErr unchanged.
    6475          23 :     return GWKRun(poWK, "GWKNearestShort", GWKNearestThread<GInt16>);
    6476             : }
    6477             : 
    6478           0 : static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK)
    6479             : {  // Hands GWKNearestThread<GUInt16> to GWKRun() and returns GWKRun()'s CPLErr unchanged.
    6480           0 :     return GWKRun(poWK, "GWKNearestUnsignedShort", GWKNearestThread<GUInt16>);
    6481             : }
    6482             : 
    6483          11 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
    6484             : {  // Hands the float / GRA_NearestNeighbour instantiation of the no-masks worker to GWKRun().
    6485          11 :     return GWKRun(
    6486             :         poWK, "GWKNearestNoMasksOrDstDensityOnlyFloat",
    6487          11 :         GWKResampleNoMasksOrDstDensityOnlyThread<float, GRA_NearestNeighbour>);
    6488             : }
    6489             : 
    6490          37 : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK)
    6491             : {  // Hands GWKNearestThread<float> to GWKRun() and returns GWKRun()'s CPLErr unchanged.
    6492          37 :     return GWKRun(poWK, "GWKNearestFloat", GWKNearestThread<float>);
    6493             : }
    6494             : 
    6495             : /************************************************************************/
    6496             : /*                           GWKAverageOrMode()                         */
    6497             : /*                                                                      */
    6498             : /************************************************************************/
    6499             : 
    6500             : static void GWKAverageOrModeThread(void *pData);
    6501             : 
    6502         130 : static CPLErr GWKAverageOrMode(GDALWarpKernel *poWK)
    6503             : {  // Hands GWKAverageOrModeThread (which picks its algorithm from poWK->eResample) to GWKRun().
    6504         130 :     return GWKRun(poWK, "GWKAverageOrMode", GWKAverageOrModeThread);
    6505             : }
    6506             : 
    6507             : // Overall logic based on GWKGeneralCaseThread().
    6508         130 : static void GWKAverageOrModeThread(void *pData)
    6509             : {
    6510         130 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    6511         130 :     GDALWarpKernel *poWK = psJob->poWK;
    6512         130 :     const int iYMin = psJob->iYMin;
    6513         130 :     const int iYMax = psJob->iYMax;
    6514             :     const double dfMultFactorVerticalShiftPipeline =
    6515         130 :         poWK->bApplyVerticalShift
    6516         130 :             ? CPLAtof(CSLFetchNameValueDef(
    6517           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    6518             :                   "1.0"))
    6519         130 :             : 0.0;
    6520             : 
    6521         130 :     const int nDstXSize = poWK->nDstXSize;
    6522         130 :     const int nSrcXSize = poWK->nSrcXSize;
    6523         130 :     const int nSrcYSize = poWK->nSrcYSize;
    6524             : 
    6525             :     /* -------------------------------------------------------------------- */
    6526             :     /*      Find out which algorithm to use (small optim.)                  */
    6527             :     /* -------------------------------------------------------------------- */
    6528         130 :     int nAlgo = 0;
    6529             : 
    6530             :     // Only used for GRA_Mode
    6531         130 :     float *pafRealVals = nullptr;
    6532         130 :     float *pafCounts = nullptr;
    6533         130 :     int nBins = 0;
    6534         130 :     int nBinsOffset = 0;
    6535         130 :     const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
    6536             : 
    6537             :     // Only used with nAlgo = 6.
    6538         130 :     float quant = 0.5;
    6539             : 
    6540             :     // To control array allocation only when data type is complex
    6541         130 :     const bool bIsComplex = GDALDataTypeIsComplex(poWK->eWorkingDataType) != 0;
    6542             : 
    6543         130 :     if (poWK->eResample == GRA_Average)
    6544             :     {
    6545          71 :         nAlgo = GWKAOM_Average;
    6546             :     }
    6547          59 :     else if (poWK->eResample == GRA_RMS)
    6548             :     {
    6549           9 :         nAlgo = GWKAOM_RMS;
    6550             :     }
    6551          50 :     else if (poWK->eResample == GRA_Mode)
    6552             :     {
    6553             :         // TODO check color table count > 256.
    6554          23 :         if (poWK->eWorkingDataType == GDT_Byte ||
    6555          17 :             poWK->eWorkingDataType == GDT_UInt16 ||
    6556          17 :             poWK->eWorkingDataType == GDT_Int16)
    6557             :         {
    6558          14 :             nAlgo = GWKAOM_Imode;
    6559             : 
    6560             :             // In the case of a paletted or non-paletted byte band,
    6561             :             // Input values are between 0 and 255.
    6562          14 :             if (poWK->eWorkingDataType == GDT_Byte)
    6563             :             {
    6564           6 :                 nBins = 256;
    6565             :             }
    6566             :             // In the case of Int8, input values are between -128 and 127.
    6567           8 :             else if (poWK->eWorkingDataType == GDT_Int8)
    6568             :             {
    6569           0 :                 nBins = 256;
    6570           0 :                 nBinsOffset = 128;
    6571             :             }
    6572             :             // In the case of Int16, input values are between -32768 and 32767.
    6573           8 :             else if (poWK->eWorkingDataType == GDT_Int16)
    6574             :             {
    6575           8 :                 nBins = 65536;
    6576           8 :                 nBinsOffset = 32768;
    6577             :             }
    6578             :             // In the case of UInt16, input values are between 0 and 65535.
    6579           0 :             else if (poWK->eWorkingDataType == GDT_UInt16)
    6580             :             {
    6581           0 :                 nBins = 65536;
    6582             :             }
    6583             :             pafCounts =
    6584          14 :                 static_cast<float *>(VSI_MALLOC_VERBOSE(nBins * sizeof(float)));
    6585          14 :             if (pafCounts == nullptr)
    6586           0 :                 return;
    6587             :         }
    6588             :         else
    6589             :         {
    6590           9 :             nAlgo = GWKAOM_Fmode;
    6591             : 
    6592           9 :             if (nSrcXSize > 0 && nSrcYSize > 0)
    6593             :             {
    6594             :                 pafRealVals = static_cast<float *>(
    6595           9 :                     VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
    6596             :                 pafCounts = static_cast<float *>(
    6597           9 :                     VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
    6598           9 :                 if (pafRealVals == nullptr || pafCounts == nullptr)
    6599             :                 {
    6600           0 :                     VSIFree(pafRealVals);
    6601           0 :                     VSIFree(pafCounts);
    6602           0 :                     return;
    6603             :                 }
    6604             :             }
    6605             :         }
    6606             :     }
    6607          27 :     else if (poWK->eResample == GRA_Max)
    6608             :     {
    6609           6 :         nAlgo = GWKAOM_Max;
    6610             :     }
    6611          21 :     else if (poWK->eResample == GRA_Min)
    6612             :     {
    6613           5 :         nAlgo = GWKAOM_Min;
    6614             :     }
    6615          16 :     else if (poWK->eResample == GRA_Med)
    6616             :     {
    6617           6 :         nAlgo = GWKAOM_Quant;
    6618           6 :         quant = 0.5;
    6619             :     }
    6620          10 :     else if (poWK->eResample == GRA_Q1)
    6621             :     {
    6622           5 :         nAlgo = GWKAOM_Quant;
    6623           5 :         quant = 0.25;
    6624             :     }
    6625           5 :     else if (poWK->eResample == GRA_Q3)
    6626             :     {
    6627           5 :         nAlgo = GWKAOM_Quant;
    6628           5 :         quant = 0.75;
    6629             :     }
    6630             : #ifdef disabled
    6631             :     else if (poWK->eResample == GRA_Sum)
    6632             :     {
    6633             :         nAlgo = GWKAOM_Sum;
    6634             :     }
    6635             : #endif
    6636             :     else
    6637             :     {
    6638             :         // Other resample algorithms not permitted here.
    6639           0 :         CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread() ERROR, "
    6640             :                          "illegal resample");
    6641           0 :         return;
    6642             :     }
    6643             : 
    6644         130 :     CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread() using algo %d",
    6645             :              nAlgo);
    6646             : 
    6647             :     /* -------------------------------------------------------------------- */
    6648             :     /*      Allocate x,y,z coordinate arrays for transformation ... two     */
    6649             :     /*      scanlines worth of positions.                                   */
    6650             :     /* -------------------------------------------------------------------- */
    6651             : 
    6652             :     double *padfX =
    6653         130 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6654             :     double *padfY =
    6655         130 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6656             :     double *padfZ =
    6657         130 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6658             :     double *padfX2 =
    6659         130 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6660             :     double *padfY2 =
    6661         130 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6662             :     double *padfZ2 =
    6663         130 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6664         130 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    6665         130 :     int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    6666             : 
    6667         130 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    6668         130 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    6669         130 :     const double dfErrorThreshold = CPLAtof(
    6670         130 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    6671             : 
    6672             :     const double dfExcludedValuesThreshold =
    6673         130 :         CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
    6674             :                                      "EXCLUDED_VALUES_PCT_THRESHOLD", "50")) /
    6675         130 :         100.0;
    6676             :     const double dfNodataValuesThreshold =
    6677         130 :         CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
    6678             :                                      "NODATA_VALUES_PCT_THRESHOLD", "100")) /
    6679         130 :         100.0;
    6680             : 
    6681             :     const int nXMargin =
    6682         130 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
    6683             :     const int nYMargin =
    6684         130 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
    6685             : 
    6686             :     /* ==================================================================== */
    6687             :     /*      Loop over output lines.                                         */
    6688             :     /* ==================================================================== */
    6689        6627 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    6690             :     {
    6691             : 
    6692             :         /* --------------------------------------------------------------------
    6693             :          */
    6694             :         /*      Setup points to transform to source image space. */
    6695             :         /* --------------------------------------------------------------------
    6696             :          */
    6697     1669840 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6698             :         {
    6699     1663340 :             padfX[iDstX] = iDstX + poWK->nDstXOff;
    6700     1663340 :             padfY[iDstX] = iDstY + poWK->nDstYOff;
    6701     1663340 :             padfZ[iDstX] = 0.0;
    6702     1663340 :             padfX2[iDstX] = iDstX + 1.0 + poWK->nDstXOff;
    6703     1663340 :             padfY2[iDstX] = iDstY + 1.0 + poWK->nDstYOff;
    6704     1663340 :             padfZ2[iDstX] = 0.0;
    6705             :         }
    6706             : 
    6707             :         /* --------------------------------------------------------------------
    6708             :          */
    6709             :         /*      Transform the points from destination pixel/line coordinates */
    6710             :         /*      to source pixel/line coordinates. */
    6711             :         /* --------------------------------------------------------------------
    6712             :          */
    6713        6497 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    6714             :                              padfY, padfZ, pabSuccess);
    6715        6497 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX2,
    6716             :                              padfY2, padfZ2, pabSuccess2);
    6717             : 
    6718        6497 :         if (dfSrcCoordPrecision > 0.0)
    6719             :         {
    6720           0 :             GWKRoundSourceCoordinates(
    6721             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    6722             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    6723           0 :                 poWK->nDstXOff, iDstY + poWK->nDstYOff);
    6724           0 :             GWKRoundSourceCoordinates(
    6725             :                 nDstXSize, padfX2, padfY2, padfZ2, pabSuccess2,
    6726             :                 dfSrcCoordPrecision, dfErrorThreshold, poWK->pfnTransformer,
    6727           0 :                 psJob->pTransformerArg, 1.0 + poWK->nDstXOff,
    6728           0 :                 iDstY + 1.0 + poWK->nDstYOff);
    6729             :         }
    6730             : 
    6731             :         /* ====================================================================
    6732             :          */
    6733             :         /*      Loop over pixels in output scanline. */
    6734             :         /* ====================================================================
    6735             :          */
    6736     1669840 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6737             :         {
    6738     1663340 :             GPtrDiff_t iSrcOffset = 0;
    6739     1663340 :             double dfDensity = 1.0;
    6740     1663340 :             bool bHasFoundDensity = false;
    6741             : 
    6742     1663340 :             if (!pabSuccess[iDstX] || !pabSuccess2[iDstX])
    6743      311460 :                 continue;
    6744             : 
    6745             :             // Add some checks so that padfX[iDstX] - poWK->nSrcXOff is in
    6746             :             // reasonable range (https://github.com/OSGeo/gdal/issues/2365)
    6747     1663340 :             if (!(padfX[iDstX] - poWK->nSrcXOff >= -nXMargin &&
    6748     1663320 :                   padfX2[iDstX] - poWK->nSrcXOff >= -nXMargin &&
    6749     1663320 :                   padfY[iDstX] - poWK->nSrcYOff >= -nYMargin &&
    6750     1663300 :                   padfY2[iDstX] - poWK->nSrcYOff >= -nYMargin &&
    6751     1663300 :                   padfX[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
    6752     1663300 :                   padfX2[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
    6753     1663290 :                   padfY[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin &&
    6754     1663290 :                   padfY2[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin))
    6755             :             {
    6756          62 :                 continue;
    6757             :             }
    6758             : 
    6759     1663280 :             const GPtrDiff_t iDstOffset =
    6760     1663280 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    6761             : 
    6762             :             // Compute corners in source crs.
    6763             : 
    6764             :             // The transformation might not have preserved ordering of
    6765             :             // coordinates so do the necessary swapping (#5433).
    6766             :             // NOTE: this is really an approximate fix. To do something
    6767             :             // more precise we would for example need to compute the
    6768             :             // transformation of coordinates in the
    6769             :             // [iDstX,iDstY]x[iDstX+1,iDstY+1] square back to source
    6770             :             // coordinates, and take the bounding box of the resulting source
    6771             :             // coordinates.
    6772             : 
    6773     1663280 :             if (padfX[iDstX] > padfX2[iDstX])
    6774      268744 :                 std::swap(padfX[iDstX], padfX2[iDstX]);
    6775             : 
    6776             :             // Detect situations where the target pixel is close to the
    6777             :             // antimeridian and when padfX[iDstX] and padfX2[iDstX] are very
    6778             :             // close to the left-most and right-most columns of the source
    6779             :             // raster. The 2 value below was experimentally determined to
    6780             :             // avoid false-positives and false-negatives.
    6781             :             // Addresses https://github.com/OSGeo/gdal/issues/6478
    6782     1663280 :             bool bWrapOverX = false;
    6783     1663280 :             const int nThresholdWrapOverX = std::min(2, nSrcXSize / 10);
    6784     1663280 :             if (poWK->nSrcXOff == 0 &&
    6785     1663280 :                 padfX[iDstX] * poWK->dfXScale < nThresholdWrapOverX &&
    6786       14495 :                 (nSrcXSize - padfX2[iDstX]) * poWK->dfXScale <
    6787             :                     nThresholdWrapOverX)
    6788             :             {
    6789             :                 // Check there is a discontinuity by checking at mid-pixel.
    6790             :                 // NOTE: all this remains fragile. To confidently
    6791             :                 // detect antimeridian warping we should probably try to access
    6792             :                 // georeferenced coordinates, and not rely only on tests on
    6793             :                 // image space coordinates. But accessing georeferenced
    6794             :                 // coordinates from here is not trivial, and we would for example
    6795             :                 // have to handle both geographic, Mercator, etc.
    6796             :                 // Let's hope this heuristic is good enough for now.
    6797        1041 :                 double x = iDstX + 0.5 + poWK->nDstXOff;
    6798        1041 :                 double y = iDstY + poWK->nDstYOff;
    6799        1041 :                 double z = 0;
    6800        1041 :                 int bSuccess = FALSE;
    6801        1041 :                 poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1, &x, &y,
    6802             :                                      &z, &bSuccess);
    6803        1041 :                 if (bSuccess && x < padfX[iDstX])
    6804             :                 {
    6805        1008 :                     bWrapOverX = true;
    6806        1008 :                     std::swap(padfX[iDstX], padfX2[iDstX]);
    6807        1008 :                     padfX2[iDstX] += nSrcXSize;
    6808             :                 }
    6809             :             }
    6810             : 
    6811     1663280 :             const double dfXMin = padfX[iDstX] - poWK->nSrcXOff;
    6812     1663280 :             const double dfXMax = padfX2[iDstX] - poWK->nSrcXOff;
    6813     1663280 :             constexpr double EPS = 1e-10;
    6814             :             // Check that [dfXMin, dfXMax] intersect with [0,nSrcXSize] with a tolerance
    6815     1663280 :             if (!(dfXMax > -EPS && dfXMin < nSrcXSize + EPS))
    6816          72 :                 continue;
    6817     1663200 :             int iSrcXMin = static_cast<int>(std::max(floor(dfXMin + EPS), 0.0));
    6818     1663200 :             int iSrcXMax = static_cast<int>(
    6819     1663200 :                 std::min(ceil(dfXMax - EPS), static_cast<double>(INT_MAX)));
    6820     1663200 :             if (!bWrapOverX)
    6821     1662200 :                 iSrcXMax = std::min(iSrcXMax, nSrcXSize);
    6822     1663200 :             if (iSrcXMin == iSrcXMax && iSrcXMax < nSrcXSize)
    6823         472 :                 iSrcXMax++;
    6824             : 
    6825     1663200 :             if (padfY[iDstX] > padfY2[iDstX])
    6826      270117 :                 std::swap(padfY[iDstX], padfY2[iDstX]);
    6827     1663200 :             const double dfYMin = padfY[iDstX] - poWK->nSrcYOff;
    6828     1663200 :             const double dfYMax = padfY2[iDstX] - poWK->nSrcYOff;
    6829             :             // Check that [dfYMin, dfYMax] intersect with [0,nSrcYSize] with a tolerance
    6830     1663200 :             if (!(dfYMax > -EPS && dfYMin < nSrcYSize + EPS))
    6831          36 :                 continue;
    6832     1663170 :             int iSrcYMin = static_cast<int>(std::max(floor(dfYMin + EPS), 0.0));
    6833             :             int iSrcYMax =
    6834     1663170 :                 std::min(static_cast<int>(ceil(dfYMax - EPS)), nSrcYSize);
    6835     1663170 :             if (iSrcYMin == iSrcYMax && iSrcYMax < nSrcYSize)
    6836           0 :                 iSrcYMax++;
    6837             : 
    6838             : #define COMPUTE_WEIGHT_Y(iSrcY)                                                \
    6839             :     ((iSrcY == iSrcYMin)                                                       \
    6840             :          ? ((iSrcYMin + 1 == iSrcYMax) ? 1.0 : 1 - (dfYMin - iSrcYMin))        \
    6841             :      : (iSrcY + 1 == iSrcYMax) ? 1 - (iSrcYMax - dfYMax)                       \
    6842             :                                : 1.0)
    6843             : 
    6844             : #define COMPUTE_WEIGHT(iSrcX, dfWeightY)                                       \
    6845             :     ((iSrcX == iSrcXMin)       ? ((iSrcXMin + 1 == iSrcXMax)                   \
    6846             :                                       ? dfWeightY                              \
    6847             :                                       : dfWeightY * (1 - (dfXMin - iSrcXMin))) \
    6848             :      : (iSrcX + 1 == iSrcXMax) ? dfWeightY * (1 - (iSrcXMax - dfXMax))         \
    6849             :                                : dfWeightY)
    6850             : 
    6851     1663170 :             bool bDone = false;
    6852             : 
    6853             :             // Special Average mode where we process all bands together,
    6854             :             // to avoid averaging tuples that match an entry of m_aadfExcludedValues
    6855     2267240 :             if (nAlgo == GWKAOM_Average &&
    6856      604073 :                 (!poWK->m_aadfExcludedValues.empty() ||
    6857      393224 :                  dfNodataValuesThreshold < 1 - EPS) &&
    6858     2267240 :                 !poWK->bApplyVerticalShift && !bIsComplex)
    6859             :             {
    6860      393224 :                 double dfTotalWeightInvalid = 0.0;
    6861      393224 :                 double dfTotalWeightExcluded = 0.0;
    6862      393224 :                 double dfTotalWeightRegular = 0.0;
    6863      786448 :                 std::vector<double> adfValueReal(poWK->nBands, 0);
    6864      786448 :                 std::vector<double> adfValueAveraged(poWK->nBands, 0);
    6865             :                 std::vector<int> anCountExcludedValues(
    6866      393224 :                     poWK->m_aadfExcludedValues.size(), 0);
    6867             : 
    6868     1572890 :                 for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    6869             :                 {
    6870     1179660 :                     const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    6871     1179660 :                     iSrcOffset =
    6872     1179660 :                         iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    6873     5111860 :                     for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    6874             :                          iSrcX++, iSrcOffset++)
    6875             :                     {
    6876     3932190 :                         if (bWrapOverX)
    6877           0 :                             iSrcOffset =
    6878           0 :                                 (iSrcX % nSrcXSize) +
    6879           0 :                                 static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    6880             : 
    6881     3932190 :                         const double dfWeight =
    6882     3932190 :                             COMPUTE_WEIGHT(iSrcX, dfWeightY);
    6883     3932190 :                         if (dfWeight <= 0)
    6884           0 :                             continue;
    6885             : 
    6886     3932200 :                         if (poWK->panUnifiedSrcValid != nullptr &&
    6887          12 :                             !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    6888             :                         {
    6889           3 :                             dfTotalWeightInvalid += dfWeight;
    6890           3 :                             continue;
    6891             :                         }
    6892             : 
    6893     3932190 :                         bool bAllValid = true;
    6894     7274900 :                         for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6895             :                         {
    6896     6160660 :                             double dfBandDensity = 0;
    6897     6160660 :                             double dfValueImagTmp = 0;
    6898     9503370 :                             if (!(GWKGetPixelValue(
    6899             :                                       poWK, iBand, iSrcOffset, &dfBandDensity,
    6900     6160660 :                                       &adfValueReal[iBand], &dfValueImagTmp) &&
    6901     3342710 :                                   dfBandDensity > BAND_DENSITY_THRESHOLD))
    6902             :                             {
    6903     2817950 :                                 bAllValid = false;
    6904     2817950 :                                 break;
    6905             :                             }
    6906             :                         }
    6907             : 
    6908     3932190 :                         if (!bAllValid)
    6909             :                         {
    6910     2817950 :                             dfTotalWeightInvalid += dfWeight;
    6911     2817950 :                             continue;
    6912             :                         }
    6913             : 
    6914     1114240 :                         bool bExcludedValueFound = false;
    6915     2228350 :                         for (size_t i = 0;
    6916     2228350 :                              i < poWK->m_aadfExcludedValues.size(); ++i)
    6917             :                         {
    6918     1114130 :                             if (poWK->m_aadfExcludedValues[i] == adfValueReal)
    6919             :                             {
    6920          21 :                                 bExcludedValueFound = true;
    6921          21 :                                 ++anCountExcludedValues[i];
    6922          21 :                                 dfTotalWeightExcluded += dfWeight;
    6923          21 :                                 break;
    6924             :                             }
    6925             :                         }
    6926     1114240 :                         if (!bExcludedValueFound)
    6927             :                         {
    6928             :                             // Weighted incremental algorithm mean
    6929             :                             // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
    6930     1114220 :                             dfTotalWeightRegular += dfWeight;
    6931     4456870 :                             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6932             :                             {
    6933     3342650 :                                 adfValueAveraged[iBand] +=
    6934     6685300 :                                     (dfWeight / dfTotalWeightRegular) *
    6935     6685300 :                                     (adfValueReal[iBand] -
    6936     3342650 :                                      adfValueAveraged[iBand]);
    6937             :                             }
    6938             :                         }
    6939             :                     }
    6940             :                 }
    6941             : 
    6942      393224 :                 const double dfTotalWeight = dfTotalWeightInvalid +
    6943             :                                              dfTotalWeightExcluded +
    6944             :                                              dfTotalWeightRegular;
    6945      393224 :                 if (dfTotalWeightInvalid > 0 &&
    6946             :                     dfTotalWeightInvalid >=
    6947      311293 :                         dfNodataValuesThreshold * dfTotalWeight)
    6948             :                 {
    6949             :                     // Do nothing. Leave bHasFoundDensity as false.
    6950             :                 }
    6951       81934 :                 else if (dfTotalWeightExcluded > 0 &&
    6952             :                          dfTotalWeightExcluded >=
    6953           6 :                              dfExcludedValuesThreshold * dfTotalWeight)
    6954             :                 {
    6955             :                     // Find the most represented excluded value tuple
    6956           3 :                     size_t iExcludedValue = 0;
    6957           3 :                     int nExcludedValueCount = 0;
    6958           6 :                     for (size_t i = 0; i < poWK->m_aadfExcludedValues.size();
    6959             :                          ++i)
    6960             :                     {
    6961           3 :                         if (anCountExcludedValues[i] > nExcludedValueCount)
    6962             :                         {
    6963           3 :                             iExcludedValue = i;
    6964           3 :                             nExcludedValueCount = anCountExcludedValues[i];
    6965             :                         }
    6966             :                     }
    6967             : 
    6968           3 :                     bHasFoundDensity = true;
    6969             : 
    6970          12 :                     for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6971             :                     {
    6972           9 :                         GWKSetPixelValue(
    6973             :                             poWK, iBand, iDstOffset, /* dfBandDensity = */ 1.0,
    6974           9 :                             poWK->m_aadfExcludedValues[iExcludedValue][iBand],
    6975             :                             0);
    6976           3 :                     }
    6977             :                 }
    6978       81931 :                 else if (dfTotalWeightRegular > 0)
    6979             :                 {
    6980       81931 :                     bHasFoundDensity = true;
    6981             : 
    6982      327720 :                     for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6983             :                     {
    6984      245789 :                         GWKSetPixelValue(poWK, iBand, iDstOffset,
    6985             :                                          /* dfBandDensity = */ 1.0,
    6986      245789 :                                          adfValueAveraged[iBand], 0);
    6987             :                     }
    6988             :                 }
    6989             : 
    6990             :                 // Skip below loop on bands
    6991      393224 :                 bDone = true;
    6992             :             }
    6993             : 
    6994             :             /* ====================================================================
    6995             :              */
    6996             :             /*      Loop processing each band. */
    6997             :             /* ====================================================================
    6998             :              */
    6999             : 
    7000     4439540 :             for (int iBand = 0; !bDone && iBand < poWK->nBands; iBand++)
    7001             :             {
    7002     2776380 :                 double dfBandDensity = 0.0;
    7003     2776380 :                 double dfValueReal = 0.0;
    7004     2776380 :                 double dfValueImag = 0.0;
    7005     2776380 :                 double dfValueRealTmp = 0.0;
    7006     2776380 :                 double dfValueImagTmp = 0.0;
    7007             : 
    7008             :                 /* --------------------------------------------------------------------
    7009             :                  */
    7010             :                 /*      Collect the source value. */
    7011             :                 /* --------------------------------------------------------------------
    7012             :                  */
    7013             : 
    7014             :                 // Loop over source lines and pixels - 3 possible algorithms.
    7015             : 
    7016             :                 // poWK->eResample == GRA_Average.
    7017     2776380 :                 if (nAlgo == GWKAOM_Average)
    7018             :                 {
    7019      300849 :                     double dfTotalWeight = 0.0;
    7020             : 
    7021             :                     // This code adapted from GDALDownsampleChunk32R_AverageT()
    7022             :                     // in gcore/overview.cpp.
    7023      631308 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7024             :                     {
    7025      330459 :                         const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7026      330459 :                         iSrcOffset = iSrcXMin +
    7027      330459 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7028      803200 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7029             :                              iSrcX++, iSrcOffset++)
    7030             :                         {
    7031      472741 :                             if (bWrapOverX)
    7032         630 :                                 iSrcOffset =
    7033         630 :                                     (iSrcX % nSrcXSize) +
    7034         630 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7035             : 
    7036      472745 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7037           4 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7038             :                                             iSrcOffset))
    7039             :                             {
    7040           1 :                                 continue;
    7041             :                             }
    7042             : 
    7043      472740 :                             if (GWKGetPixelValue(
    7044             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7045      945480 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7046      472740 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7047             :                             {
    7048      472740 :                                 const double dfWeight =
    7049      472740 :                                     COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7050      472740 :                                 if (dfWeight > 0)
    7051             :                                 {
    7052             :                                     // Weighted incremental algorithm mean
    7053             :                                     // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
    7054      472740 :                                     dfTotalWeight += dfWeight;
    7055      472740 :                                     dfValueReal +=
    7056      472740 :                                         (dfWeight / dfTotalWeight) *
    7057      472740 :                                         (dfValueRealTmp - dfValueReal);
    7058      472740 :                                     if (bIsComplex)
    7059             :                                     {
    7060         252 :                                         dfValueImag +=
    7061         252 :                                             (dfWeight / dfTotalWeight) *
    7062         252 :                                             (dfValueImagTmp - dfValueImag);
    7063             :                                     }
    7064             :                                 }
    7065             :                             }
    7066             :                         }
    7067             :                     }
    7068             : 
    7069      300849 :                     if (dfTotalWeight > 0)
    7070             :                     {
    7071      300849 :                         if (poWK->bApplyVerticalShift)
    7072             :                         {
    7073           0 :                             if (!std::isfinite(padfZ[iDstX]))
    7074           0 :                                 continue;
    7075             :                             // Subtract padfZ[] since the coordinate
    7076             :                             // transformation is from target to source
    7077           0 :                             dfValueReal =
    7078           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7079           0 :                                 padfZ[iDstX] *
    7080             :                                     dfMultFactorVerticalShiftPipeline;
    7081             :                         }
    7082             : 
    7083      300849 :                         dfBandDensity = 1;
    7084      300849 :                         bHasFoundDensity = true;
    7085             :                     }
    7086             :                 }  // GRA_Average.
    7087             :                 // poWK->eResample == GRA_RMS.
    7088     2776380 :                 if (nAlgo == GWKAOM_RMS)
    7089             :                 {
    7090      300416 :                     double dfTotalReal = 0.0;
    7091      300416 :                     double dfTotalImag = 0.0;
    7092      300416 :                     double dfTotalWeight = 0.0;
    7093             :                     // This code adapted from GDALDownsampleChunk32R_AverageT()
    7094             :                     // in gcore/overview.cpp.
    7095      630578 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7096             :                     {
    7097      330162 :                         const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7098      330162 :                         iSrcOffset = iSrcXMin +
    7099      330162 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7100      802723 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7101             :                              iSrcX++, iSrcOffset++)
    7102             :                         {
    7103      472561 :                             if (bWrapOverX)
    7104         630 :                                 iSrcOffset =
    7105         630 :                                     (iSrcX % nSrcXSize) +
    7106         630 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7107             : 
    7108      472561 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7109           0 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7110             :                                             iSrcOffset))
    7111             :                             {
    7112           0 :                                 continue;
    7113             :                             }
    7114             : 
    7115      472561 :                             if (GWKGetPixelValue(
    7116             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7117      945122 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7118      472561 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7119             :                             {
    7120      472561 :                                 const double dfWeight =
    7121      472561 :                                     COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7122      472561 :                                 dfTotalWeight += dfWeight;
    7123      472561 :                                 dfTotalReal +=
    7124      472561 :                                     dfValueRealTmp * dfValueRealTmp * dfWeight;
    7125      472561 :                                 if (bIsComplex)
    7126          48 :                                     dfTotalImag += dfValueImagTmp *
    7127          48 :                                                    dfValueImagTmp * dfWeight;
    7128             :                             }
    7129             :                         }
    7130             :                     }
    7131             : 
    7132      300416 :                     if (dfTotalWeight > 0)
    7133             :                     {
    7134      300416 :                         dfValueReal = sqrt(dfTotalReal / dfTotalWeight);
    7135             : 
    7136      300416 :                         if (poWK->bApplyVerticalShift)
    7137             :                         {
    7138           0 :                             if (!std::isfinite(padfZ[iDstX]))
    7139           0 :                                 continue;
    7140             :                             // Subtract padfZ[] since the coordinate
    7141             :                             // transformation is from target to source
    7142           0 :                             dfValueReal =
    7143           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7144           0 :                                 padfZ[iDstX] *
    7145             :                                     dfMultFactorVerticalShiftPipeline;
    7146             :                         }
    7147             : 
    7148      300416 :                         if (bIsComplex)
    7149          12 :                             dfValueImag = sqrt(dfTotalImag / dfTotalWeight);
    7150             : 
    7151      300416 :                         dfBandDensity = 1;
    7152      300416 :                         bHasFoundDensity = true;
    7153             :                     }
    7154             :                 }  // GRA_RMS.
    7155             : #ifdef disabled
    7156             :                 else if (nAlgo == GWKAOM_Sum)
    7157             :                 // poWK->eResample == GRA_Sum
    7158             :                 {
    7159             :                     double dfTotalReal = 0.0;
    7160             :                     double dfTotalImag = 0.0;
    7161             :                     bool bFoundValid = false;
    7162             : 
    7163             :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7164             :                     {
    7165             :                         const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7166             :                         iSrcOffset = iSrcXMin +
    7167             :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7168             :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7169             :                              iSrcX++, iSrcOffset++)
    7170             :                         {
    7171             :                             if (bWrapOverX)
    7172             :                                 iSrcOffset =
    7173             :                                     (iSrcX % nSrcXSize) +
    7174             :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7175             : 
    7176             :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7177             :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7178             :                                             iSrcOffset))
    7179             :                             {
    7180             :                                 continue;
    7181             :                             }
    7182             : 
    7183             :                             if (GWKGetPixelValue(
    7184             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7185             :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7186             :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7187             :                             {
    7188             :                                 const double dfWeight =
    7189             :                                     COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7190             :                                 bFoundValid = true;
    7191             :                                 dfTotalReal += dfValueRealTmp * dfWeight;
    7192             :                                 if (bIsComplex)
    7193             :                                 {
    7194             :                                     dfTotalImag += dfValueImagTmp * dfWeight;
    7195             :                                 }
    7196             :                             }
    7197             :                         }
    7198             :                     }
    7199             : 
    7200             :                     if (bFoundValid)
    7201             :                     {
    7202             :                         dfValueReal = dfTotalReal;
    7203             : 
    7204             :                         if (poWK->bApplyVerticalShift)
    7205             :                         {
    7206             :                             if (!std::isfinite(padfZ[iDstX]))
    7207             :                                 continue;
    7208             :                             // Subtract padfZ[] since the coordinate
    7209             :                             // transformation is from target to source
    7210             :                             dfValueReal =
    7211             :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7212             :                                 padfZ[iDstX] *
    7213             :                                     dfMultFactorVerticalShiftPipeline;
    7214             :                         }
    7215             : 
    7216             :                         if (bIsComplex)
    7217             :                         {
    7218             :                             dfValueImag = dfTotalImag;
    7219             :                         }
    7220             :                         dfBandDensity = 1;
    7221             :                         bHasFoundDensity = true;
    7222             :                     }
    7223             :                 }  // GRA_Sum.
    7224             : #endif
    7225     2475960 :                 else if (nAlgo == GWKAOM_Imode || nAlgo == GWKAOM_Fmode)
    7226             :                 // poWK->eResample == GRA_Mode
    7227             :                 {
    7228             :                     // This code adapted from GDALDownsampleChunk32R_Mode() in
    7229             :                     // gcore/overview.cpp.
    7230      500026 :                     if (nAlgo == GWKAOM_Fmode)  // int32 or float.
    7231             :                     {
    7232             :                         // Does it make sense to run a
    7233             :                         // majority filter on floating point data? But, here it
    7234             :                         // is for the sake of compatibility. It won't look
    7235             :                         // right on RGB images by the nature of the filter.
    7236        3407 :                         nBins = 0;
    7237        3407 :                         int iModeIndex = -1;
    7238             : 
    7239       10228 :                         for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7240             :                         {
    7241        6821 :                             const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7242        6821 :                             iSrcOffset =
    7243        6821 :                                 iSrcXMin +
    7244        6821 :                                 static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7245       20484 :                             for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7246             :                                  iSrcX++, iSrcOffset++)
    7247             :                             {
    7248       13663 :                                 if (bWrapOverX)
    7249           0 :                                     iSrcOffset =
    7250           0 :                                         (iSrcX % nSrcXSize) +
    7251           0 :                                         static_cast<GPtrDiff_t>(iSrcY) *
    7252           0 :                                             nSrcXSize;
    7253             : 
    7254       13663 :                                 if (poWK->panUnifiedSrcValid != nullptr &&
    7255           0 :                                     !CPLMaskGet(poWK->panUnifiedSrcValid,
    7256             :                                                 iSrcOffset))
    7257           0 :                                     continue;
    7258             : 
    7259       13663 :                                 if (GWKGetPixelValue(
    7260             :                                         poWK, iBand, iSrcOffset, &dfBandDensity,
    7261       27326 :                                         &dfValueRealTmp, &dfValueImagTmp) &&
    7262       13663 :                                     dfBandDensity > BAND_DENSITY_THRESHOLD)
    7263             :                                 {
    7264       13663 :                                     const float fVal =
    7265       13663 :                                         static_cast<float>(dfValueRealTmp);
    7266       13663 :                                     const double dfWeight =
    7267       13663 :                                         COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7268             : 
    7269             :                                     // Check array for existing entry.
    7270       13663 :                                     int i = 0;
    7271       29135 :                                     for (i = 0; i < nBins; ++i)
    7272             :                                     {
    7273       17768 :                                         if (pafRealVals[i] == fVal)
    7274             :                                         {
    7275             : 
    7276        2296 :                                             pafCounts[i] +=
    7277        2296 :                                                 static_cast<float>(dfWeight);
    7278        2296 :                                             bool bValIsMaxCount =
    7279        2296 :                                                 (pafCounts[i] >
    7280        2296 :                                                  pafCounts[iModeIndex]);
    7281             : 
    7282        2296 :                                             if (!bValIsMaxCount &&
    7283        1492 :                                                 pafCounts[i] ==
    7284        1492 :                                                     pafCounts[iModeIndex])
    7285             :                                             {
    7286        1487 :                                                 switch (eTieStrategy)
    7287             :                                                 {
    7288        1474 :                                                     case GWKTS_First:
    7289        1474 :                                                         break;
    7290           6 :                                                     case GWKTS_Min:
    7291           6 :                                                         bValIsMaxCount =
    7292             :                                                             fVal <
    7293             :                                                             pafRealVals
    7294           6 :                                                                 [iModeIndex];
    7295           6 :                                                         break;
    7296           7 :                                                     case GWKTS_Max:
    7297           7 :                                                         bValIsMaxCount =
    7298             :                                                             fVal >
    7299             :                                                             pafRealVals
    7300           7 :                                                                 [iModeIndex];
    7301           7 :                                                         break;
    7302             :                                                 }
    7303             :                                             }
    7304             : 
    7305        2296 :                                             if (bValIsMaxCount)
    7306             :                                             {
    7307         807 :                                                 iModeIndex = i;
    7308             :                                             }
    7309             : 
    7310        2296 :                                             break;
    7311             :                                         }
    7312             :                                     }
    7313             : 
    7314             :                                     // Add to arr if entry not already there.
    7315       13663 :                                     if (i == nBins)
    7316             :                                     {
    7317       11367 :                                         pafRealVals[i] = fVal;
    7318       11367 :                                         pafCounts[i] =
    7319       11367 :                                             static_cast<float>(dfWeight);
    7320             : 
    7321       11367 :                                         if (iModeIndex < 0)
    7322        3407 :                                             iModeIndex = i;
    7323             : 
    7324       11367 :                                         ++nBins;
    7325             :                                     }
    7326             :                                 }
    7327             :                             }
    7328             :                         }
    7329             : 
    7330        3407 :                         if (iModeIndex != -1)
    7331             :                         {
    7332        3407 :                             dfValueReal = pafRealVals[iModeIndex];
    7333             : 
    7334        3407 :                             if (poWK->bApplyVerticalShift)
    7335             :                             {
    7336           0 :                                 if (!std::isfinite(padfZ[iDstX]))
    7337           0 :                                     continue;
    7338             :                                 // Subtract padfZ[] since the coordinate
    7339             :                                 // transformation is from target to source
    7340           0 :                                 dfValueReal =
    7341           0 :                                     dfValueReal *
    7342           0 :                                         poWK->dfMultFactorVerticalShift -
    7343           0 :                                     padfZ[iDstX] *
    7344             :                                         dfMultFactorVerticalShiftPipeline;
    7345             :                             }
    7346             : 
    7347        3407 :                             dfBandDensity = 1;
    7348        3407 :                             bHasFoundDensity = true;
    7349             :                         }
    7350             :                     }
    7351             :                     else  // byte or int16.
    7352             :                     {
    7353      496619 :                         float fMaxCount = 0.0f;
    7354      496619 :                         int nMode = -1;
    7355      496619 :                         bool bHasSourceValues = false;
    7356             : 
    7357      496619 :                         memset(pafCounts, 0, nBins * sizeof(float));
    7358             : 
    7359     1612550 :                         for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7360             :                         {
    7361     1115930 :                             const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7362     1115930 :                             iSrcOffset =
    7363     1115930 :                                 iSrcXMin +
    7364     1115930 :                                 static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7365     4733150 :                             for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7366             :                                  iSrcX++, iSrcOffset++)
    7367             :                             {
    7368     3617220 :                                 if (bWrapOverX)
    7369         630 :                                     iSrcOffset =
    7370         630 :                                         (iSrcX % nSrcXSize) +
    7371         630 :                                         static_cast<GPtrDiff_t>(iSrcY) *
    7372         630 :                                             nSrcXSize;
    7373             : 
    7374     3617220 :                                 if (poWK->panUnifiedSrcValid != nullptr &&
    7375           0 :                                     !CPLMaskGet(poWK->panUnifiedSrcValid,
    7376             :                                                 iSrcOffset))
    7377           0 :                                     continue;
    7378             : 
    7379     3617220 :                                 if (GWKGetPixelValue(
    7380             :                                         poWK, iBand, iSrcOffset, &dfBandDensity,
    7381     7234430 :                                         &dfValueRealTmp, &dfValueImagTmp) &&
    7382     3617220 :                                     dfBandDensity > BAND_DENSITY_THRESHOLD)
    7383             :                                 {
    7384     3617220 :                                     bHasSourceValues = true;
    7385     3617220 :                                     const int nVal =
    7386     3617220 :                                         static_cast<int>(dfValueRealTmp);
    7387     3617220 :                                     const int iBin = nVal + nBinsOffset;
    7388     3617220 :                                     const double dfWeight =
    7389     3617220 :                                         COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7390             : 
    7391             :                                     // Sum the density.
    7392     3617220 :                                     pafCounts[iBin] +=
    7393     3617220 :                                         static_cast<float>(dfWeight);
    7394             :                                     // Is it the most common value so far?
    7395     3617220 :                                     bool bUpdateMode =
    7396     3617220 :                                         pafCounts[iBin] > fMaxCount;
    7397     3617220 :                                     if (!bUpdateMode &&
    7398      778312 :                                         pafCounts[iBin] == fMaxCount)
    7399             :                                     {
    7400      218624 :                                         switch (eTieStrategy)
    7401             :                                         {
    7402      218616 :                                             case GWKTS_First:
    7403      218616 :                                                 break;
    7404           4 :                                             case GWKTS_Min:
    7405           4 :                                                 bUpdateMode = nVal < nMode;
    7406           4 :                                                 break;
    7407           4 :                                             case GWKTS_Max:
    7408           4 :                                                 bUpdateMode = nVal > nMode;
    7409           4 :                                                 break;
    7410             :                                         }
    7411             :                                     }
    7412     3617220 :                                     if (bUpdateMode)
    7413             :                                     {
    7414     2838910 :                                         nMode = nVal;
    7415     2838910 :                                         fMaxCount = pafCounts[iBin];
    7416             :                                     }
    7417             :                                 }
    7418             :                             }
    7419             :                         }
    7420             : 
    7421      496619 :                         if (bHasSourceValues)
    7422             :                         {
    7423      496619 :                             dfValueReal = nMode;
    7424             : 
    7425      496619 :                             if (poWK->bApplyVerticalShift)
    7426             :                             {
    7427           0 :                                 if (!std::isfinite(padfZ[iDstX]))
    7428           0 :                                     continue;
    7429             :                                 // Subtract padfZ[] since the coordinate
    7430             :                                 // transformation is from target to source
    7431           0 :                                 dfValueReal =
    7432           0 :                                     dfValueReal *
    7433           0 :                                         poWK->dfMultFactorVerticalShift -
    7434           0 :                                     padfZ[iDstX] *
    7435             :                                         dfMultFactorVerticalShiftPipeline;
    7436             :                             }
    7437             : 
    7438      496619 :                             dfBandDensity = 1;
    7439      496619 :                             bHasFoundDensity = true;
    7440             :                         }
    7441      500026 :                     }
    7442             :                 }  // GRA_Mode.
    7443     1975930 :                 else if (nAlgo == GWKAOM_Max)
    7444             :                 // poWK->eResample == GRA_Max.
    7445             :                 {
    7446      335037 :                     bool bFoundValid = false;
    7447      335037 :                     double dfTotalReal = cpl::NumericLimits<double>::lowest();
    7448             :                     // This code adapted from nAlgo 1 method, GRA_Average.
    7449     1288010 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7450             :                     {
    7451      952975 :                         iSrcOffset = iSrcXMin +
    7452      952975 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7453     4406540 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7454             :                              iSrcX++, iSrcOffset++)
    7455             :                         {
    7456     3453560 :                             if (bWrapOverX)
    7457         630 :                                 iSrcOffset =
    7458         630 :                                     (iSrcX % nSrcXSize) +
    7459         630 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7460             : 
    7461     3456370 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7462        2809 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7463             :                                             iSrcOffset))
    7464             :                             {
    7465        2446 :                                 continue;
    7466             :                             }
    7467             : 
    7468             :                             // Returns pixel value if it is not no data.
    7469     3451120 :                             if (GWKGetPixelValue(
    7470             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7471     6902230 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7472     3451120 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7473             :                             {
    7474     3451120 :                                 bFoundValid = true;
    7475     3451120 :                                 if (dfTotalReal < dfValueRealTmp)
    7476             :                                 {
    7477      442642 :                                     dfTotalReal = dfValueRealTmp;
    7478             :                                 }
    7479             :                             }
    7480             :                         }
    7481             :                     }
    7482             : 
    7483      335037 :                     if (bFoundValid)
    7484             :                     {
    7485      335037 :                         dfValueReal = dfTotalReal;
    7486             : 
    7487      335037 :                         if (poWK->bApplyVerticalShift)
    7488             :                         {
    7489           0 :                             if (!std::isfinite(padfZ[iDstX]))
    7490           0 :                                 continue;
    7491             :                             // Subtract padfZ[] since the coordinate
    7492             :                             // transformation is from target to source
    7493           0 :                             dfValueReal =
    7494           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7495           0 :                                 padfZ[iDstX] *
    7496             :                                     dfMultFactorVerticalShiftPipeline;
    7497             :                         }
    7498             : 
    7499      335037 :                         dfBandDensity = 1;
    7500      335037 :                         bHasFoundDensity = true;
    7501             :                     }
    7502             :                 }  // GRA_Max.
    7503     1640900 :                 else if (nAlgo == GWKAOM_Min)
    7504             :                 // poWK->eResample == GRA_Min.
    7505             :                 {
    7506      335012 :                     bool bFoundValid = false;
    7507      335012 :                     double dfTotalReal = cpl::NumericLimits<double>::max();
    7508             :                     // This code adapted from nAlgo 1 method, GRA_Average.
    7509     1287720 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7510             :                     {
    7511      952710 :                         iSrcOffset = iSrcXMin +
    7512      952710 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7513     4403460 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7514             :                              iSrcX++, iSrcOffset++)
    7515             :                         {
    7516     3450750 :                             if (bWrapOverX)
    7517         630 :                                 iSrcOffset =
    7518         630 :                                     (iSrcX % nSrcXSize) +
    7519         630 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7520             : 
    7521     3450750 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7522           0 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7523             :                                             iSrcOffset))
    7524             :                             {
    7525           0 :                                 continue;
    7526             :                             }
    7527             : 
    7528             :                             // Returns pixel value if it is not no data.
    7529     3450750 :                             if (GWKGetPixelValue(
    7530             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7531     6901500 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7532     3450750 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7533             :                             {
    7534     3450750 :                                 bFoundValid = true;
    7535     3450750 :                                 if (dfTotalReal > dfValueRealTmp)
    7536             :                                 {
    7537      443069 :                                     dfTotalReal = dfValueRealTmp;
    7538             :                                 }
    7539             :                             }
    7540             :                         }
    7541             :                     }
    7542             : 
    7543      335012 :                     if (bFoundValid)
    7544             :                     {
    7545      335012 :                         dfValueReal = dfTotalReal;
    7546             : 
    7547      335012 :                         if (poWK->bApplyVerticalShift)
    7548             :                         {
    7549           0 :                             if (!std::isfinite(padfZ[iDstX]))
    7550           0 :                                 continue;
    7551             :                             // Subtract padfZ[] since the coordinate
    7552             :                             // transformation is from target to source
    7553           0 :                             dfValueReal =
    7554           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7555           0 :                                 padfZ[iDstX] *
    7556             :                                     dfMultFactorVerticalShiftPipeline;
    7557             :                         }
    7558             : 
    7559      335012 :                         dfBandDensity = 1;
    7560      335012 :                         bHasFoundDensity = true;
    7561             :                     }
    7562             :                 }  // GRA_Min.
    7563     1305880 :                 else if (nAlgo == GWKAOM_Quant)
    7564             :                 // poWK->eResample == GRA_Med | GRA_Q1 | GRA_Q3.
    7565             :                 {
    7566     1005040 :                     bool bFoundValid = false;
    7567     1005040 :                     std::vector<double> dfRealValuesTmp;
    7568             : 
    7569             :                     // This code adapted from nAlgo 1 method, GRA_Average.
    7570     3863170 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7571             :                     {
    7572     2858130 :                         iSrcOffset = iSrcXMin +
    7573     2858130 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7574    13210400 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7575             :                              iSrcX++, iSrcOffset++)
    7576             :                         {
    7577    10352300 :                             if (bWrapOverX)
    7578        1890 :                                 iSrcOffset =
    7579        1890 :                                     (iSrcX % nSrcXSize) +
    7580        1890 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7581             : 
    7582    10352300 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7583           0 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7584             :                                             iSrcOffset))
    7585             :                             {
    7586           0 :                                 continue;
    7587             :                             }
    7588             : 
    7589             :                             // Returns pixel value if it is not no data.
    7590    10352300 :                             if (GWKGetPixelValue(
    7591             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7592    20704500 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7593    10352300 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7594             :                             {
    7595    10352300 :                                 bFoundValid = true;
    7596    10352300 :                                 dfRealValuesTmp.push_back(dfValueRealTmp);
    7597             :                             }
    7598             :                         }
    7599             :                     }
    7600             : 
    7601     1005040 :                     if (bFoundValid)
    7602             :                     {
    7603     1005040 :                         std::sort(dfRealValuesTmp.begin(),
    7604             :                                   dfRealValuesTmp.end());
    7605             :                         int quantIdx = static_cast<int>(
    7606     1005040 :                             std::ceil(quant * dfRealValuesTmp.size() - 1));
    7607     1005040 :                         dfValueReal = dfRealValuesTmp[quantIdx];
    7608             : 
    7609     1005040 :                         if (poWK->bApplyVerticalShift)
    7610             :                         {
    7611           0 :                             if (!std::isfinite(padfZ[iDstX]))
    7612           0 :                                 continue;
    7613             :                             // Subtract padfZ[] since the coordinate
    7614             :                             // transformation is from target to source
    7615           0 :                             dfValueReal =
    7616           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7617           0 :                                 padfZ[iDstX] *
    7618             :                                     dfMultFactorVerticalShiftPipeline;
    7619             :                         }
    7620             : 
    7621     1005040 :                         dfBandDensity = 1;
    7622     1005040 :                         bHasFoundDensity = true;
    7623     1005040 :                         dfRealValuesTmp.clear();
    7624             :                     }
    7625             :                 }  // Quantile.
    7626             : 
    7627             :                 /* --------------------------------------------------------------------
    7628             :                  */
    7629             :                 /*      We have a computed value from the source.  Now apply it
    7630             :                  * to      */
    7631             :                 /*      the destination pixel. */
    7632             :                 /* --------------------------------------------------------------------
    7633             :                  */
    7634     2776380 :                 if (bHasFoundDensity)
    7635             :                 {
    7636             :                     // TODO: Should we compute dfBandDensity in fct of
    7637             :                     // nCount/nCount2, or use as a threshold to set the dest
    7638             :                     // value?
    7639             :                     // dfBandDensity = (float) nCount / nCount2;
    7640             :                     // if( (float) nCount / nCount2 > 0.1 )
    7641             :                     // or fix gdalwarp crop_to_cutline to crop partially
    7642             :                     // overlapping pixels.
    7643     2776380 :                     GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    7644             :                                      dfValueReal, dfValueImag);
    7645             :                 }
    7646             :             }
    7647             : 
    7648     1663170 :             if (!bHasFoundDensity)
    7649      311290 :                 continue;
    7650             : 
    7651             :             /* --------------------------------------------------------------------
    7652             :              */
    7653             :             /*      Update destination density/validity masks. */
    7654             :             /* --------------------------------------------------------------------
    7655             :              */
    7656     1351880 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    7657             : 
    7658     1351880 :             if (poWK->panDstValid != nullptr)
    7659             :             {
    7660          74 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    7661             :             }
    7662             :         } /* Next iDstX */
    7663             : 
    7664             :         /* --------------------------------------------------------------------
    7665             :          */
    7666             :         /*      Report progress to the user, and optionally cancel out. */
    7667             :         /* --------------------------------------------------------------------
    7668             :          */
    7669        6497 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    7670           0 :             break;
    7671             :     }
    7672             : 
    7673             :     /* -------------------------------------------------------------------- */
    7674             :     /*      Cleanup and return.                                             */
    7675             :     /* -------------------------------------------------------------------- */
    7676         130 :     CPLFree(padfX);
    7677         130 :     CPLFree(padfY);
    7678         130 :     CPLFree(padfZ);
    7679         130 :     CPLFree(padfX2);
    7680         130 :     CPLFree(padfY2);
    7681         130 :     CPLFree(padfZ2);
    7682         130 :     CPLFree(pabSuccess);
    7683         130 :     CPLFree(pabSuccess2);
    7684         130 :     VSIFree(pafCounts);
    7685         130 :     VSIFree(pafRealVals);
    7686             : }
    7687             : 
    7688             : /************************************************************************/
    7689             : /*                         getOrientation()                             */
    7690             : /************************************************************************/
    7691             : 
    7692             : typedef std::pair<double, double> XYPair;
    7693             : 
    7694             : // Returns 1 if the turn p1 -> p2 -> p3 is clockwise oriented,
    7695             : // -1 if it is counter-clockwise oriented,
    7696             : // or 0 if it is colinear.
                       // "Clockwise" is understood for axes where y increases upward:
                       // for instance (0,0) -> (0,1) -> (1,1) yields 1.
    7697     2355910 : static int getOrientation(const XYPair &p1, const XYPair &p2, const XYPair &p3)
    7698             : {
    7699     2355910 :     const double p1x = p1.first;
    7700     2355910 :     const double p1y = p1.second;
    7701     2355910 :     const double p2x = p2.first;
    7702     2355910 :     const double p2y = p2.second;
    7703     2355910 :     const double p3x = p3.first;
    7704     2355910 :     const double p3y = p3.second;
                           // Negated 2D cross product of (p2 - p1) and (p3 - p2): positive
                           // for a right-hand (clockwise) turn, negative for a left-hand one.
    7705     2355910 :     const double val = (p2y - p1y) * (p3x - p2x) - (p2x - p1x) * (p3y - p2y);
                           // Absolute tolerance (not scaled by the coordinate magnitude)
                           // below which the three points are reported as colinear.
    7706     2355910 :     if (std::abs(val) < 1e-20)
    7707        2690 :         return 0;
    7708     2353220 :     else if (val > 0)
    7709           0 :         return 1;
    7710             :     else
    7711     2353220 :         return -1;
    7712             : }
    7713             : 
    7714             : /************************************************************************/
    7715             : /*                          isConvex()                                  */
    7716             : /************************************************************************/
    7717             : 
    7718             : typedef std::vector<XYPair> XYPoly;
    7719             : 
    7720             : // Returns whether poly is convex. poly must be closed.
                       // The test is that every consecutive vertex triple
                       // (poly[i], poly[i + 1], poly[i + 2]) that is not colinear turns in the
                       // same direction, as reported by getOrientation().
                       // poly[2] is read unconditionally and the loop bound n - 2 is computed
                       // on size_t, so poly must hold at least 3 points.
                       // NOTE(review): the triples examined are those starting at i in
                       // [0, n - 3]. Assuming "closed" means poly[n - 1] repeats poly[0], the
                       // turn at poly[0] itself (poly[n - 2], poly[0], poly[1]) is never
                       // examined - confirm this is acceptable for the callers.
    7721      785302 : static bool isConvex(const XYPoly &poly)
    7722             : {
    7723      785302 :     const size_t n = poly.size();
    7724      785302 :     size_t i = 0;
                           // Orientation of the first triple; used as the reference direction
                           // unless it is colinear (0).
    7725      785302 :     int last_orientation = getOrientation(poly[i], poly[i + 1], poly[i + 2]);
    7726      785302 :     ++i;
    7727     2355910 :     for (; i < n - 2; ++i)
    7728             :     {
    7729             :         const int orientation =
    7730     1570600 :             getOrientation(poly[i], poly[i + 1], poly[i + 2]);
                               // Colinear triples are ignored: they neither set nor contradict
                               // the reference direction.
    7731     1570600 :         if (orientation != 0)
    7732             :         {
                                   // No reference yet (only colinear triples so far): adopt
                                   // this one.
    7733     1567910 :             if (last_orientation == 0)
    7734           0 :                 last_orientation = orientation;
                                   // A turn in the opposite direction means poly is not
                                   // convex.
    7735     1567910 :             else if (orientation != last_orientation)
    7736           0 :                 return false;
    7737             :         }
    7738             :     }
    7739      785302 :     return true;
    7740             : }
    7741             : 
    7742             : /************************************************************************/
    7743             : /*                     pointIntersectsConvexPoly()                      */
    7744             : /************************************************************************/
    7745             : 
    7746             : // Returns whether xy intersects poly, that must be closed and convex.
                       // For each edge (poly[i - 1], poly[i]) the cross product of
                       // (xy - poly[i - 1]) with the edge vector is computed; xy is rejected as
                       // soon as one edge gives a sign strictly opposite to the reference sign.
                       // A zero cross product never triggers a rejection, so points lying on
                       // the boundary count as intersecting.
                       // poly[0] and poly[1] are read unconditionally: poly must hold at least
                       // 2 points.
    7747     6049100 : static bool pointIntersectsConvexPoly(const XYPair &xy, const XYPoly &poly)
    7748             : {
    7749     6049100 :     const size_t n = poly.size();
    7750     6049100 :     double dx1 = xy.first - poly[0].first;
    7751     6049100 :     double dy1 = xy.second - poly[0].second;
    7752     6049100 :     double dx2 = poly[1].first - poly[0].first;
    7753     6049100 :     double dy2 = poly[1].second - poly[0].second;
                           // Reference side, taken from the first edge (poly[0], poly[1]).
    7754     6049100 :     double prevCrossProduct = dx1 * dy2 - dx2 * dy1;
    7755             : 
    7756             :     // Check if the point remains on the same side (left/right) of all edges
    7757    14556400 :     for (size_t i = 2; i < n; i++)
    7758             :     {
    7759    12793100 :         dx1 = xy.first - poly[i - 1].first;
    7760    12793100 :         dy1 = xy.second - poly[i - 1].second;
    7761             : 
    7762    12793100 :         dx2 = poly[i].first - poly[i - 1].first;
    7763    12793100 :         dy2 = poly[i].second - poly[i - 1].second;
    7764             : 
    7765    12793100 :         double crossProduct = dx1 * dy2 - dx2 * dy1;
                               // While the reference is (nearly) zero it carries no side
                               // information: replace it with the current edge's value. No
                               // sign comparison is done for that edge.
    7766    12793100 :         if (std::abs(prevCrossProduct) < 1e-20)
    7767      725558 :             prevCrossProduct = crossProduct;
                               // Strictly opposite signs: xy is on the other side of this edge.
    7768    12067500 :         else if (prevCrossProduct * crossProduct < 0)
    7769     4285760 :             return false;
    7770             :     }
    7771             : 
    7772     1763340 :     return true;
    7773             : }
    7774             : 
    7775             : /************************************************************************/
    7776             : /*                     getIntersection()                                */
    7777             : /************************************************************************/
    7778             : 
    7779             : /* Returns intersection of [p1,p2] with [p3,p4], if
    7780             :  * it is a single point, and the 2 segments are not colinear.
                        *
                        * The intersection is searched as p1 + t * (p2 - p1) with
                        * t = t_num / denom, and the matching parameter along [p3,p4] is
                        * u_num / denom. Both must lie in [0, 1]; this is checked on the
                        * numerators and denom, without dividing.
                        * Returns true and writes the point into xy on success. Returns false,
                        * leaving xy untouched, when the segments do not cross or when denom is
                        * zero (parallel or colinear supporting lines).
    7781             :  */
    7782    11811000 : static bool getIntersection(const XYPair &p1, const XYPair &p2,
    7783             :                             const XYPair &p3, const XYPair &p4, XYPair &xy)
    7784             : {
    7785    11811000 :     const double x1 = p1.first;
    7786    11811000 :     const double y1 = p1.second;
    7787    11811000 :     const double x2 = p2.first;
    7788    11811000 :     const double y2 = p2.second;
    7789    11811000 :     const double x3 = p3.first;
    7790    11811000 :     const double y3 = p3.second;
    7791    11811000 :     const double x4 = p4.first;
    7792    11811000 :     const double y4 = p4.second;
    7793    11811000 :     const double t_num = (x1 - x3) * (y3 - y4) - (y1 - y3) * (x3 - x4);
    7794    11811000 :     const double denom = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4);
                           // Reject t < 0 (opposite signs), t > 1 (numerator larger in
                           // magnitude than denom) and a zero denom.
    7795    11811000 :     if (t_num * denom < 0 || std::abs(t_num) > std::abs(denom) || denom == 0)
    7796     9260780 :         return false;
    7797             : 
                           // Same range test for the parameter along [p3,p4]; denom is known
                           // to be non-zero here.
    7798     2550260 :     const double u_num = (x1 - x3) * (y1 - y2) - (y1 - y3) * (x1 - x2);
    7799     2550260 :     if (u_num * denom < 0 || std::abs(u_num) > std::abs(denom))
    7800      973924 :         return false;
    7801             : 
    7802     1576340 :     const double t = t_num / denom;
    7803     1576340 :     xy.first = x1 + t * (x2 - x1);
    7804     1576340 :     xy.second = y1 + t * (y2 - y1);
    7805     1576340 :     return true;
    7806             : }
    7807             : 
    7808             : /************************************************************************/
    7809             : /*                     getConvexPolyIntersection()                      */
    7810             : /************************************************************************/
    7811             : 
    7812             : // poly1 and poly2 must be closed and convex.
    7813             : // The returned intersection will not necessary be closed.
    7814      785302 : static void getConvexPolyIntersection(const XYPoly &poly1, const XYPoly &poly2,
    7815             :                                       XYPoly &intersection)
    7816             : {
    7817      785302 :     intersection.clear();
    7818             : 
    7819             :     // Add all points of poly1 inside poly2
    7820     3926510 :     for (size_t i = 0; i < poly1.size() - 1; ++i)
    7821             :     {
    7822     3141210 :         if (pointIntersectsConvexPoly(poly1[i], poly2))
    7823     1187430 :             intersection.push_back(poly1[i]);
    7824             :     }
    7825      785302 :     if (intersection.size() == poly1.size() - 1)
    7826             :     {
    7827             :         // poly1 is inside poly2
    7828      119100 :         return;
    7829             :     }
    7830             : 
    7831             :     // Add all points of poly2 inside poly1
    7832     3634860 :     for (size_t i = 0; i < poly2.size() - 1; ++i)
    7833             :     {
    7834     2907890 :         if (pointIntersectsConvexPoly(poly2[i], poly1))
    7835      575904 :             intersection.push_back(poly2[i]);
    7836             :     }
    7837             : 
    7838             :     // Compute the intersection of all edges of both polygons
    7839      726972 :     XYPair xy;
    7840     3634860 :     for (size_t i1 = 0; i1 < poly1.size() - 1; ++i1)
    7841             :     {
    7842    14539400 :         for (size_t i2 = 0; i2 < poly2.size() - 1; ++i2)
    7843             :         {
    7844    11631600 :             if (getIntersection(poly1[i1], poly1[i1 + 1], poly2[i2],
    7845    11631600 :                                 poly2[i2 + 1], xy))
    7846             :             {
    7847     1576230 :                 intersection.push_back(xy);
    7848             :             }
    7849             :         }
    7850             :     }
    7851             : 
    7852      726972 :     if (intersection.empty())
    7853       60770 :         return;
    7854             : 
    7855             :     // Find lowest-left point in intersection set
    7856      666202 :     double lowest_x = cpl::NumericLimits<double>::max();
    7857      666202 :     double lowest_y = cpl::NumericLimits<double>::max();
    7858     3772450 :     for (const auto &pair : intersection)
    7859             :     {
    7860     3106240 :         const double x = pair.first;
    7861     3106240 :         const double y = pair.second;
    7862     3106240 :         if (y < lowest_y || (y == lowest_y && x < lowest_x))
    7863             :         {
    7864     1096040 :             lowest_x = x;
    7865     1096040 :             lowest_y = y;
    7866             :         }
    7867             :     }
    7868             : 
    7869     5737980 :     const auto sortFunc = [&](const XYPair &p1, const XYPair &p2)
    7870             :     {
    7871     5737980 :         const double p1x_diff = p1.first - lowest_x;
    7872     5737980 :         const double p1y_diff = p1.second - lowest_y;
    7873     5737980 :         const double p2x_diff = p2.first - lowest_x;
    7874     5737980 :         const double p2y_diff = p2.second - lowest_y;
    7875     5737980 :         if (p2y_diff == 0.0 && p1y_diff == 0.0)
    7876             :         {
    7877     2655420 :             if (p1x_diff >= 0)
    7878             :             {
    7879     2655420 :                 if (p2x_diff >= 0)
    7880     2655420 :                     return p1.first < p2.first;
    7881           0 :                 return true;
    7882             :             }
    7883             :             else
    7884             :             {
    7885           0 :                 if (p2x_diff >= 0)
    7886           0 :                     return false;
    7887           0 :                 return p1.first < p2.first;
    7888             :             }
    7889             :         }
    7890             : 
    7891     3082560 :         if (p2x_diff == 0.0 && p1x_diff == 0.0)
    7892     1046960 :             return p1.second < p2.second;
    7893             : 
    7894             :         double tan_p1;
    7895     2035600 :         if (p1x_diff == 0.0)
    7896      464622 :             tan_p1 = p1y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
    7897             :         else
    7898     1570980 :             tan_p1 = p1y_diff / p1x_diff;
    7899             : 
    7900             :         double tan_p2;
    7901     2035600 :         if (p2x_diff == 0.0)
    7902      839515 :             tan_p2 = p2y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
    7903             :         else
    7904     1196080 :             tan_p2 = p2y_diff / p2x_diff;
    7905             : 
    7906     2035600 :         if (tan_p1 >= 0)
    7907             :         {
    7908     1904790 :             if (tan_p2 >= 0)
    7909     1881590 :                 return tan_p1 < tan_p2;
    7910             :             else
    7911       23199 :                 return true;
    7912             :         }
    7913             :         else
    7914             :         {
    7915      130806 :             if (tan_p2 >= 0)
    7916      103900 :                 return false;
    7917             :             else
    7918       26906 :                 return tan_p1 < tan_p2;
    7919             :         }
    7920      666202 :     };
    7921             : 
    7922             :     // Sort points by increasing atan2(y-lowest_y, x-lowest_x) to form a convex
    7923             :     // hull
    7924      666202 :     std::sort(intersection.begin(), intersection.end(), sortFunc);
    7925             : 
    7926             :     // Remove duplicated points
    7927      666202 :     size_t j = 1;
    7928     3106240 :     for (size_t i = 1; i < intersection.size(); ++i)
    7929             :     {
    7930     2440040 :         if (intersection[i] != intersection[i - 1])
    7931             :         {
    7932     1452560 :             if (j < i)
    7933      545275 :                 intersection[j] = intersection[i];
    7934     1452560 :             ++j;
    7935             :         }
    7936             :     }
    7937      666202 :     intersection.resize(j);
    7938             : }
    7939             : 
    7940             : /************************************************************************/
    7941             : /*                            getArea()                                 */
    7942             : /************************************************************************/
    7943             : 
    7944             : // poly may or may not be closed.
    7945      558521 : static double getArea(const XYPoly &poly)
    7946             : {
    7947             :     // CPLAssert(poly.size() >= 2);
    7948      558521 :     const size_t nPointCount = poly.size();
    7949             :     double dfAreaSum =
    7950      558521 :         poly[0].first * (poly[1].second - poly[nPointCount - 1].second);
    7951             : 
    7952     1765140 :     for (size_t i = 1; i < nPointCount - 1; i++)
    7953             :     {
    7954     1206610 :         dfAreaSum += poly[i].first * (poly[i + 1].second - poly[i - 1].second);
    7955             :     }
    7956             : 
    7957      558521 :     dfAreaSum += poly[nPointCount - 1].first *
    7958      558521 :                  (poly[0].second - poly[nPointCount - 2].second);
    7959             : 
    7960      558521 :     return 0.5 * std::fabs(dfAreaSum);
    7961             : }
    7962             : 
    7963             : /************************************************************************/
    7964             : /*                           GWKSumPreserving()                         */
    7965             : /************************************************************************/
    7966             : 
    7967             : static void GWKSumPreservingThread(void *pData);
    7968             : 
    7969          18 : static CPLErr GWKSumPreserving(GDALWarpKernel *poWK)
    7970             : {
    7971          18 :     return GWKRun(poWK, "GWKSumPreserving", GWKSumPreservingThread);
    7972             : }
    7973             : 
    7974          18 : static void GWKSumPreservingThread(void *pData)
    7975             : {
    7976          18 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    7977          18 :     GDALWarpKernel *poWK = psJob->poWK;
    7978          18 :     const int iYMin = psJob->iYMin;
    7979          18 :     const int iYMax = psJob->iYMax;
    7980             :     const bool bIsAffineNoRotation =
    7981          18 :         GDALTransformIsAffineNoRotation(poWK->pfnTransformer,
    7982          26 :                                         poWK->pTransformerArg) &&
    7983             :         // for debug/testing purposes
    7984           8 :         CPLTestBool(
    7985          18 :             CPLGetConfigOption("GDAL_WARP_USE_AFFINE_OPTIMIZATION", "YES"));
    7986             : 
    7987          18 :     const int nDstXSize = poWK->nDstXSize;
    7988          18 :     const int nSrcXSize = poWK->nSrcXSize;
    7989          18 :     const int nSrcYSize = poWK->nSrcYSize;
    7990             : 
    7991          36 :     std::vector<double> adfX0(nSrcXSize + 1);
    7992          36 :     std::vector<double> adfY0(nSrcXSize + 1);
    7993          36 :     std::vector<double> adfZ0(nSrcXSize + 1);
    7994          36 :     std::vector<double> adfX1(nSrcXSize + 1);
    7995          36 :     std::vector<double> adfY1(nSrcXSize + 1);
    7996          36 :     std::vector<double> adfZ1(nSrcXSize + 1);
    7997          36 :     std::vector<int> abSuccess0(nSrcXSize + 1);
    7998          36 :     std::vector<int> abSuccess1(nSrcXSize + 1);
    7999             : 
    8000             :     CPLRectObj sGlobalBounds;
    8001          18 :     sGlobalBounds.minx = -2 * poWK->dfXScale;
    8002          18 :     sGlobalBounds.miny = iYMin - 2 * poWK->dfYScale;
    8003          18 :     sGlobalBounds.maxx = nDstXSize + 2 * poWK->dfXScale;
    8004          18 :     sGlobalBounds.maxy = iYMax + 2 * poWK->dfYScale;
    8005          18 :     CPLQuadTree *hQuadTree = CPLQuadTreeCreate(&sGlobalBounds, nullptr);
    8006             : 
    8007             :     struct SourcePixel
    8008             :     {
    8009             :         int iSrcX;
    8010             :         int iSrcY;
    8011             : 
    8012             :         // Coordinates of source pixel in target pixel coordinates
    8013             :         double dfDstX0;
    8014             :         double dfDstY0;
    8015             :         double dfDstX1;
    8016             :         double dfDstY1;
    8017             :         double dfDstX2;
    8018             :         double dfDstY2;
    8019             :         double dfDstX3;
    8020             :         double dfDstY3;
    8021             : 
    8022             :         // Source pixel total area (might be larger than the one described
    8023             :         // by above coordinates, if the pixel was crossing the antimeridian
    8024             :         // and split)
    8025             :         double dfArea;
    8026             :     };
    8027             : 
    8028          36 :     std::vector<SourcePixel> sourcePixels;
    8029             : 
    8030          36 :     XYPoly discontinuityLeft(5);
    8031          36 :     XYPoly discontinuityRight(5);
    8032             : 
    8033             :     /* ==================================================================== */
    8034             :     /*      First pass: transform the 4 corners of each potential           */
    8035             :     /*      contributing source pixel to target pixel coordinates.          */
    8036             :     /* ==================================================================== */
    8037             : 
    8038             :     // Special case for top line
    8039             :     {
    8040          18 :         int iY = 0;
    8041        1130 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8042             :         {
    8043        1112 :             adfX1[iX] = iX + poWK->nSrcXOff;
    8044        1112 :             adfY1[iX] = iY + poWK->nSrcYOff;
    8045        1112 :             adfZ1[iX] = 0;
    8046             :         }
    8047             : 
    8048          18 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
    8049             :                              adfX1.data(), adfY1.data(), adfZ1.data(),
    8050             :                              abSuccess1.data());
    8051             : 
    8052        1130 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8053             :         {
    8054        1112 :             if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
    8055           0 :                 abSuccess1[iX] = FALSE;
    8056             :             else
    8057             :             {
    8058        1112 :                 adfX1[iX] -= poWK->nDstXOff;
    8059        1112 :                 adfY1[iX] -= poWK->nDstYOff;
    8060             :             }
    8061             :         }
    8062             :     }
    8063             : 
    8064      413412 :     const auto getInsideXSign = [poWK, nDstXSize](double dfX)
    8065             :     {
    8066      413412 :         return dfX - poWK->nDstXOff >= -2 * poWK->dfXScale &&
    8067      205344 :                        dfX - poWK->nDstXOff <= nDstXSize + 2 * poWK->dfXScale
    8068      413412 :                    ? 1
    8069      208068 :                    : -1;
    8070          18 :     };
    8071             : 
    8072             :     const auto FindDiscontinuity =
    8073          80 :         [poWK, psJob, getInsideXSign](
    8074             :             double dfXLeft, double dfXRight, double dfY,
    8075             :             int XLeftReprojectedInsideSign, double &dfXMidReprojectedLeft,
    8076         800 :             double &dfXMidReprojectedRight, double &dfYMidReprojected)
    8077             :     {
    8078         880 :         for (int i = 0; i < 10 && dfXRight - dfXLeft > 1e-8; ++i)
    8079             :         {
    8080         800 :             double dfXMid = (dfXLeft + dfXRight) / 2;
    8081         800 :             double dfXMidReprojected = dfXMid;
    8082         800 :             dfYMidReprojected = dfY;
    8083         800 :             double dfZ = 0;
    8084         800 :             int nSuccess = 0;
    8085         800 :             poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1,
    8086             :                                  &dfXMidReprojected, &dfYMidReprojected, &dfZ,
    8087             :                                  &nSuccess);
    8088         800 :             if (XLeftReprojectedInsideSign != getInsideXSign(dfXMidReprojected))
    8089             :             {
    8090         456 :                 dfXRight = dfXMid;
    8091         456 :                 dfXMidReprojectedRight = dfXMidReprojected;
    8092             :             }
    8093             :             else
    8094             :             {
    8095         344 :                 dfXLeft = dfXMid;
    8096         344 :                 dfXMidReprojectedLeft = dfXMidReprojected;
    8097             :             }
    8098             :         }
    8099          80 :     };
    8100             : 
    8101         566 :     for (int iY = 0; iY < nSrcYSize; ++iY)
    8102             :     {
    8103         548 :         std::swap(adfX0, adfX1);
    8104         548 :         std::swap(adfY0, adfY1);
    8105         548 :         std::swap(adfZ0, adfZ1);
    8106         548 :         std::swap(abSuccess0, abSuccess1);
    8107             : 
    8108      104512 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8109             :         {
    8110      103964 :             adfX1[iX] = iX + poWK->nSrcXOff;
    8111      103964 :             adfY1[iX] = iY + 1 + poWK->nSrcYOff;
    8112      103964 :             adfZ1[iX] = 0;
    8113             :         }
    8114             : 
    8115         548 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
    8116             :                              adfX1.data(), adfY1.data(), adfZ1.data(),
    8117             :                              abSuccess1.data());
    8118             : 
    8119      104512 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8120             :         {
    8121      103964 :             if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
    8122           0 :                 abSuccess1[iX] = FALSE;
    8123             :             else
    8124             :             {
    8125      103964 :                 adfX1[iX] -= poWK->nDstXOff;
    8126      103964 :                 adfY1[iX] -= poWK->nDstYOff;
    8127             :             }
    8128             :         }
    8129             : 
    8130      103964 :         for (int iX = 0; iX < nSrcXSize; ++iX)
    8131             :         {
    8132      206832 :             if (abSuccess0[iX] && abSuccess0[iX + 1] && abSuccess1[iX] &&
    8133      103416 :                 abSuccess1[iX + 1])
    8134             :             {
    8135             :                 /* --------------------------------------------------------------------
    8136             :                  */
    8137             :                 /*      Do not try to apply transparent source pixels to the
    8138             :                  * destination.*/
    8139             :                 /* --------------------------------------------------------------------
    8140             :                  */
    8141      103416 :                 const auto iSrcOffset =
    8142      103416 :                     iX + static_cast<GPtrDiff_t>(iY) * nSrcXSize;
    8143      105816 :                 if (poWK->panUnifiedSrcValid != nullptr &&
    8144        2400 :                     !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    8145             :                 {
    8146       10971 :                     continue;
    8147             :                 }
    8148             : 
    8149      103410 :                 if (poWK->pafUnifiedSrcDensity != nullptr)
    8150             :                 {
    8151           0 :                     if (poWK->pafUnifiedSrcDensity[iSrcOffset] <
    8152             :                         SRC_DENSITY_THRESHOLD)
    8153           0 :                         continue;
    8154             :                 }
    8155             : 
    8156             :                 SourcePixel sp;
    8157      103410 :                 sp.dfArea = 0;
    8158      103410 :                 sp.dfDstX0 = adfX0[iX];
    8159      103410 :                 sp.dfDstY0 = adfY0[iX];
    8160      103410 :                 sp.dfDstX1 = adfX0[iX + 1];
    8161      103410 :                 sp.dfDstY1 = adfY0[iX + 1];
    8162      103410 :                 sp.dfDstX2 = adfX1[iX + 1];
    8163      103410 :                 sp.dfDstY2 = adfY1[iX + 1];
    8164      103410 :                 sp.dfDstX3 = adfX1[iX];
    8165      103410 :                 sp.dfDstY3 = adfY1[iX];
    8166             : 
    8167             :                 // Detect pixel that likely cross the anti-meridian and
    8168             :                 // introduce a discontinuity when reprojected.
    8169             : 
    8170      103410 :                 if (getInsideXSign(adfX0[iX]) !=
    8171      103506 :                         getInsideXSign(adfX0[iX + 1]) &&
    8172         164 :                     getInsideXSign(adfX0[iX]) == getInsideXSign(adfX1[iX]) &&
    8173          68 :                     getInsideXSign(adfX0[iX + 1]) ==
    8174      103574 :                         getInsideXSign(adfX1[iX + 1]) &&
    8175          40 :                     (adfY1[iX] - adfY0[iX]) * (adfY1[iX + 1] - adfY0[iX + 1]) >
    8176             :                         0)
    8177             :                 {
    8178          40 :                     double dfXMidReprojectedLeftTop = 0;
    8179          40 :                     double dfXMidReprojectedRightTop = 0;
    8180          40 :                     double dfYMidReprojectedTop = 0;
    8181          40 :                     FindDiscontinuity(
    8182          40 :                         iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
    8183          80 :                         iY + poWK->nSrcYOff, getInsideXSign(adfX0[iX]),
    8184             :                         dfXMidReprojectedLeftTop, dfXMidReprojectedRightTop,
    8185             :                         dfYMidReprojectedTop);
    8186          40 :                     double dfXMidReprojectedLeftBottom = 0;
    8187          40 :                     double dfXMidReprojectedRightBottom = 0;
    8188          40 :                     double dfYMidReprojectedBottom = 0;
    8189          40 :                     FindDiscontinuity(
    8190          40 :                         iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
    8191          80 :                         iY + poWK->nSrcYOff + 1, getInsideXSign(adfX1[iX]),
    8192             :                         dfXMidReprojectedLeftBottom,
    8193             :                         dfXMidReprojectedRightBottom, dfYMidReprojectedBottom);
    8194             : 
    8195          40 :                     discontinuityLeft[0] = XYPair(adfX0[iX], adfY0[iX]);
    8196          40 :                     discontinuityLeft[1] =
    8197          80 :                         XYPair(dfXMidReprojectedLeftTop, dfYMidReprojectedTop);
    8198          40 :                     discontinuityLeft[2] = XYPair(dfXMidReprojectedLeftBottom,
    8199          40 :                                                   dfYMidReprojectedBottom);
    8200          40 :                     discontinuityLeft[3] = XYPair(adfX1[iX], adfY1[iX]);
    8201          40 :                     discontinuityLeft[4] = XYPair(adfX0[iX], adfY0[iX]);
    8202             : 
    8203          40 :                     discontinuityRight[0] =
    8204          80 :                         XYPair(adfX0[iX + 1], adfY0[iX + 1]);
    8205          40 :                     discontinuityRight[1] =
    8206          80 :                         XYPair(dfXMidReprojectedRightTop, dfYMidReprojectedTop);
    8207          40 :                     discontinuityRight[2] = XYPair(dfXMidReprojectedRightBottom,
    8208          40 :                                                    dfYMidReprojectedBottom);
    8209          40 :                     discontinuityRight[3] =
    8210          80 :                         XYPair(adfX1[iX + 1], adfY1[iX + 1]);
    8211          40 :                     discontinuityRight[4] =
    8212          80 :                         XYPair(adfX0[iX + 1], adfY0[iX + 1]);
    8213             : 
    8214          40 :                     sp.dfArea = getArea(discontinuityLeft) +
    8215          40 :                                 getArea(discontinuityRight);
    8216          40 :                     if (getInsideXSign(adfX0[iX]) >= 1)
    8217             :                     {
    8218          20 :                         sp.dfDstX1 = dfXMidReprojectedLeftTop;
    8219          20 :                         sp.dfDstY1 = dfYMidReprojectedTop;
    8220          20 :                         sp.dfDstX2 = dfXMidReprojectedLeftBottom;
    8221          20 :                         sp.dfDstY2 = dfYMidReprojectedBottom;
    8222             :                     }
    8223             :                     else
    8224             :                     {
    8225          20 :                         sp.dfDstX0 = dfXMidReprojectedRightTop;
    8226          20 :                         sp.dfDstY0 = dfYMidReprojectedTop;
    8227          20 :                         sp.dfDstX3 = dfXMidReprojectedRightBottom;
    8228          20 :                         sp.dfDstY3 = dfYMidReprojectedBottom;
    8229             :                     }
    8230             :                 }
    8231             : 
    8232             :                 // Bounding box of source pixel (expressed in target pixel
    8233             :                 // coordinates)
    8234             :                 CPLRectObj sRect;
    8235      103410 :                 sRect.minx = std::min(std::min(sp.dfDstX0, sp.dfDstX1),
    8236      103410 :                                       std::min(sp.dfDstX2, sp.dfDstX3));
    8237      103410 :                 sRect.miny = std::min(std::min(sp.dfDstY0, sp.dfDstY1),
    8238      103410 :                                       std::min(sp.dfDstY2, sp.dfDstY3));
    8239      103410 :                 sRect.maxx = std::max(std::max(sp.dfDstX0, sp.dfDstX1),
    8240      103410 :                                       std::max(sp.dfDstX2, sp.dfDstX3));
    8241      103410 :                 sRect.maxy = std::max(std::max(sp.dfDstY0, sp.dfDstY1),
    8242      103410 :                                       std::max(sp.dfDstY2, sp.dfDstY3));
    8243      103410 :                 if (!(sRect.minx < nDstXSize && sRect.maxx > 0 &&
    8244      101350 :                       sRect.miny < iYMax && sRect.maxy > iYMin))
    8245             :                 {
    8246       10852 :                     continue;
    8247             :                 }
    8248             : 
    8249       92558 :                 sp.iSrcX = iX;
    8250       92558 :                 sp.iSrcY = iY;
    8251             : 
    8252       92558 :                 if (!bIsAffineNoRotation)
    8253             :                 {
    8254             :                     // Check polygon validity (no self-crossing)
    8255       89745 :                     XYPair xy;
    8256       89745 :                     if (getIntersection(XYPair(sp.dfDstX0, sp.dfDstY0),
    8257       89745 :                                         XYPair(sp.dfDstX1, sp.dfDstY1),
    8258       89745 :                                         XYPair(sp.dfDstX2, sp.dfDstY2),
    8259      269235 :                                         XYPair(sp.dfDstX3, sp.dfDstY3), xy) ||
    8260       89745 :                         getIntersection(XYPair(sp.dfDstX1, sp.dfDstY1),
    8261       89745 :                                         XYPair(sp.dfDstX2, sp.dfDstY2),
    8262       89745 :                                         XYPair(sp.dfDstX0, sp.dfDstY0),
    8263      179490 :                                         XYPair(sp.dfDstX3, sp.dfDstY3), xy))
    8264             :                     {
    8265         113 :                         continue;
    8266             :                     }
    8267             :                 }
    8268             : 
    8269       92445 :                 CPLQuadTreeInsertWithBounds(
    8270             :                     hQuadTree,
    8271             :                     reinterpret_cast<void *>(
    8272       92445 :                         static_cast<uintptr_t>(sourcePixels.size())),
    8273             :                     &sRect);
    8274             : 
    8275       92445 :                 sourcePixels.push_back(sp);
    8276             :             }
    8277             :         }
    8278             :     }
    8279             : 
    8280          36 :     std::vector<double> adfRealValue(poWK->nBands);
    8281          36 :     std::vector<double> adfImagValue(poWK->nBands);
    8282          36 :     std::vector<double> adfBandDensity(poWK->nBands);
    8283          36 :     std::vector<double> adfWeight(poWK->nBands);
    8284             : 
    8285             : #ifdef CHECK_SUM_WITH_GEOS
    8286             :     auto hGEOSContext = OGRGeometry::createGEOSContext();
    8287             :     auto seq1 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
    8288             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 0, 0.0, 0.0);
    8289             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 1, 1.0, 0.0);
    8290             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 2, 1.0, 1.0);
    8291             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 3, 0.0, 1.0);
    8292             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 4, 0.0, 0.0);
    8293             :     auto hLR1 = GEOSGeom_createLinearRing_r(hGEOSContext, seq1);
    8294             :     auto hP1 = GEOSGeom_createPolygon_r(hGEOSContext, hLR1, nullptr, 0);
    8295             : 
    8296             :     auto seq2 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
    8297             :     auto hLR2 = GEOSGeom_createLinearRing_r(hGEOSContext, seq2);
    8298             :     auto hP2 = GEOSGeom_createPolygon_r(hGEOSContext, hLR2, nullptr, 0);
    8299             : #endif
    8300             : 
    8301             :     const XYPoly xy1{
    8302          36 :         {0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}};
    8303          36 :     XYPoly xy2(5);
    8304          36 :     XYPoly xy2_triangle(4);
    8305          36 :     XYPoly intersection;
    8306             : 
    8307             :     /* ==================================================================== */
    8308             :     /*      Loop over output lines.                                         */
    8309             :     /* ==================================================================== */
    8310         891 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    8311             :     {
    8312             :         CPLRectObj sRect;
    8313         873 :         sRect.miny = iDstY;
    8314         873 :         sRect.maxy = iDstY + 1;
    8315             : 
    8316             :         /* ====================================================================
    8317             :          */
    8318             :         /*      Loop over pixels in output scanline. */
    8319             :         /* ====================================================================
    8320             :          */
    8321      221042 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    8322             :         {
    8323      220169 :             sRect.minx = iDstX;
    8324      220169 :             sRect.maxx = iDstX + 1;
    8325      220169 :             int nSourcePixels = 0;
    8326             :             void **pahSourcePixel =
    8327      220169 :                 CPLQuadTreeSearch(hQuadTree, &sRect, &nSourcePixels);
    8328      220169 :             if (nSourcePixels == 0)
    8329             :             {
    8330        1258 :                 CPLFree(pahSourcePixel);
    8331        1262 :                 continue;
    8332             :             }
    8333             : 
    8334      218911 :             std::fill(adfRealValue.begin(), adfRealValue.end(), 0);
    8335      218911 :             std::fill(adfImagValue.begin(), adfImagValue.end(), 0);
    8336      218911 :             std::fill(adfBandDensity.begin(), adfBandDensity.end(), 0);
    8337      218911 :             std::fill(adfWeight.begin(), adfWeight.end(), 0);
    8338      218911 :             double dfDensity = 0;
    8339      218911 :             double dfTotalWeight = 0;
    8340             : 
    8341             :             /* ====================================================================
    8342             :              */
    8343             :             /*          Iterate over each contributing source pixel to add its
    8344             :              */
    8345             :             /*          value weighed by the ratio of the area of its
    8346             :              * intersection  */
    8347             :             /*          with the target pixel divided by the area of the source
    8348             :              */
    8349             :             /*          pixel. */
    8350             :             /* ====================================================================
    8351             :              */
    8352     1020520 :             for (int i = 0; i < nSourcePixels; ++i)
    8353             :             {
    8354      801614 :                 const int iSourcePixel = static_cast<int>(
    8355      801614 :                     reinterpret_cast<uintptr_t>(pahSourcePixel[i]));
    8356      801614 :                 auto &sp = sourcePixels[iSourcePixel];
    8357             : 
    8358      801614 :                 double dfWeight = 0.0;
    8359      801614 :                 if (bIsAffineNoRotation)
    8360             :                 {
    8361             :                     // Optimization since the source pixel is a rectangle in
    8362             :                     // target pixel coordinates
    8363       16312 :                     double dfSrcMinX = std::min(sp.dfDstX0, sp.dfDstX2);
    8364       16312 :                     double dfSrcMaxX = std::max(sp.dfDstX0, sp.dfDstX2);
    8365       16312 :                     double dfSrcMinY = std::min(sp.dfDstY0, sp.dfDstY2);
    8366       16312 :                     double dfSrcMaxY = std::max(sp.dfDstY0, sp.dfDstY2);
    8367       16312 :                     double dfIntersMinX = std::max<double>(dfSrcMinX, iDstX);
    8368       16312 :                     double dfIntersMaxX = std::min(dfSrcMaxX, iDstX + 1.0);
    8369       16312 :                     double dfIntersMinY = std::max<double>(dfSrcMinY, iDstY);
    8370       16312 :                     double dfIntersMaxY = std::min(dfSrcMaxY, iDstY + 1.0);
    8371       16312 :                     dfWeight =
    8372       16312 :                         ((dfIntersMaxX - dfIntersMinX) *
    8373       16312 :                          (dfIntersMaxY - dfIntersMinY)) /
    8374       16312 :                         ((dfSrcMaxX - dfSrcMinX) * (dfSrcMaxY - dfSrcMinY));
    8375             :                 }
    8376             :                 else
    8377             :                 {
    8378             :                     // Compute the polygon of the source pixel in target pixel
    8379             :                     // coordinates, and shifted to the target pixel (unit square
    8380             :                     // coordinates)
    8381             : 
    8382      785302 :                     xy2[0] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
    8383      785302 :                     xy2[1] = {sp.dfDstX1 - iDstX, sp.dfDstY1 - iDstY};
    8384      785302 :                     xy2[2] = {sp.dfDstX2 - iDstX, sp.dfDstY2 - iDstY};
    8385      785302 :                     xy2[3] = {sp.dfDstX3 - iDstX, sp.dfDstY3 - iDstY};
    8386      785302 :                     xy2[4] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
    8387             : 
    8388      785302 :                     if (isConvex(xy2))
    8389             :                     {
    8390      785302 :                         getConvexPolyIntersection(xy1, xy2, intersection);
    8391      785302 :                         if (intersection.size() >= 3)
    8392             :                         {
    8393      468849 :                             dfWeight = getArea(intersection);
    8394             :                         }
    8395             :                     }
    8396             :                     else
    8397             :                     {
    8398             :                         // Split xy2 into 2 triangles.
    8399           0 :                         xy2_triangle[0] = xy2[0];
    8400           0 :                         xy2_triangle[1] = xy2[1];
    8401           0 :                         xy2_triangle[2] = xy2[2];
    8402           0 :                         xy2_triangle[3] = xy2[0];
    8403           0 :                         getConvexPolyIntersection(xy1, xy2_triangle,
    8404             :                                                   intersection);
    8405           0 :                         if (intersection.size() >= 3)
    8406             :                         {
    8407           0 :                             dfWeight = getArea(intersection);
    8408             :                         }
    8409             : 
    8410           0 :                         xy2_triangle[1] = xy2[2];
    8411           0 :                         xy2_triangle[2] = xy2[3];
    8412           0 :                         getConvexPolyIntersection(xy1, xy2_triangle,
    8413             :                                                   intersection);
    8414           0 :                         if (intersection.size() >= 3)
    8415             :                         {
    8416           0 :                             dfWeight += getArea(intersection);
    8417             :                         }
    8418             :                     }
    8419      785302 :                     if (dfWeight > 0.0)
    8420             :                     {
    8421      468828 :                         if (sp.dfArea == 0)
    8422       89592 :                             sp.dfArea = getArea(xy2);
    8423      468828 :                         dfWeight /= sp.dfArea;
    8424             :                     }
    8425             : 
    8426             : #ifdef CHECK_SUM_WITH_GEOS
    8427             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 0,
    8428             :                                          sp.dfDstX0 - iDstX,
    8429             :                                          sp.dfDstY0 - iDstY);
    8430             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 1,
    8431             :                                          sp.dfDstX1 - iDstX,
    8432             :                                          sp.dfDstY1 - iDstY);
    8433             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 2,
    8434             :                                          sp.dfDstX2 - iDstX,
    8435             :                                          sp.dfDstY2 - iDstY);
    8436             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 3,
    8437             :                                          sp.dfDstX3 - iDstX,
    8438             :                                          sp.dfDstY3 - iDstY);
    8439             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 4,
    8440             :                                          sp.dfDstX0 - iDstX,
    8441             :                                          sp.dfDstY0 - iDstY);
    8442             : 
    8443             :                     double dfWeightGEOS = 0.0;
    8444             :                     auto hIntersection =
    8445             :                         GEOSIntersection_r(hGEOSContext, hP1, hP2);
    8446             :                     if (hIntersection)
    8447             :                     {
    8448             :                         double dfIntersArea = 0.0;
    8449             :                         if (GEOSArea_r(hGEOSContext, hIntersection,
    8450             :                                        &dfIntersArea) &&
    8451             :                             dfIntersArea > 0)
    8452             :                         {
    8453             :                             double dfSourceArea = 0.0;
    8454             :                             if (GEOSArea_r(hGEOSContext, hP2, &dfSourceArea))
    8455             :                             {
    8456             :                                 dfWeightGEOS = dfIntersArea / dfSourceArea;
    8457             :                             }
    8458             :                         }
    8459             :                         GEOSGeom_destroy_r(hGEOSContext, hIntersection);
    8460             :                     }
    8461             :                     if (fabs(dfWeight - dfWeightGEOS) > 1e-5 * dfWeightGEOS)
    8462             :                     {
    8463             :                         /* ok */ printf("dfWeight=%f dfWeightGEOS=%f\n",
    8464             :                                         dfWeight, dfWeightGEOS);
    8465             :                         printf("xy2: ");  // ok
    8466             :                         for (const auto &xy : xy2)
    8467             :                             printf("[%f, %f], ", xy.first, xy.second);  // ok
    8468             :                         printf("\n");                                   // ok
    8469             :                         printf("intersection: ");                       // ok
    8470             :                         for (const auto &xy : intersection)
    8471             :                             printf("[%f, %f], ", xy.first, xy.second);  // ok
    8472             :                         printf("\n");                                   // ok
    8473             :                     }
    8474             : #endif
    8475             :                 }
    8476      801614 :                 if (dfWeight > 0.0)
    8477             :                 {
    8478      474099 :                     const GPtrDiff_t iSrcOffset =
    8479      474099 :                         sp.iSrcX +
    8480      474099 :                         static_cast<GPtrDiff_t>(sp.iSrcY) * nSrcXSize;
    8481      474099 :                     dfTotalWeight += dfWeight;
    8482             : 
    8483      474099 :                     if (poWK->pafUnifiedSrcDensity != nullptr)
    8484             :                     {
    8485           0 :                         dfDensity +=
    8486           0 :                             dfWeight * poWK->pafUnifiedSrcDensity[iSrcOffset];
    8487             :                     }
    8488             :                     else
    8489             :                     {
    8490      474099 :                         dfDensity += dfWeight;
    8491             :                     }
    8492             : 
    8493     1818720 :                     for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    8494             :                     {
    8495             :                         // Returns pixel value if it is not no data.
    8496             :                         double dfBandDensity;
    8497             :                         double dfRealValue;
    8498             :                         double dfImagValue;
    8499     2689240 :                         if (!(GWKGetPixelValue(poWK, iBand, iSrcOffset,
    8500             :                                                &dfBandDensity, &dfRealValue,
    8501             :                                                &dfImagValue) &&
    8502     1344620 :                               dfBandDensity > BAND_DENSITY_THRESHOLD))
    8503             :                         {
    8504           0 :                             continue;
    8505             :                         }
    8506             : 
    8507     1344620 :                         adfRealValue[iBand] += dfRealValue * dfWeight;
    8508     1344620 :                         adfImagValue[iBand] += dfImagValue * dfWeight;
    8509     1344620 :                         adfBandDensity[iBand] += dfBandDensity * dfWeight;
    8510     1344620 :                         adfWeight[iBand] += dfWeight;
    8511             :                     }
    8512             :                 }
    8513             :             }
    8514             : 
    8515      218911 :             CPLFree(pahSourcePixel);
    8516             : 
    8517             :             /* --------------------------------------------------------------------
    8518             :              */
    8519             :             /*          Update destination pixel value. */
    8520             :             /* --------------------------------------------------------------------
    8521             :              */
    8522      218911 :             bool bHasFoundDensity = false;
    8523      218911 :             const GPtrDiff_t iDstOffset =
    8524      218911 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    8525      827822 :             for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    8526             :             {
    8527      608911 :                 if (adfWeight[iBand] > 0)
    8528             :                 {
    8529             :                     const double dfBandDensity =
    8530      608907 :                         adfBandDensity[iBand] / adfWeight[iBand];
    8531      608907 :                     if (dfBandDensity > BAND_DENSITY_THRESHOLD)
    8532             :                     {
    8533      608907 :                         bHasFoundDensity = true;
    8534      608907 :                         GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    8535      608907 :                                          adfRealValue[iBand],
    8536      608907 :                                          adfImagValue[iBand]);
    8537             :                     }
    8538             :                 }
    8539             :             }
    8540             : 
    8541      218911 :             if (!bHasFoundDensity)
    8542           4 :                 continue;
    8543             : 
    8544             :             /* --------------------------------------------------------------------
    8545             :              */
    8546             :             /*          Update destination density/validity masks. */
    8547             :             /* --------------------------------------------------------------------
    8548             :              */
    8549      218907 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity / dfTotalWeight);
    8550             : 
    8551      218907 :             if (poWK->panDstValid != nullptr)
    8552             :             {
    8553       11750 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    8554             :             }
    8555             :         }
    8556             : 
    8557             :         /* --------------------------------------------------------------------
    8558             :          */
    8559             :         /*      Report progress to the user, and optionally cancel out. */
    8560             :         /* --------------------------------------------------------------------
    8561             :          */
    8562         873 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    8563           0 :             break;
    8564             :     }
    8565             : 
    8566             : #ifdef CHECK_SUM_WITH_GEOS
    8567             :     GEOSGeom_destroy_r(hGEOSContext, hP1);
    8568             :     GEOSGeom_destroy_r(hGEOSContext, hP2);
    8569             :     OGRGeometry::freeGEOSContext(hGEOSContext);
    8570             : #endif
    8571          18 :     CPLQuadTreeDestroy(hQuadTree);
    8572          18 : }

Generated by: LCOV version 1.14