LCOV - code coverage report
Current view: top level - alg - gdalwarpkernel.cpp (source / functions) Hit Total Coverage
Test: gdal_filtered.info Lines: 3494 4044 86.4 %
Date: 2026-03-21 11:56:32 Functions: 240 277 86.6 %

          Line data    Source code
       1             : /******************************************************************************
       2             :  *
       3             :  * Project:  High Performance Image Reprojector
       4             :  * Purpose:  Implementation of the GDALWarpKernel class.  Implements the actual
       5             :  *           image warping for a "chunk" of input and output imagery already
       6             :  *           loaded into memory.
       7             :  * Author:   Frank Warmerdam, warmerdam@pobox.com
       8             :  *
       9             :  ******************************************************************************
      10             :  * Copyright (c) 2003, Frank Warmerdam <warmerdam@pobox.com>
      11             :  * Copyright (c) 2008-2013, Even Rouault <even dot rouault at spatialys.com>
      12             :  *
      13             :  * SPDX-License-Identifier: MIT
      14             :  ****************************************************************************/
      15             : 
      16             : #include "cpl_port.h"
      17             : #include "gdalwarper.h"
      18             : 
      19             : #include <cfloat>
      20             : #include <cmath>
      21             : #include <cstddef>
      22             : #include <cstdlib>
      23             : #include <cstring>
      24             : 
      25             : #include <algorithm>
      26             : #include <limits>
      27             : #include <mutex>
      28             : #include <new>
      29             : #include <utility>
      30             : #include <vector>
      31             : 
      32             : #include "cpl_atomic_ops.h"
      33             : #include "cpl_conv.h"
      34             : #include "cpl_error.h"
      35             : #include "cpl_float.h"
      36             : #include "cpl_mask.h"
      37             : #include "cpl_multiproc.h"
      38             : #include "cpl_progress.h"
      39             : #include "cpl_string.h"
      40             : #include "cpl_vsi.h"
      41             : #include "cpl_worker_thread_pool.h"
      42             : #include "cpl_quad_tree.h"
      43             : #include "gdal.h"
      44             : #include "gdal_alg.h"
      45             : #include "gdal_alg_priv.h"
      46             : #include "gdal_thread_pool.h"
      47             : #include "gdalresamplingkernels.h"
      48             : 
      49             : // #define CHECK_SUM_WITH_GEOS
      50             : #ifdef CHECK_SUM_WITH_GEOS
      51             : #include "ogr_geometry.h"
      52             : #include "ogr_geos.h"
      53             : #endif
      54             : 
      55             : #ifdef USE_NEON_OPTIMIZATIONS
      56             : #include "include_sse2neon.h"
      57             : #define USE_SSE2
      58             : 
      59             : #include "gdalsse_priv.h"
      60             : 
      61             : // We restrict to 64bit processors because they are guaranteed to have SSE2.
      62             : // Could possibly be used too on 32bit, but we would need to check at runtime.
      63             : #elif defined(__x86_64) || defined(_M_X64)
      64             : #define USE_SSE2
      65             : 
      66             : #include "gdalsse_priv.h"
      67             : 
      68             : #if __SSE4_1__
      69             : #include <smmintrin.h>
      70             : #endif
      71             : 
      72             : #if __SSE3__
      73             : #include <pmmintrin.h>
      74             : #endif
      75             : 
      76             : #endif
      77             : 
      78             : constexpr double BAND_DENSITY_THRESHOLD = 0.0000000001;
      79             : constexpr float SRC_DENSITY_THRESHOLD_FLOAT = 0.000000001f;
      80             : constexpr double SRC_DENSITY_THRESHOLD_DOUBLE = 0.000000001;
      81             : 
      82             : // #define INSTANTIATE_FLOAT64_SSE2_IMPL
      83             : 
      84             : static const int anGWKFilterRadius[] = {
      85             :     0,  // Nearest neighbour
      86             :     1,  // Bilinear
      87             :     2,  // Cubic Convolution (Catmull-Rom)
      88             :     2,  // Cubic B-Spline
      89             :     3,  // Lanczos windowed sinc
      90             :     0,  // Average
      91             :     0,  // Mode
      92             :     0,  // Reserved GRA_Gauss=7
      93             :     0,  // Max
      94             :     0,  // Min
      95             :     0,  // Med
      96             :     0,  // Q1
      97             :     0,  // Q3
      98             :     0,  // Sum
      99             :     0,  // RMS
     100             : };
     101             : 
     102             : static double GWKBilinear(double dfX);
     103             : static double GWKCubic(double dfX);
     104             : static double GWKBSpline(double dfX);
     105             : static double GWKLanczosSinc(double dfX);
     106             : 
     107             : static const FilterFuncType apfGWKFilter[] = {
     108             :     nullptr,         // Nearest neighbour
     109             :     GWKBilinear,     // Bilinear
     110             :     GWKCubic,        // Cubic Convolution (Catmull-Rom)
     111             :     GWKBSpline,      // Cubic B-Spline
     112             :     GWKLanczosSinc,  // Lanczos windowed sinc
     113             :     nullptr,         // Average
     114             :     nullptr,         // Mode
     115             :     nullptr,         // Reserved GRA_Gauss=7
     116             :     nullptr,         // Max
     117             :     nullptr,         // Min
     118             :     nullptr,         // Med
     119             :     nullptr,         // Q1
     120             :     nullptr,         // Q3
     121             :     nullptr,         // Sum
     122             :     nullptr,         // RMS
     123             : };
     124             : 
     125             : // TODO(schwehr): Can we make these functions have a const * const arg?
     126             : static double GWKBilinear4Values(double *padfVals);
     127             : static double GWKCubic4Values(double *padfVals);
     128             : static double GWKBSpline4Values(double *padfVals);
     129             : static double GWKLanczosSinc4Values(double *padfVals);
     130             : 
     131             : static const FilterFunc4ValuesType apfGWKFilter4Values[] = {
     132             :     nullptr,                // Nearest neighbour
     133             :     GWKBilinear4Values,     // Bilinear
     134             :     GWKCubic4Values,        // Cubic Convolution (Catmull-Rom)
     135             :     GWKBSpline4Values,      // Cubic B-Spline
     136             :     GWKLanczosSinc4Values,  // Lanczos windowed sinc
     137             :     nullptr,                // Average
     138             :     nullptr,                // Mode
     139             :     nullptr,                // Reserved GRA_Gauss=7
     140             :     nullptr,                // Max
     141             :     nullptr,                // Min
     142             :     nullptr,                // Med
     143             :     nullptr,                // Q1
     144             :     nullptr,                // Q3
     145             :     nullptr,                // Sum
     146             :     nullptr,                // RMS
     147             : };
     148             : 
     149       13663 : int GWKGetFilterRadius(GDALResampleAlg eResampleAlg)
     150             : {
     151             :     static_assert(CPL_ARRAYSIZE(anGWKFilterRadius) == GRA_LAST_VALUE + 1,
     152             :                   "Bad size of anGWKFilterRadius");
     153       13663 :     return anGWKFilterRadius[eResampleAlg];
     154             : }
     155             : 
     156        5093 : FilterFuncType GWKGetFilterFunc(GDALResampleAlg eResampleAlg)
     157             : {
     158             :     static_assert(CPL_ARRAYSIZE(apfGWKFilter) == GRA_LAST_VALUE + 1,
     159             :                   "Bad size of apfGWKFilter");
     160        5093 :     return apfGWKFilter[eResampleAlg];
     161             : }
     162             : 
     163        5093 : FilterFunc4ValuesType GWKGetFilterFunc4Values(GDALResampleAlg eResampleAlg)
     164             : {
     165             :     static_assert(CPL_ARRAYSIZE(apfGWKFilter4Values) == GRA_LAST_VALUE + 1,
     166             :                   "Bad size of apfGWKFilter4Values");
     167        5093 :     return apfGWKFilter4Values[eResampleAlg];
     168             : }
     169             : 
     170             : static CPLErr GWKGeneralCase(GDALWarpKernel *);
     171             : static CPLErr GWKRealCase(GDALWarpKernel *poWK);
     172             : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     173             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     174             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     175             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
     176             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
     177             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
     178             : #endif
     179             : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     180             : static CPLErr GWKNearestByte(GDALWarpKernel *poWK);
     181             : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     182             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     183             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
     184             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
     185             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
     186             : #endif
     187             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     188             : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     189             : static CPLErr GWKNearestInt8(GDALWarpKernel *poWK);
     190             : static CPLErr GWKNearestShort(GDALWarpKernel *poWK);
     191             : static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK);
     192             : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
     193             : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK);
     194             : static CPLErr GWKAverageOrMode(GDALWarpKernel *);
     195             : static CPLErr GWKSumPreserving(GDALWarpKernel *);
     196             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
     197             : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
     198             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
     199             : 
     200             : /************************************************************************/
     201             : /*                             GWKJobStruct                             */
     202             : /************************************************************************/
     203             : 
     204             : struct GWKJobStruct
     205             : {
     206             :     std::mutex &mutex;
     207             :     std::condition_variable &cv;
     208             :     int counterSingleThreaded = 0;
     209             :     int &counter;
     210             :     bool &stopFlag;
     211             :     GDALWarpKernel *poWK = nullptr;
     212             :     int iYMin = 0;
     213             :     int iYMax = 0;
     214             :     int (*pfnProgress)(GWKJobStruct *psJob) = nullptr;
     215             :     void *pTransformerArg = nullptr;
     216             :     // used by GWKRun() to assign the proper pTransformerArg
     217             :     void (*pfnFunc)(void *) = nullptr;
     218             : 
     219        3178 :     GWKJobStruct(std::mutex &mutex_, std::condition_variable &cv_,
     220             :                  int &counter_, bool &stopFlag_)
     221        3178 :         : mutex(mutex_), cv(cv_), counter(counter_), stopFlag(stopFlag_)
     222             :     {
     223        3178 :     }
     224             : };
     225             : 
     226             : struct GWKThreadData
     227             : {
     228             :     std::unique_ptr<CPLJobQueue> poJobQueue{};
     229             :     std::unique_ptr<std::vector<GWKJobStruct>> threadJobs{};
     230             :     int nMaxThreads{0};
     231             :     int counter{0};
     232             :     bool stopFlag{false};
     233             :     std::mutex mutex{};
     234             :     std::condition_variable cv{};
     235             :     bool bTransformerArgInputAssignedToThread{false};
     236             :     void *pTransformerArgInput{
     237             :         nullptr};  // owned by calling layer. Not to be destroyed
     238             :     std::map<GIntBig, void *> mapThreadToTransformerArg{};
     239             :     int nTotalThreadCountForThisRun = 0;
     240             :     int nCurThreadCountForThisRun = 0;
     241             : };
     242             : 
     243             : /************************************************************************/
     244             : /*                         GWKProgressThread()                          */
     245             : /************************************************************************/
     246             : 
     247             : // Return TRUE if the computation must be interrupted.
     248          36 : static int GWKProgressThread(GWKJobStruct *psJob)
     249             : {
     250          36 :     bool stop = false;
     251             :     {
     252          36 :         std::lock_guard<std::mutex> lock(psJob->mutex);
     253          36 :         psJob->counter++;
     254          36 :         stop = psJob->stopFlag;
     255             :     }
     256          36 :     psJob->cv.notify_one();
     257             : 
     258          36 :     return stop;
     259             : }
     260             : 
     261             : /************************************************************************/
     262             : /*                       GWKProgressMonoThread()                        */
     263             : /************************************************************************/
     264             : 
     265             : // Return TRUE if the computation must be interrupted.
     266      435765 : static int GWKProgressMonoThread(GWKJobStruct *psJob)
     267             : {
     268      435765 :     GDALWarpKernel *poWK = psJob->poWK;
     269      435765 :     if (!poWK->pfnProgress(poWK->dfProgressBase +
     270      435765 :                                poWK->dfProgressScale *
     271      435765 :                                    (++psJob->counterSingleThreaded /
     272      435765 :                                     static_cast<double>(psJob->iYMax)),
     273             :                            "", poWK->pProgress))
     274             :     {
     275           1 :         CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
     276           1 :         psJob->stopFlag = true;
     277           1 :         return TRUE;
     278             :     }
     279      435764 :     return FALSE;
     280             : }
     281             : 
     282             : /************************************************************************/
     283             : /*                        GWKGenericMonoThread()                        */
     284             : /************************************************************************/
     285             : 
     286        3154 : static CPLErr GWKGenericMonoThread(GDALWarpKernel *poWK,
     287             :                                    void (*pfnFunc)(void *pUserData))
     288             : {
     289        3154 :     GWKThreadData td;
     290             : 
     291             :     // NOTE: the mutex is not used.
     292        3154 :     GWKJobStruct job(td.mutex, td.cv, td.counter, td.stopFlag);
     293        3154 :     job.poWK = poWK;
     294        3154 :     job.iYMin = 0;
     295        3154 :     job.iYMax = poWK->nDstYSize;
     296        3154 :     job.pfnProgress = GWKProgressMonoThread;
     297        3154 :     job.pTransformerArg = poWK->pTransformerArg;
     298        3154 :     job.counterSingleThreaded = td.counter;
     299        3154 :     pfnFunc(&job);
     300        3154 :     td.counter = job.counterSingleThreaded;
     301             : 
     302        6308 :     return td.stopFlag ? CE_Failure : CE_None;
     303             : }
     304             : 
     305             : /************************************************************************/
     306             : /*                          GWKThreadsCreate()                          */
     307             : /************************************************************************/
     308             : 
     309        1772 : void *GWKThreadsCreate(char **papszWarpOptions,
     310             :                        GDALTransformerFunc /* pfnTransformer */,
     311             :                        void *pTransformerArg)
     312             : {
     313        1772 :     const int nThreads = GDALGetNumThreads(papszWarpOptions, "NUM_THREADS",
     314             :                                            GDAL_DEFAULT_MAX_THREAD_COUNT,
     315             :                                            /* bDefaultAllCPUs = */ false);
     316        1772 :     GWKThreadData *psThreadData = new GWKThreadData();
     317             :     auto poThreadPool =
     318        1772 :         nThreads > 1 ? GDALGetGlobalThreadPool(nThreads) : nullptr;
     319        1772 :     if (poThreadPool)
     320             :     {
     321          24 :         psThreadData->nMaxThreads = nThreads;
     322          24 :         psThreadData->threadJobs.reset(new std::vector<GWKJobStruct>(
     323             :             nThreads,
     324          24 :             GWKJobStruct(psThreadData->mutex, psThreadData->cv,
     325          48 :                          psThreadData->counter, psThreadData->stopFlag)));
     326             : 
     327          24 :         psThreadData->poJobQueue = poThreadPool->CreateJobQueue();
     328          24 :         psThreadData->pTransformerArgInput = pTransformerArg;
     329             :     }
     330             : 
     331        1772 :     return psThreadData;
     332             : }
     333             : 
     334             : /************************************************************************/
     335             : /*                           GWKThreadsEnd()                            */
     336             : /************************************************************************/
     337             : 
     338        1772 : void GWKThreadsEnd(void *psThreadDataIn)
     339             : {
     340        1772 :     if (psThreadDataIn == nullptr)
     341           0 :         return;
     342             : 
     343        1772 :     GWKThreadData *psThreadData = static_cast<GWKThreadData *>(psThreadDataIn);
     344        1772 :     if (psThreadData->poJobQueue)
     345             :     {
     346             :         // cppcheck-suppress constVariableReference
     347          34 :         for (auto &pair : psThreadData->mapThreadToTransformerArg)
     348             :         {
     349          10 :             CPLAssert(pair.second != psThreadData->pTransformerArgInput);
     350          10 :             GDALDestroyTransformer(pair.second);
     351             :         }
     352          24 :         psThreadData->poJobQueue.reset();
     353             :     }
     354        1772 :     delete psThreadData;
     355             : }
     356             : 
     357             : /************************************************************************/
     358             : /*                         ThreadFuncAdapter()                          */
     359             : /************************************************************************/
     360             : 
     361          33 : static void ThreadFuncAdapter(void *pData)
     362             : {
     363          33 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
     364          33 :     GWKThreadData *psThreadData =
     365          33 :         static_cast<GWKThreadData *>(psJob->poWK->psThreadData);
     366             : 
     367             :     // Look if we have already a per-thread transformer
     368          33 :     void *pTransformerArg = nullptr;
     369          33 :     const GIntBig nThreadId = CPLGetPID();
     370             : 
     371             :     {
     372          66 :         std::lock_guard<std::mutex> lock(psThreadData->mutex);
     373          33 :         ++psThreadData->nCurThreadCountForThisRun;
     374             : 
     375          33 :         auto oIter = psThreadData->mapThreadToTransformerArg.find(nThreadId);
     376          33 :         if (oIter != psThreadData->mapThreadToTransformerArg.end())
     377             :         {
     378           0 :             pTransformerArg = oIter->second;
     379             :         }
     380          33 :         else if (!psThreadData->bTransformerArgInputAssignedToThread &&
     381          33 :                  psThreadData->nCurThreadCountForThisRun ==
     382          33 :                      psThreadData->nTotalThreadCountForThisRun)
     383             :         {
     384             :             // If we are the last thread to be started, temporarily borrow the
     385             :             // original transformer
     386          23 :             psThreadData->bTransformerArgInputAssignedToThread = true;
     387          23 :             pTransformerArg = psThreadData->pTransformerArgInput;
     388          23 :             psThreadData->mapThreadToTransformerArg[nThreadId] =
     389             :                 pTransformerArg;
     390             :         }
     391             : 
     392          33 :         if (pTransformerArg == nullptr)
     393             :         {
     394          10 :             CPLAssert(psThreadData->pTransformerArgInput != nullptr);
     395          10 :             CPLAssert(!psThreadData->bTransformerArgInputAssignedToThread);
     396             :         }
     397             :     }
     398             : 
     399             :     // If no transformer assigned to current thread, instantiate one
     400          33 :     if (pTransformerArg == nullptr)
     401             :     {
     402             :         // This somehow assumes that GDALCloneTransformer() is thread-safe
     403             :         // which should normally be the case.
     404             :         pTransformerArg =
     405          10 :             GDALCloneTransformer(psThreadData->pTransformerArgInput);
     406             : 
     407             :         // Lock for the stop flag and the transformer map.
     408          10 :         std::lock_guard<std::mutex> lock(psThreadData->mutex);
     409          10 :         if (!pTransformerArg)
     410             :         {
     411           0 :             psJob->stopFlag = true;
     412           0 :             return;
     413             :         }
     414          10 :         psThreadData->mapThreadToTransformerArg[nThreadId] = pTransformerArg;
     415             :     }
     416             : 
     417          33 :     psJob->pTransformerArg = pTransformerArg;
     418          33 :     psJob->pfnFunc(pData);
     419             : 
     420             :     // Give back original transformer, if borrowed.
     421             :     {
     422          66 :         std::lock_guard<std::mutex> lock(psThreadData->mutex);
     423          33 :         if (psThreadData->bTransformerArgInputAssignedToThread &&
     424          25 :             pTransformerArg == psThreadData->pTransformerArgInput)
     425             :         {
     426             :             psThreadData->mapThreadToTransformerArg.erase(
     427          23 :                 psThreadData->mapThreadToTransformerArg.find(nThreadId));
     428          23 :             psThreadData->bTransformerArgInputAssignedToThread = false;
     429             :         }
     430             :     }
     431             : }
     432             : 
     433             : /************************************************************************/
     434             : /*                               GWKRun()                               */
     435             : /************************************************************************/
     436             : 
     437        3177 : static CPLErr GWKRun(GDALWarpKernel *poWK, const char *pszFuncName,
     438             :                      void (*pfnFunc)(void *pUserData))
     439             : 
     440             : {
     441        3177 :     const int nDstYSize = poWK->nDstYSize;
     442             : 
     443        3177 :     CPLDebug("GDAL",
     444             :              "GDALWarpKernel()::%s() "
     445             :              "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d",
     446             :              pszFuncName, poWK->nSrcXOff, poWK->nSrcYOff, poWK->nSrcXSize,
     447             :              poWK->nSrcYSize, poWK->nDstXOff, poWK->nDstYOff, poWK->nDstXSize,
     448             :              poWK->nDstYSize);
     449             : 
     450        3177 :     if (!poWK->pfnProgress(poWK->dfProgressBase, "", poWK->pProgress))
     451             :     {
     452           0 :         CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
     453           0 :         return CE_Failure;
     454             :     }
     455             : 
     456        3177 :     GWKThreadData *psThreadData =
     457             :         static_cast<GWKThreadData *>(poWK->psThreadData);
     458        3177 :     if (psThreadData == nullptr || psThreadData->poJobQueue == nullptr)
     459             :     {
     460        3154 :         return GWKGenericMonoThread(poWK, pfnFunc);
     461             :     }
     462             : 
     463          23 :     int nThreads = std::min(psThreadData->nMaxThreads, nDstYSize / 2);
     464             :     // Config option mostly useful for tests to be able to test multithreading
     465             :     // with small rasters
     466             :     const int nWarpChunkSize =
     467          23 :         atoi(CPLGetConfigOption("WARP_THREAD_CHUNK_SIZE", "65536"));
     468          23 :     if (nWarpChunkSize > 0)
     469             :     {
     470          21 :         GIntBig nChunks =
     471          21 :             static_cast<GIntBig>(nDstYSize) * poWK->nDstXSize / nWarpChunkSize;
     472          21 :         if (nThreads > nChunks)
     473          16 :             nThreads = static_cast<int>(nChunks);
     474             :     }
     475          23 :     if (nThreads <= 0)
     476          19 :         nThreads = 1;
     477             : 
     478          23 :     CPLDebug("WARP", "Using %d threads", nThreads);
     479             : 
     480          23 :     auto &jobs = *psThreadData->threadJobs;
     481          23 :     CPLAssert(static_cast<int>(jobs.size()) >= nThreads);
     482             :     // Fill-in job structures.
     483          56 :     for (int i = 0; i < nThreads; ++i)
     484             :     {
     485          33 :         auto &job = jobs[i];
     486          33 :         job.poWK = poWK;
     487          33 :         job.iYMin =
     488          33 :             static_cast<int>(static_cast<int64_t>(i) * nDstYSize / nThreads);
     489          33 :         job.iYMax = static_cast<int>(static_cast<int64_t>(i + 1) * nDstYSize /
     490          33 :                                      nThreads);
     491          33 :         if (poWK->pfnProgress != GDALDummyProgress)
     492           2 :             job.pfnProgress = GWKProgressThread;
     493          33 :         job.pfnFunc = pfnFunc;
     494             :     }
     495             : 
     496             :     bool bStopFlag;
     497             :     {
     498          23 :         std::unique_lock<std::mutex> lock(psThreadData->mutex);
     499             : 
     500          23 :         psThreadData->nTotalThreadCountForThisRun = nThreads;
     501             :         // coverity[missing_lock]
     502          23 :         psThreadData->nCurThreadCountForThisRun = 0;
     503             : 
     504             :         // Start jobs.
     505          56 :         for (int i = 0; i < nThreads; ++i)
     506             :         {
     507          33 :             auto &job = jobs[i];
     508          33 :             psThreadData->poJobQueue->SubmitJob(ThreadFuncAdapter,
     509             :                                                 static_cast<void *>(&job));
     510             :         }
     511             : 
     512             :         /* --------------------------------------------------------------------
     513             :          */
     514             :         /*      Report progress. */
     515             :         /* --------------------------------------------------------------------
     516             :          */
     517          23 :         if (poWK->pfnProgress != GDALDummyProgress)
     518             :         {
     519           4 :             while (psThreadData->counter < nDstYSize)
     520             :             {
     521           3 :                 psThreadData->cv.wait(lock);
     522           3 :                 if (!poWK->pfnProgress(poWK->dfProgressBase +
     523           3 :                                            poWK->dfProgressScale *
     524           3 :                                                (psThreadData->counter /
     525           3 :                                                 static_cast<double>(nDstYSize)),
     526             :                                        "", poWK->pProgress))
     527             :                 {
     528           1 :                     CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
     529           1 :                     psThreadData->stopFlag = true;
     530           1 :                     break;
     531             :                 }
     532             :             }
     533             :         }
     534             : 
     535          23 :         bStopFlag = psThreadData->stopFlag;
     536             :     }
     537             : 
     538             :     /* -------------------------------------------------------------------- */
     539             :     /*      Wait for all jobs to complete.                                  */
     540             :     /* -------------------------------------------------------------------- */
     541          23 :     psThreadData->poJobQueue->WaitCompletion();
     542             : 
     543          23 :     return bStopFlag ? CE_Failure : CE_None;
     544             : }
     545             : 
     546             : /************************************************************************/
     547             : /* ==================================================================== */
     548             : /*                            GDALWarpKernel                            */
     549             : /* ==================================================================== */
     550             : /************************************************************************/
     551             : 
     552             : /**
     553             :  * \class GDALWarpKernel "gdalwarper.h"
     554             :  *
     555             :  * Low level image warping class.
     556             :  *
     557             :  * This class is responsible for low level image warping for one
     558             :  * "chunk" of imagery.  The class is essentially a structure with all
     559             :  * data members public - primarily so that new special-case functions
     560             :  * can be added without changing the class declaration.
     561             :  *
     562             :  * Applications are normally intended to interact with warping facilities
     563             :  * through the GDALWarpOperation class, though the GDALWarpKernel can in
     564             :  * theory be used directly if great care is taken in setting up the
     565             :  * control data.
     566             :  *
     567             :  * <h3>Design Issues</h3>
     568             :  *
     569             :  * The intention is that PerformWarp() would analyze the setup in terms
     570             :  * of the datatype, resampling type, and validity/density mask usage and
     571             :  * pick one of many specific implementations of the warping algorithm over
     572             :  * a continuum of optimization vs. generality.  At one end there will be a
     573             :  * reference general purpose implementation of the algorithm that supports
     574             :  * any data type (working internally in double precision complex), all three
     575             :  * resampling types, and any or all of the validity/density masks.  At the
     576             :  * other end would be highly optimized algorithms for common cases like
     577             :  * nearest neighbour resampling on GDT_UInt8 data with no masks.
     578             :  *
     579             :  * The full set of optimized versions have not been decided but we should
     580             :  * expect to have at least:
     581             :  *  - One for each resampling algorithm for 8bit data with no masks.
     582             :  *  - One for each resampling algorithm for float data with no masks.
     583             :  *  - One for each resampling algorithm for float data with any/all masks
     584             :  *    (essentially the generic case for just float data).
     585             :  *  - One for each resampling algorithm for 8bit data with support for
     586             :  *    input validity masks (per band or per pixel).  This handles the common
     587             :  *    case of nodata masking.
     588             :  *  - One for each resampling algorithm for float data with support for
     589             :  *    input validity masks (per band or per pixel).  This handles the common
     590             :  *    case of nodata masking.
     591             :  *
     592             :  * Some of the specializations would operate on all bands in one pass
     593             :  * (especially the ones without masking would do this), while others might
     594             :  * process each band individually to reduce code complexity.
     595             :  *
     596             :  * <h3>Masking Semantics</h3>
     597             :  *
     598             :  * A detailed explanation of the semantics of the validity and density masks,
     599             :  * and their effects on resampling kernels is needed here.
     600             :  */
     601             : 
     602             : /************************************************************************/
     603             : /*                     GDALWarpKernel Data Members                      */
     604             : /************************************************************************/
     605             : 
     606             : /**
     607             :  * \var GDALResampleAlg GDALWarpKernel::eResample;
     608             :  *
     609             :  * Resampling algorithm.
     610             :  *
     611             :  * The resampling algorithm to use.  One of GRA_NearestNeighbour, GRA_Bilinear,
     612             :  * GRA_Cubic, GRA_CubicSpline, GRA_Lanczos, GRA_Average, GRA_RMS,
     613             :  * GRA_Mode or GRA_Sum.
     614             :  *
     615             :  * This field is required. GRA_NearestNeighbour may be used as a default
     616             :  * value.
     617             :  */
     618             : 
     619             : /**
     620             :  * \var GDALDataType GDALWarpKernel::eWorkingDataType;
     621             :  *
     622             :  * Working pixel data type.
     623             :  *
     624             :  * The datatype of pixels in the source image (papabySrcImage) and
     625             :  * destination image (papabyDstImage) buffers.  Note that operations on
     626             :  * some data types (such as GDT_UInt8) may be much better optimized than other
     627             :  * less common cases.
     628             :  *
     629             :  * This field is required.  It may not be GDT_Unknown.
     630             :  */
     631             : 
     632             : /**
     633             :  * \var int GDALWarpKernel::nBands;
     634             :  *
     635             :  * Number of bands.
     636             :  *
     637             :  * The number of bands (layers) of imagery being warped.  Determines the
     638             :  * number of entries in the papabySrcImage, papanBandSrcValid,
     639             :  * and papabyDstImage arrays.
     640             :  *
     641             :  * This field is required.
     642             :  */
     643             : 
     644             : /**
     645             :  * \var int GDALWarpKernel::nSrcXSize;
     646             :  *
     647             :  * Source image width in pixels.
     648             :  *
     649             :  * This field is required.
     650             :  */
     651             : 
     652             : /**
     653             :  * \var int GDALWarpKernel::nSrcYSize;
     654             :  *
     655             :  * Source image height in pixels.
     656             :  *
     657             :  * This field is required.
     658             :  */
     659             : 
     660             : /**
     661             :  * \var double GDALWarpKernel::dfSrcXExtraSize;
     662             :  *
     663             :  * Number of pixels included in nSrcXSize that are present on the edges of
     664             :  * the area of interest to take into account the width of the kernel.
     665             :  *
     666             :  * This field is required.
     667             :  */
     668             : 
     669             : /**
     670             :  * \var double GDALWarpKernel::dfSrcYExtraSize;
     671             :  *
     672             :  * Number of pixels included in nSrcYSize that are present on the edges of
     673             :  * the area of interest to take into account the height of the kernel.
     674             :  *
     675             :  * This field is required.
     676             :  */
     677             : 
     678             : /**
     679             :  * \var GByte **GDALWarpKernel::papabySrcImage;
     680             :  *
     681             :  * Array of source image band data.
     682             :  *
     683             :  * This is an array (of size GDALWarpKernel::nBands) of pointers
     684             :  * to image data.  Each individual band of image data is organized as a single
     685             :  * block of image data in left to right, then bottom to top order.  The actual
     686             :  * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
     687             :  *
     688             :  * To access the pixel value for the (x=3, y=4) pixel (zero based) of
     689             :  * the second band with eWorkingDataType set to GDT_Float32 use code like
     690             :  * this:
     691             :  *
     692             :  * \code
     693             :  *   float dfPixelValue;
     694             :  *   int   nBand = 2-1;  // Band indexes are zero based.
     695             :  *   int   nPixel = 3; // Zero based.
     696             :  *   int   nLine = 4;  // Zero based.
     697             :  *
     698             :  *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
     699             :  *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
     700             :  *   assert( nBand >= 0 && nBand < poKern->nBands );
     701             :  *   dfPixelValue = ((float *) poKern->papabySrcImage[nBand])
     702             :  *                                  [nPixel + nLine * poKern->nSrcXSize];
     703             :  * \endcode
     704             :  *
     705             :  * This field is required.
     706             :  */
     707             : 
     708             : /**
     709             :  * \var GUInt32 **GDALWarpKernel::papanBandSrcValid;
     710             :  *
     711             :  * Per band validity mask for source pixels.
     712             :  *
     713             :  * Array of pixel validity mask layers for each source band.   Each of
     714             :  * the mask layers is the same size (in pixels) as the source image with
     715             :  * one bit per pixel.  Note that it is legal (and common) for this to be
     716             :  * NULL indicating that none of the pixels are invalidated, or for some
     717             :  * band validity masks to be NULL in which case all pixels of the band are
     718             :  * valid.  The following code can be used to test the validity of a particular
     719             :  * pixel.
     720             :  *
     721             :  * \code
     722             :  *   int   bIsValid = TRUE;
     723             :  *   int   nBand = 2-1;  // Band indexes are zero based.
     724             :  *   int   nPixel = 3; // Zero based.
     725             :  *   int   nLine = 4;  // Zero based.
     726             :  *
     727             :  *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
     728             :  *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
     729             :  *   assert( nBand >= 0 && nBand < poKern->nBands );
     730             :  *
     731             :  *   if( poKern->papanBandSrcValid != NULL
     732             :  *       && poKern->papanBandSrcValid[nBand] != NULL )
     733             :  *   {
     734             :  *       GUInt32 *panBandMask = poKern->papanBandSrcValid[nBand];
     735             :  *       int    iPixelOffset = nPixel + nLine * poKern->nSrcXSize;
     736             :  *
     737             :  *       bIsValid = CPLMaskGet(panBandMask, iPixelOffset);
     738             :  *   }
     739             :  * \endcode
     740             :  */
     741             : 
     742             : /**
     743             :  * \var GUInt32 *GDALWarpKernel::panUnifiedSrcValid;
     744             :  *
     745             :  * Per pixel validity mask for source pixels.
     746             :  *
     747             :  * A single validity mask layer that applies to the pixels of all source
     748             :  * bands.  It is accessed similarly to papanBandSrcValid, but without the
     749             :  * extra level of band indirection.
     750             :  *
     751             :  * This pointer may be NULL indicating that all pixels are valid.
     752             :  *
     753             :  * Note that if both panUnifiedSrcValid, and papanBandSrcValid are available,
     754             :  * the pixel isn't considered to be valid unless both arrays indicate it is
     755             :  * valid.
     756             :  */
     757             : 
     758             : /**
     759             :  * \var float *GDALWarpKernel::pafUnifiedSrcDensity;
     760             :  *
     761             :  * Per pixel density mask for source pixels.
     762             :  *
     763             :  * A single density mask layer that applies to the pixels of all source
     764             :  * bands.  It contains values between 0.0 and 1.0 indicating the degree to
     765             :  * which this pixel should be allowed to contribute to the output result.
     766             :  *
     767             :  * This pointer may be NULL indicating that all pixels have a density of 1.0.
     768             :  *
     769             :  * The density for a pixel may be accessed like this:
     770             :  *
     771             :  * \code
     772             :  *   float fDensity = 1.0;
     773             :  *   int nPixel = 3;  // Zero based.
     774             :  *   int nLine = 4;   // Zero based.
     775             :  *
     776             :  *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
     777             :  *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
     778             :  *   if( poKern->pafUnifiedSrcDensity != NULL )
     779             :  *     fDensity = poKern->pafUnifiedSrcDensity
     780             :  *                                  [nPixel + nLine * poKern->nSrcXSize];
     781             :  * \endcode
     782             :  */
     783             : 
     784             : /**
     785             :  * \var int GDALWarpKernel::nDstXSize;
     786             :  *
     787             :  * Width of destination image in pixels.
     788             :  *
     789             :  * This field is required.
     790             :  */
     791             : 
     792             : /**
     793             :  * \var int GDALWarpKernel::nDstYSize;
     794             :  *
     795             :  * Height of destination image in pixels.
     796             :  *
     797             :  * This field is required.
     798             :  */
     799             : 
     800             : /**
     801             :  * \var GByte **GDALWarpKernel::papabyDstImage;
     802             :  *
     803             :  * Array of destination image band data.
     804             :  *
     805             :  * This is an array (of size GDALWarpKernel::nBands) of pointers
     806             :  * to image data.  Each individual band of image data is organized as a single
     807             :  * block of image data in left to right, then bottom to top order.  The actual
     808             :  * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
     809             :  *
     810             :  * To access the pixel value for the (x=3, y=4) pixel (zero based) of
     811             :  * the second band with eWorkingDataType set to GDT_Float32 use code like
     812             :  * this:
     813             :  *
     814             :  * \code
     815             :  *   float dfPixelValue;
     816             :  *   int   nBand = 2-1;  // Band indexes are zero based.
     817             :  *   int   nPixel = 3; // Zero based.
     818             :  *   int   nLine = 4;  // Zero based.
     819             :  *
     820             :  *   assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
     821             :  *   assert( nLine >= 0 && nLine < poKern->nDstYSize );
     822             :  *   assert( nBand >= 0 && nBand < poKern->nBands );
     823             :  *   dfPixelValue = ((float *) poKern->papabyDstImage[nBand])
     824             :  *                                  [nPixel + nLine * poKern->nDstXSize];
     825             :  * \endcode
     826             :  *
     827             :  * This field is required.
     828             :  */
     829             : 
     830             : /**
     831             :  * \var GUInt32 *GDALWarpKernel::panDstValid;
     832             :  *
     833             :  * Per pixel validity mask for destination pixels.
     834             :  *
     835             :  * A single validity mask layer that applies to the pixels of all destination
     836             :  * bands.  It is accessed similarly to panUnifiedSrcValid, but based
     837             :  * on the size of the destination image.
     838             :  *
     839             :  * This pointer may be NULL indicating that all pixels are valid.
     840             :  */
     841             : 
     842             : /**
     843             :  * \var float *GDALWarpKernel::pafDstDensity;
     844             :  *
     845             :  * Per pixel density mask for destination pixels.
     846             :  *
     847             :  * A single density mask layer that applies to the pixels of all destination
     848             :  * bands.  It contains values between 0.0 and 1.0.
     849             :  *
     850             :  * This pointer may be NULL indicating that all pixels have a density of 1.0.
     851             :  *
     852             :  * The density for a pixel may be accessed like this:
     853             :  *
     854             :  * \code
     855             :  *   float fDensity = 1.0;
     856             :  *   int   nPixel = 3; // Zero based.
     857             :  *   int   nLine = 4;  // Zero based.
     858             :  *
     859             :  *   assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
     860             :  *   assert( nLine >= 0 && nLine < poKern->nDstYSize );
     861             :  *   if( poKern->pafDstDensity != NULL )
     862             :  *     fDensity = poKern->pafDstDensity[nPixel + nLine * poKern->nDstXSize];
     863             :  * \endcode
     864             :  */
     865             : 
     866             : /**
     867             :  * \var int GDALWarpKernel::nSrcXOff;
     868             :  *
     869             :  * X offset to source pixel coordinates for transformation.
     870             :  *
     871             :  * See pfnTransformer.
     872             :  *
     873             :  * This field is required.
     874             :  */
     875             : 
     876             : /**
     877             :  * \var int GDALWarpKernel::nSrcYOff;
     878             :  *
     879             :  * Y offset to source pixel coordinates for transformation.
     880             :  *
     881             :  * See pfnTransformer.
     882             :  *
     883             :  * This field is required.
     884             :  */
     885             : 
     886             : /**
     887             :  * \var int GDALWarpKernel::nDstXOff;
     888             :  *
     889             :  * X offset to destination pixel coordinates for transformation.
     890             :  *
     891             :  * See pfnTransformer.
     892             :  *
     893             :  * This field is required.
     894             :  */
     895             : 
     896             : /**
     897             :  * \var int GDALWarpKernel::nDstYOff;
     898             :  *
     899             :  * Y offset to destination pixel coordinates for transformation.
     900             :  *
     901             :  * See pfnTransformer.
     902             :  *
     903             :  * This field is required.
     904             :  */
     905             : 
     906             : /**
     907             :  * \var GDALTransformerFunc GDALWarpKernel::pfnTransformer;
     908             :  *
     909             :  * Source/destination location transformer.
     910             :  *
     911             :  * The function to call to transform coordinates between source image
     912             :  * pixel/line coordinates and destination image pixel/line coordinates.
     913             :  * See GDALTransformerFunc() for details of the semantics of this function.
     914             :  *
     915             :  * The GDALWarpKernel algorithm will only ever use this transformer in
     916             :  * "destination to source" mode (bDstToSrc=TRUE), and will always pass
     917             :  * partial or complete scanlines of points in the destination image as
     918             :  * input.  This means, among other things, that it is safe to use the
     919             :  * approximating transform GDALApproxTransform() as the transformation
     920             :  * function.
     921             :  *
     922             :  * Source and destination images may be subsets of a larger overall image.
     923             :  * The transformation algorithms will expect and return pixel/line coordinates
     924             :  * in terms of this larger image, so coordinates need to be offset by
     925             :  * the offsets specified in nSrcXOff, nSrcYOff, nDstXOff, and nDstYOff before
     926             :  * passing to pfnTransformer, and after return from it.
     927             :  *
     928             :  * The GDALWarpKernel::pTransformerArg value will be passed as the callback
     929             :  * data to this function when it is called.
     930             :  *
     931             :  * This field is required.
     932             :  */
     933             : 
     934             : /**
     935             :  * \var void *GDALWarpKernel::pTransformerArg;
     936             :  *
     937             :  * Callback data for pfnTransformer.
     938             :  *
     939             :  * This field may be NULL if not required for the pfnTransformer being used.
     940             :  */
     941             : 
     942             : /**
     943             :  * \var GDALProgressFunc GDALWarpKernel::pfnProgress;
     944             :  *
     945             :  * The function to call to report progress of the algorithm, and to check
     946             :  * for a requested termination of the operation.  It operates according to
     947             :  * GDALProgressFunc() semantics.
     948             :  *
     949             :  * Generally speaking the progress function will be invoked for each
     950             :  * scanline of the destination buffer that has been processed.
     951             :  *
     952             :  * This field may be NULL (internally set to GDALDummyProgress()).
     953             :  */
     954             : 
     955             : /**
     956             :  * \var void *GDALWarpKernel::pProgress;
     957             :  *
     958             :  * Callback data for pfnProgress.
     959             :  *
     960             :  * This field may be NULL if not required for the pfnProgress being used.
     961             :  */
     962             : 
     963             : /************************************************************************/
     964             : /*                           GDALWarpKernel()                           */
     965             : /************************************************************************/
     966             : 
     967        3795 : GDALWarpKernel::GDALWarpKernel()
     968             :     : papszWarpOptions(nullptr), eResample(GRA_NearestNeighbour),
     969             :       eWorkingDataType(GDT_Unknown), nBands(0), nSrcXSize(0), nSrcYSize(0),
     970             :       dfSrcXExtraSize(0.0), dfSrcYExtraSize(0.0), papabySrcImage(nullptr),
     971             :       papanBandSrcValid(nullptr), panUnifiedSrcValid(nullptr),
     972             :       pafUnifiedSrcDensity(nullptr), nDstXSize(0), nDstYSize(0),
     973             :       papabyDstImage(nullptr), panDstValid(nullptr), pafDstDensity(nullptr),
     974             :       dfXScale(1.0), dfYScale(1.0), dfXFilter(0.0), dfYFilter(0.0), nXRadius(0),
     975             :       nYRadius(0), nFiltInitX(0), nFiltInitY(0), nSrcXOff(0), nSrcYOff(0),
     976             :       nDstXOff(0), nDstYOff(0), pfnTransformer(nullptr),
     977             :       pTransformerArg(nullptr), pfnProgress(GDALDummyProgress),
     978             :       pProgress(nullptr), dfProgressBase(0.0), dfProgressScale(1.0),
     979             :       padfDstNoDataReal(nullptr), psThreadData(nullptr),
     980        3795 :       eTieStrategy(GWKTS_First)
     981             : {
     982        3795 : }
     983             : 
     984             : /************************************************************************/
     985             : /*                          ~GDALWarpKernel()                           */
     986             : /************************************************************************/
     987             : 
     988        3795 : GDALWarpKernel::~GDALWarpKernel()
     989             : {
     990        3795 : }
     991             : 
     992             : /************************************************************************/
     993             : /*                              getArea()                               */
     994             : /************************************************************************/
     995             : 
     996             : typedef std::pair<double, double> XYPair;
     997             : 
     998             : typedef std::vector<XYPair> XYPoly;
     999             : 
    1000             : // poly may or may not be closed.
    1001      565793 : static double getArea(const XYPoly &poly)
    1002             : {
    1003             :     // CPLAssert(poly.size() >= 2);
    1004      565793 :     const size_t nPointCount = poly.size();
    1005             :     double dfAreaSum =
    1006      565793 :         poly[0].first * (poly[1].second - poly[nPointCount - 1].second);
    1007             : 
    1008     1786950 :     for (size_t i = 1; i < nPointCount - 1; i++)
    1009             :     {
    1010     1221160 :         dfAreaSum += poly[i].first * (poly[i + 1].second - poly[i - 1].second);
    1011             :     }
    1012             : 
    1013      565793 :     dfAreaSum += poly[nPointCount - 1].first *
    1014      565793 :                  (poly[0].second - poly[nPointCount - 2].second);
    1015             : 
    1016      565793 :     return 0.5 * std::fabs(dfAreaSum);
    1017             : }
    1018             : 
    1019             : /************************************************************************/
    1020             : /*                       CanUse4SamplesFormula()                        */
    1021             : /************************************************************************/
    1022             : 
    1023        4663 : static bool CanUse4SamplesFormula(const GDALWarpKernel *poWK)
    1024             : {
    1025        4663 :     if (poWK->eResample == GRA_Bilinear || poWK->eResample == GRA_Cubic)
    1026             :     {
    1027             :         // Use 4-sample formula if we are not downsampling by more than a
    1028             :         // factor of 1:2
    1029        2637 :         if (poWK->dfXScale > 0.5 && poWK->dfYScale > 0.5)
    1030        2201 :             return true;
    1031         436 :         CPLDebugOnce("WARP",
    1032             :                      "Not using 4-sample bilinear/bicubic formula because "
    1033             :                      "XSCALE(=%f) and/or YSCALE(=%f) <= 0.5",
    1034             :                      poWK->dfXScale, poWK->dfYScale);
    1035             :     }
    1036        2462 :     return false;
    1037             : }
    1038             : 
    1039             : /************************************************************************/
    1040             : /*                            PerformWarp()                             */
    1041             : /************************************************************************/
    1042             : 
    1043             : /**
    1044             :  * \fn CPLErr GDALWarpKernel::PerformWarp();
    1045             :  *
    1046             :  * This method performs the warp described in the GDALWarpKernel.
    1047             :  *
    1048             :  * @return CE_None on success or CE_Failure if an error occurs.
    1049             :  */
    1050             : 
    1051        3791 : CPLErr GDALWarpKernel::PerformWarp()
    1052             : 
    1053             : {
    1054        3791 :     const CPLErr eErr = Validate();
    1055             : 
    1056        3791 :     if (eErr != CE_None)
    1057           1 :         return eErr;
    1058             : 
    1059             :     // See #2445 and #3079.
    1060        3790 :     if (nSrcXSize <= 0 || nSrcYSize <= 0)
    1061             :     {
    1062         613 :         if (!pfnProgress(dfProgressBase + dfProgressScale, "", pProgress))
    1063             :         {
    1064           0 :             CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
    1065           0 :             return CE_Failure;
    1066             :         }
    1067         613 :         return CE_None;
    1068             :     }
    1069             : 
    1070             :     /* -------------------------------------------------------------------- */
    1071             :     /*      Pre-calculate resampling scales and window sizes for filtering. */
    1072             :     /* -------------------------------------------------------------------- */
    1073             : 
    1074        3177 :     dfXScale = 0.0;
    1075        3177 :     dfYScale = 0.0;
    1076             : 
    1077             :     // XSCALE and YSCALE per warping chunk is not necessarily ideal, in case of
    1078             :     // heterogeneous change in shapes.
    1079             :     // Best would probably be a per-pixel scale computation.
    1080        3177 :     const char *pszXScale = CSLFetchNameValue(papszWarpOptions, "XSCALE");
    1081        3177 :     const char *pszYScale = CSLFetchNameValue(papszWarpOptions, "YSCALE");
    1082        3177 :     if (!pszXScale || !pszYScale)
    1083             :     {
    1084             :         // Sample points along a grid in the destination space
    1085        3176 :         constexpr int MAX_POINTS_PER_DIM = 10;
    1086        3176 :         const int nPointsX = std::min(MAX_POINTS_PER_DIM, nDstXSize);
    1087        3176 :         const int nPointsY = std::min(MAX_POINTS_PER_DIM, nDstYSize);
    1088        3176 :         constexpr int CORNER_COUNT_PER_SQUARE = 4;
    1089        3176 :         const int nPoints = CORNER_COUNT_PER_SQUARE * nPointsX * nPointsY;
    1090        6352 :         std::vector<double> adfX;
    1091        6352 :         std::vector<double> adfY;
    1092        3176 :         adfX.reserve(CORNER_COUNT_PER_SQUARE * nPoints);
    1093        3176 :         adfY.reserve(CORNER_COUNT_PER_SQUARE * nPoints);
    1094        6352 :         std::vector<double> adfZ(CORNER_COUNT_PER_SQUARE * nPoints);
    1095        6352 :         std::vector<int> abSuccess(CORNER_COUNT_PER_SQUARE * nPoints);
    1096       30621 :         for (int iY = 0; iY < nPointsY; iY++)
    1097             :         {
    1098       27445 :             const double dfYShift = (iY > 0 && iY == nPointsY - 1) ? -1.0 : 0.0;
    1099       27445 :             const double dfY =
    1100       27445 :                 dfYShift + (nPointsY == 1 ? 0.0
    1101       27229 :                                           : static_cast<double>(iY) *
    1102       27229 :                                                 nDstYSize / (nPointsY - 1));
    1103             : 
    1104      291550 :             for (int iX = 0; iX < nPointsX; iX++)
    1105             :             {
    1106      264105 :                 const double dfXShift =
    1107      264105 :                     (iX > 0 && iX == nPointsX - 1) ? -1.0 : 0.0;
    1108             : 
    1109      264105 :                 const double dfX =
    1110      264105 :                     dfXShift + (nPointsX == 1 ? 0.0
    1111      263903 :                                               : static_cast<double>(iX) *
    1112      263903 :                                                     nDstXSize / (nPointsX - 1));
    1113             : 
    1114             :                 // Reproject a unit square at each sample point
    1115      264105 :                 adfX.push_back(dfX);
    1116      264105 :                 adfY.push_back(dfY);
    1117             : 
    1118      264105 :                 adfX.push_back(dfX + 1);
    1119      264105 :                 adfY.push_back(dfY);
    1120             : 
    1121      264105 :                 adfX.push_back(dfX);
    1122      264105 :                 adfY.push_back(dfY + 1);
    1123             : 
    1124      264105 :                 adfX.push_back(dfX + 1);
    1125      264105 :                 adfY.push_back(dfY + 1);
    1126             :             }
    1127             :         }
    1128        3176 :         pfnTransformer(pTransformerArg, TRUE, static_cast<int>(adfX.size()),
    1129             :                        adfX.data(), adfY.data(), adfZ.data(), abSuccess.data());
    1130             : 
    1131        6352 :         std::vector<XYPair> adfXYScales;
    1132        3176 :         adfXYScales.reserve(nPoints);
    1133      267281 :         for (int i = 0; i < nPoints; i += CORNER_COUNT_PER_SQUARE)
    1134             :         {
    1135      527094 :             if (abSuccess[i + 0] && abSuccess[i + 1] && abSuccess[i + 2] &&
    1136      262989 :                 abSuccess[i + 3])
    1137             :             {
    1138     2103900 :                 const auto square = [](double x) { return x * x; };
    1139             : 
    1140      262987 :                 const double vx01 = adfX[i + 1] - adfX[i + 0];
    1141      262987 :                 const double vy01 = adfY[i + 1] - adfY[i + 0];
    1142      262987 :                 const double len01_sq = square(vx01) + square(vy01);
    1143             : 
    1144      262987 :                 const double vx23 = adfX[i + 3] - adfX[i + 2];
    1145      262987 :                 const double vy23 = adfY[i + 3] - adfY[i + 2];
    1146      262987 :                 const double len23_sq = square(vx23) + square(vy23);
    1147             : 
    1148      262987 :                 const double vx02 = adfX[i + 2] - adfX[i + 0];
    1149      262987 :                 const double vy02 = adfY[i + 2] - adfY[i + 0];
    1150      262987 :                 const double len02_sq = square(vx02) + square(vy02);
    1151             : 
    1152      262987 :                 const double vx13 = adfX[i + 3] - adfX[i + 1];
    1153      262987 :                 const double vy13 = adfY[i + 3] - adfY[i + 1];
    1154      262987 :                 const double len13_sq = square(vx13) + square(vy13);
    1155             : 
    1156             :                 // ~ 20 degree, heuristic
    1157      262987 :                 constexpr double TAN_MODEST_ANGLE = 0.35;
    1158             : 
    1159             :                 // 10%, heuristic
    1160      262987 :                 constexpr double LENGTH_RELATIVE_TOLERANCE = 0.1;
    1161             : 
    1162             :                 // Security margin to avoid division by zero (would only
    1163             :                 // happen in case of degenerated coordinate transformation,
    1164             :                 // or insane upsampling)
    1165      262987 :                 constexpr double EPSILON = 1e-10;
    1166             : 
    1167             :                 // Does the transformed square looks like an almost non-rotated
    1168             :                 // quasi-rectangle ?
    1169      262987 :                 if (std::fabs(vy01) < TAN_MODEST_ANGLE * vx01 &&
    1170      255870 :                     std::fabs(vy23) < TAN_MODEST_ANGLE * vx23 &&
    1171      255843 :                     std::fabs(len01_sq - len23_sq) <
    1172      255843 :                         LENGTH_RELATIVE_TOLERANCE * std::fabs(len01_sq) &&
    1173      255730 :                     std::fabs(len02_sq - len13_sq) <
    1174      255730 :                         LENGTH_RELATIVE_TOLERANCE * std::fabs(len02_sq))
    1175             :                 {
    1176             :                     // Using a geometric average here of lenAB_sq and lenCD_sq,
    1177             :                     // hence a sqrt(), and as this is still a squared value,
    1178             :                     // we need another sqrt() to get a distance.
    1179             :                     const double dfXLength =
    1180      255715 :                         std::sqrt(std::sqrt(len01_sq * len23_sq));
    1181             :                     const double dfYLength =
    1182      255715 :                         std::sqrt(std::sqrt(len02_sq * len13_sq));
    1183      255715 :                     if (dfXLength > EPSILON && dfYLength > EPSILON)
    1184             :                     {
    1185      255715 :                         const double dfThisXScale = 1.0 / dfXLength;
    1186      255715 :                         const double dfThisYScale = 1.0 / dfYLength;
    1187      255715 :                         adfXYScales.push_back({dfThisXScale, dfThisYScale});
    1188      255715 :                     }
    1189             :                 }
    1190             :                 else
    1191             :                 {
    1192             :                     // If not, then consider the area of the transformed unit
    1193             :                     // square to determine the X/Y scales.
    1194        7272 :                     const XYPoly poly{{adfX[i + 0], adfY[i + 0]},
    1195        7272 :                                       {adfX[i + 1], adfY[i + 1]},
    1196        7272 :                                       {adfX[i + 3], adfY[i + 3]},
    1197       29088 :                                       {adfX[i + 2], adfY[i + 2]}};
    1198        7272 :                     const double dfSrcArea = getArea(poly);
    1199        7272 :                     const double dfFactor = std::sqrt(dfSrcArea);
    1200        7272 :                     if (dfFactor > EPSILON)
    1201             :                     {
    1202        7272 :                         const double dfThisXScale = 1.0 / dfFactor;
    1203        7272 :                         const double dfThisYScale = dfThisXScale;
    1204        7272 :                         adfXYScales.push_back({dfThisXScale, dfThisYScale});
    1205             :                     }
    1206             :                 }
    1207             :             }
    1208             :         }
    1209             : 
    1210        3176 :         if (!adfXYScales.empty())
    1211             :         {
    1212             :             // Sort by increasing xscale * yscale
    1213        3176 :             std::sort(adfXYScales.begin(), adfXYScales.end(),
    1214     1429620 :                       [](const XYPair &a, const XYPair &b)
    1215     1429620 :                       { return a.first * a.second < b.first * b.second; });
    1216             : 
    1217             :             // Compute the per-axis maximum of scale
    1218        3176 :             double dfXMax = 0;
    1219        3176 :             double dfYMax = 0;
    1220      266163 :             for (const auto &[dfX, dfY] : adfXYScales)
    1221             :             {
    1222      262987 :                 dfXMax = std::max(dfXMax, dfX);
    1223      262987 :                 dfYMax = std::max(dfYMax, dfY);
    1224             :             }
    1225             : 
    1226             :             // Now eliminate outliers, defined as ones whose value is < 10% of
    1227             :             // the maximum value, typically found at a polar discontinuity, and
    1228             :             // compute the average of non-outlier values.
    1229        3176 :             dfXScale = 0;
    1230        3176 :             dfYScale = 0;
    1231        3176 :             int i = 0;
    1232        3176 :             constexpr double THRESHOLD = 0.1;  // 10%, rather arbitrary
    1233      266163 :             for (const auto &[dfX, dfY] : adfXYScales)
    1234             :             {
    1235      262987 :                 if (dfX > THRESHOLD * dfXMax && dfY > THRESHOLD * dfYMax)
    1236             :                 {
    1237      260139 :                     ++i;
    1238      260139 :                     const double dfXDelta = dfX - dfXScale;
    1239      260139 :                     const double dfYDelta = dfY - dfYScale;
    1240      260139 :                     const double dfInvI = 1.0 / i;
    1241      260139 :                     dfXScale += dfXDelta * dfInvI;
    1242      260139 :                     dfYScale += dfYDelta * dfInvI;
    1243             :                 }
    1244             :             }
    1245             :         }
    1246             :     }
    1247             : 
    1248             :     // Round to closest integer reciprocal scale if we are very close to it
    1249             :     const auto RoundToClosestIntegerReciprocalScaleIfCloseEnough =
    1250        6354 :         [](double dfScale)
    1251             :     {
    1252        6354 :         if (dfScale < 1.0)
    1253             :         {
    1254        2552 :             double dfReciprocalScale = 1.0 / dfScale;
    1255        2552 :             const int nReciprocalScale =
    1256        2552 :                 static_cast<int>(dfReciprocalScale + 0.5);
    1257        2552 :             if (fabs(dfReciprocalScale - nReciprocalScale) < 0.05)
    1258        2110 :                 dfScale = 1.0 / nReciprocalScale;
    1259             :         }
    1260        6354 :         return dfScale;
    1261             :     };
    1262             : 
    1263        3177 :     if (dfXScale <= 0)
    1264           1 :         dfXScale = 1.0;
    1265        3177 :     if (dfYScale <= 0)
    1266           1 :         dfYScale = 1.0;
    1267             : 
    1268        3177 :     dfXScale = RoundToClosestIntegerReciprocalScaleIfCloseEnough(dfXScale);
    1269        3177 :     dfYScale = RoundToClosestIntegerReciprocalScaleIfCloseEnough(dfYScale);
    1270             : 
    1271        3177 :     if (pszXScale != nullptr)
    1272           1 :         dfXScale = CPLAtof(pszXScale);
    1273        3177 :     if (pszYScale != nullptr)
    1274           1 :         dfYScale = CPLAtof(pszYScale);
    1275             : 
    1276        3177 :     if (!pszXScale || !pszYScale)
    1277        3176 :         CPLDebug("WARP", "dfXScale = %f, dfYScale = %f", dfXScale, dfYScale);
    1278             : 
    1279        3177 :     const int bUse4SamplesFormula = CanUse4SamplesFormula(this);
    1280             : 
    1281             :     // Safety check for callers that would use GDALWarpKernel without using
    1282             :     // GDALWarpOperation.
    1283        3114 :     if ((eResample == GRA_CubicSpline || eResample == GRA_Lanczos ||
    1284        3049 :          ((eResample == GRA_Cubic || eResample == GRA_Bilinear) &&
    1285        6354 :           !bUse4SamplesFormula)) &&
    1286         346 :         atoi(CSLFetchNameValueDef(papszWarpOptions, "EXTRA_ELTS", "0")) !=
    1287             :             WARP_EXTRA_ELTS)
    1288             :     {
    1289           0 :         CPLError(CE_Failure, CPLE_AppDefined,
    1290             :                  "Source arrays must have WARP_EXTRA_ELTS extra elements at "
    1291             :                  "their end. "
    1292             :                  "See GDALWarpKernel class definition. If this condition is "
    1293             :                  "fulfilled, define a EXTRA_ELTS=%d warp options",
    1294             :                  WARP_EXTRA_ELTS);
    1295           0 :         return CE_Failure;
    1296             :     }
    1297             : 
    1298        3177 :     dfXFilter = anGWKFilterRadius[eResample];
    1299        3177 :     dfYFilter = anGWKFilterRadius[eResample];
    1300             : 
    1301        3177 :     nXRadius = dfXScale < 1.0 ? static_cast<int>(ceil(dfXFilter / dfXScale))
    1302        2591 :                               : static_cast<int>(dfXFilter);
    1303        3177 :     nYRadius = dfYScale < 1.0 ? static_cast<int>(ceil(dfYFilter / dfYScale))
    1304        2608 :                               : static_cast<int>(dfYFilter);
    1305             : 
    1306             :     // Filter window offset depends on the parity of the kernel radius.
    1307        3177 :     nFiltInitX = ((anGWKFilterRadius[eResample] + 1) % 2) - nXRadius;
    1308        3177 :     nFiltInitY = ((anGWKFilterRadius[eResample] + 1) % 2) - nYRadius;
    1309             : 
    1310        3177 :     bApplyVerticalShift =
    1311        3177 :         CPLFetchBool(papszWarpOptions, "APPLY_VERTICAL_SHIFT", false);
    1312        3177 :     dfMultFactorVerticalShift = CPLAtof(CSLFetchNameValueDef(
    1313        3177 :         papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT", "1.0"));
    1314             : 
    1315             :     /* -------------------------------------------------------------------- */
    1316             :     /*      Set up resampling functions.                                    */
    1317             :     /* -------------------------------------------------------------------- */
    1318        3177 :     if (CPLFetchBool(papszWarpOptions, "USE_GENERAL_CASE", false))
    1319          12 :         return GWKGeneralCase(this);
    1320             : 
    1321        3165 :     const bool bNoMasksOrDstDensityOnly =
    1322        3155 :         papanBandSrcValid == nullptr && panUnifiedSrcValid == nullptr &&
    1323        6320 :         pafUnifiedSrcDensity == nullptr && panDstValid == nullptr;
    1324             : 
    1325        3165 :     if (eWorkingDataType == GDT_UInt8 && eResample == GRA_NearestNeighbour &&
    1326             :         bNoMasksOrDstDensityOnly)
    1327         909 :         return GWKNearestNoMasksOrDstDensityOnlyByte(this);
    1328             : 
    1329        2256 :     if (eWorkingDataType == GDT_UInt8 && eResample == GRA_Bilinear &&
    1330             :         bNoMasksOrDstDensityOnly)
    1331         128 :         return GWKBilinearNoMasksOrDstDensityOnlyByte(this);
    1332             : 
    1333        2128 :     if (eWorkingDataType == GDT_UInt8 && eResample == GRA_Cubic &&
    1334             :         bNoMasksOrDstDensityOnly)
    1335         850 :         return GWKCubicNoMasksOrDstDensityOnlyByte(this);
    1336             : 
    1337        1278 :     if (eWorkingDataType == GDT_UInt8 && eResample == GRA_CubicSpline &&
    1338             :         bNoMasksOrDstDensityOnly)
    1339          12 :         return GWKCubicSplineNoMasksOrDstDensityOnlyByte(this);
    1340             : 
    1341        1266 :     if (eWorkingDataType == GDT_UInt8 && eResample == GRA_NearestNeighbour)
    1342         363 :         return GWKNearestByte(this);
    1343             : 
    1344         903 :     if ((eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16) &&
    1345         154 :         eResample == GRA_NearestNeighbour && bNoMasksOrDstDensityOnly)
    1346          14 :         return GWKNearestNoMasksOrDstDensityOnlyShort(this);
    1347             : 
    1348         889 :     if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Cubic &&
    1349             :         bNoMasksOrDstDensityOnly)
    1350           5 :         return GWKCubicNoMasksOrDstDensityOnlyShort(this);
    1351             : 
    1352         884 :     if ((eWorkingDataType == GDT_Int16) && eResample == GRA_CubicSpline &&
    1353             :         bNoMasksOrDstDensityOnly)
    1354           6 :         return GWKCubicSplineNoMasksOrDstDensityOnlyShort(this);
    1355             : 
    1356         878 :     if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Bilinear &&
    1357             :         bNoMasksOrDstDensityOnly)
    1358           5 :         return GWKBilinearNoMasksOrDstDensityOnlyShort(this);
    1359             : 
    1360         873 :     if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Cubic &&
    1361             :         bNoMasksOrDstDensityOnly)
    1362          14 :         return GWKCubicNoMasksOrDstDensityOnlyUShort(this);
    1363             : 
    1364         859 :     if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_CubicSpline &&
    1365             :         bNoMasksOrDstDensityOnly)
    1366           5 :         return GWKCubicSplineNoMasksOrDstDensityOnlyUShort(this);
    1367             : 
    1368         854 :     if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Bilinear &&
    1369             :         bNoMasksOrDstDensityOnly)
    1370           6 :         return GWKBilinearNoMasksOrDstDensityOnlyUShort(this);
    1371             : 
    1372         848 :     if (eWorkingDataType == GDT_Int8 && eResample == GRA_NearestNeighbour)
    1373           9 :         return GWKNearestInt8(this);
    1374             : 
    1375         839 :     if (eWorkingDataType == GDT_Int16 && eResample == GRA_NearestNeighbour)
    1376          40 :         return GWKNearestShort(this);
    1377             : 
    1378         799 :     if (eWorkingDataType == GDT_UInt16 && eResample == GRA_NearestNeighbour)
    1379          10 :         return GWKNearestUnsignedShort(this);
    1380             : 
    1381         789 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour &&
    1382             :         bNoMasksOrDstDensityOnly)
    1383          11 :         return GWKNearestNoMasksOrDstDensityOnlyFloat(this);
    1384             : 
    1385         778 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour)
    1386          50 :         return GWKNearestFloat(this);
    1387             : 
    1388         728 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_Bilinear &&
    1389             :         bNoMasksOrDstDensityOnly)
    1390           4 :         return GWKBilinearNoMasksOrDstDensityOnlyFloat(this);
    1391             : 
    1392         724 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_Cubic &&
    1393             :         bNoMasksOrDstDensityOnly)
    1394           9 :         return GWKCubicNoMasksOrDstDensityOnlyFloat(this);
    1395             : 
    1396             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    1397             :     if (eWorkingDataType == GDT_Float64 && eResample == GRA_Bilinear &&
    1398             :         bNoMasksOrDstDensityOnly)
    1399             :         return GWKBilinearNoMasksOrDstDensityOnlyDouble(this);
    1400             : 
    1401             :     if (eWorkingDataType == GDT_Float64 && eResample == GRA_Cubic &&
    1402             :         bNoMasksOrDstDensityOnly)
    1403             :         return GWKCubicNoMasksOrDstDensityOnlyDouble(this);
    1404             : #endif
    1405             : 
    1406         715 :     if (eResample == GRA_Average)
    1407         160 :         return GWKAverageOrMode(this);
    1408             : 
    1409         555 :     if (eResample == GRA_RMS)
    1410           9 :         return GWKAverageOrMode(this);
    1411             : 
    1412         546 :     if (eResample == GRA_Mode)
    1413          45 :         return GWKAverageOrMode(this);
    1414             : 
    1415         501 :     if (eResample == GRA_Max)
    1416           6 :         return GWKAverageOrMode(this);
    1417             : 
    1418         495 :     if (eResample == GRA_Min)
    1419           5 :         return GWKAverageOrMode(this);
    1420             : 
    1421         490 :     if (eResample == GRA_Med)
    1422           6 :         return GWKAverageOrMode(this);
    1423             : 
    1424         484 :     if (eResample == GRA_Q1)
    1425          10 :         return GWKAverageOrMode(this);
    1426             : 
    1427         474 :     if (eResample == GRA_Q3)
    1428           5 :         return GWKAverageOrMode(this);
    1429             : 
    1430         469 :     if (eResample == GRA_Sum)
    1431          19 :         return GWKSumPreserving(this);
    1432             : 
    1433         450 :     if (!GDALDataTypeIsComplex(eWorkingDataType))
    1434             :     {
    1435         223 :         return GWKRealCase(this);
    1436             :     }
    1437             : 
    1438         227 :     return GWKGeneralCase(this);
    1439             : }
    1440             : 
    1441             : /************************************************************************/
    1442             : /*                              Validate()                              */
    1443             : /************************************************************************/
    1444             : 
    1445             : /**
    1446             :  * \fn CPLErr GDALWarpKernel::Validate()
    1447             :  *
    1448             :  * Check the settings in the GDALWarpKernel, and issue a CPLError()
    1449             :  * (and return CE_Failure) if the configuration is considered to be
    1450             :  * invalid: eResample is not a valid index into anGWKFilterRadius, or
    1451             :  * the EXCLUDED_VALUES warp option does not hold a multiple of nBands
    1452             :  * values. Each complete EXCLUDED_VALUES tuple is appended to
    1453             :  * m_aadfExcludedValues. NOTE(review): this body does not default pfnProgress.
    1454             :  *
    1455             :  * @return CE_None on success or CE_Failure if an error is detected.
    1456             :  */
    1457             : 
    1458        3791 : CPLErr GDALWarpKernel::Validate()
    1459             : 
    1460             : {
    1461        3791 :     if (static_cast<size_t>(eResample) >=
    1462             :         (sizeof(anGWKFilterRadius) / sizeof(anGWKFilterRadius[0])))
    1463             :     {  // eResample is outside the bounds of anGWKFilterRadius
    1464           0 :         CPLError(CE_Failure, CPLE_AppDefined,
    1465             :                  "Unsupported resampling method %d.",
    1466           0 :                  static_cast<int>(eResample));
    1467           0 :         return CE_Failure;
    1468             :     }
    1469             : 
    1470             :     // Tuples of values (e.g. "<R>,<G>,<B>" or "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>)") that must
    1471             :     // be ignored as contributing source pixels during resampling. Only taken into account by
    1472             :     // Average currently
    1473             :     const char *pszExcludedValues =
    1474        3791 :         CSLFetchNameValue(papszWarpOptions, "EXCLUDED_VALUES");
    1475        3791 :     if (pszExcludedValues)
    1476             :     {
    1477             :         const CPLStringList aosTokens(
    1478          18 :             CSLTokenizeString2(pszExcludedValues, "(,)", 0));  // presumably "(,)" is the delimiter set, leaving one token per value
    1479          18 :         if ((aosTokens.size() % nBands) != 0)  // NOTE(review): assumes nBands != 0 - confirm
    1480             :         {
    1481           1 :             CPLError(CE_Failure, CPLE_AppDefined,
    1482             :                      "EXCLUDED_VALUES should contain one or several tuples of "
    1483             :                      "%d values formatted like <R>,<G>,<B> or "
    1484             :                      "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>) if there are multiple "
    1485             :                      "tuples",
    1486             :                      nBands);
    1487           1 :             return CE_Failure;
    1488             :         }
    1489          34 :         std::vector<double> adfTuple;  // values of the tuple currently being built
    1490          68 :         for (int i = 0; i < aosTokens.size(); ++i)
    1491             :         {
    1492          51 :             adfTuple.push_back(CPLAtof(aosTokens[i]));
    1493          51 :             if (((i + 1) % nBands) == 0)  // nBands values collected: the tuple is complete
    1494             :             {
    1495          17 :                 m_aadfExcludedValues.push_back(adfTuple);  // NOTE(review): appended without clearing first; repeated calls would accumulate - confirm
    1496          17 :                 adfTuple.clear();
    1497             :             }
    1498             :         }
    1499             :     }
    1500             : 
    1501        3790 :     return CE_None;
    1502             : }
    1503             : 
    1504             : /************************************************************************/
    1505             : /*                         GWKOverlayDensity()                          */
    1506             : /*                                                                      */
    1507             : /*      Compute the final density for the destination pixel.  This      */
    1508             : /*      is a function of the overlay density (passed in) and the        */
    1509             : /*      original density.                                               */
    1510             : /************************************************************************/
    1511             : 
    1512    17762100 : static void GWKOverlayDensity(const GDALWarpKernel *poWK, GPtrDiff_t iDstOffset,
    1513             :                               double dfDensity)
    1514             : {  // Updates poWK->pafDstDensity[iDstOffset] in place from the overlay density dfDensity
    1515    17762100 :     if (dfDensity < 0.0001 || poWK->pafDstDensity == nullptr)  // negligible overlay, or no destination density buffer
    1516    13309900 :         return;
    1517             : 
    1518     4452160 :     poWK->pafDstDensity[iDstOffset] =
    1519     4452160 :         1.0f -
    1520     4452160 :         (1.0f - float(dfDensity)) * (1.0f - poWK->pafDstDensity[iDstOffset]);  // new = 1 - (1 - overlay) * (1 - previous), evaluated in float
    1521             : }
    1522             : 
    1523             : /************************************************************************/
    1524             : /*                           GWKRoundValueT()                           */
    1525             : /************************************************************************/
    1526             : 
    1527             : template <class T, class U, bool is_signed> struct sGWKRoundValueT  // rounding helper selected on the is_signed flag
    1528             : {
    1529             :     static T eval(U);  // declared only; the two partial specializations below provide the bodies
    1530             : };
    1531             : 
    1532             : template <class T, class U> struct sGWKRoundValueT<T, U, true> /* signed */
    1533             : {
    1534      791525 :     static T eval(U value)
    1535             :     {
    1536      791525 :         return static_cast<T>(floor(value + U(0.5)));  // floor() so that negative values also round to nearest
    1537             :     }
    1538             : };
    1539             : 
    1540             : template <class T, class U> struct sGWKRoundValueT<T, U, false> /* unsigned */
    1541             : {
    1542   152026497 :     static T eval(U value)
    1543             :     {
    1544   152026497 :         return static_cast<T>(value + U(0.5));  // add one half, then convert to T with a plain cast
    1545             :     }
    1546             : };
    1547             : 
    1548   152818022 : template <class T, class U> static T GWKRoundValueT(U value)  // converts value (type U) to T with rounding
    1549             : {
    1550   152818022 :     return sGWKRoundValueT<T, U, cpl::NumericLimits<T>::is_signed>::eval(value);  // dispatch on the signedness of T
    1551             : }
    1552             : 
    1553      268974 : template <> float GWKRoundValueT<float, double>(double value)  // double -> float: no rounding to an integer value
    1554             : {
    1555      268974 :     return static_cast<float>(value);
    1556             : }
    1557             : 
    1558             : #ifdef notused  // double -> double specialization, only compiled when notused is defined
    1559             : template <> double GWKRoundValueT<double, double>(double value)
    1560             : {
    1561             :     return value;
    1562             : }
    1563             : #endif
    1564             : 
    1565             : /************************************************************************/
    1566             : /*                           GWKClampValueT()                           */
    1567             : /************************************************************************/
    1568             : 
    1569   145451362 : template <class T, class U> static CPL_INLINE T GWKClampValueT(U value)  // converts value to T, saturating at T's limits
    1570             : {
    1571   145451362 :     if (value < static_cast<U>(cpl::NumericLimits<T>::min()))  // below T's minimum: saturate
    1572      569367 :         return cpl::NumericLimits<T>::min();
    1573   144881964 :     else if (value > static_cast<U>(cpl::NumericLimits<T>::max()))  // above T's maximum: saturate
    1574      773825 :         return cpl::NumericLimits<T>::max();
    1575             :     else
    1576   144107844 :         return GWKRoundValueT<T, U>(value);  // in range: round to T
    1577             : }
    1578             : 
    1579      718914 : template <> float GWKClampValueT<float, double>(double dfValue)  // double -> float: plain cast, no clamping applied
    1580             : {
    1581      718914 :     return static_cast<float>(dfValue);
    1582             : }
    1583             : 
    1584             : #ifdef notused  // double -> double specialization, only compiled when notused is defined
    1585             : template <> double GWKClampValueT<double, double>(double dfValue)
    1586             : {
    1587             :     return dfValue;
    1588             : }
    1589             : #endif
    1590             : 
    1591             : /************************************************************************/
    1592             : /*                            AvoidNoData()                             */
    1593             : /************************************************************************/
    1594             : 
    1595        1283 : template <class T> inline void AvoidNoData(T *pDst, GPtrDiff_t iDstOffset)
    1596             : {  // Replaces pDst[iDstOffset] in place by a neighbouring value of type T
    1597             :     if constexpr (cpl::NumericLimits<T>::is_integer)
    1598             :     {
    1599        1027 :         if (pDst[iDstOffset] == static_cast<T>(cpl::NumericLimits<T>::lowest()))  // already the lowest value: step up instead of down
    1600             :         {
    1601         515 :             pDst[iDstOffset] =
    1602         515 :                 static_cast<T>(cpl::NumericLimits<T>::lowest() + 1);
    1603             :         }
    1604             :         else
    1605         512 :             pDst[iDstOffset]--;  // any other integer value: decrement by one
    1606             :     }
    1607             :     else
    1608             :     {
    1609         256 :         if (pDst[iDstOffset] == cpl::NumericLimits<T>::max())  // already the largest finite value: step towards zero
    1610             :         {
    1611             :             using std::nextafter;
    1612           0 :             pDst[iDstOffset] = nextafter(pDst[iDstOffset], static_cast<T>(0));
    1613             :         }
    1614             :         else
    1615             :         {
    1616             :             using std::nextafter;
    1617         256 :             pDst[iDstOffset] =
    1618         256 :                 nextafter(pDst[iDstOffset], cpl::NumericLimits<T>::max());  // next representable value in the direction of max()
    1619             :         }
    1620             :     }
    1621        1283 : }
    1622             : 
    1623             : /************************************************************************/
    1624             : /*                            AvoidNoData()                             */
    1625             : /************************************************************************/
    1626             : 
    1627             : template <class T>
    1628    25539330 : inline void AvoidNoData(const GDALWarpKernel *poWK, int iBand,
    1629             :                         GPtrDiff_t iDstOffset)
    1630             : {  // If the destination pixel of band iBand equals that band's destination nodata value, nudge it off that value
    1631    25539330 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    1632    25539330 :     T *pDst = reinterpret_cast<T *>(pabyDst);  // view the band's destination buffer as elements of type T
    1633             : 
    1634    25539330 :     if (poWK->padfDstNoDataReal != nullptr &&
    1635    11380638 :         poWK->padfDstNoDataReal[iBand] == static_cast<double>(pDst[iDstOffset]))  // compared after converting the pixel to double
    1636             :     {
    1637         640 :         AvoidNoData(pDst, iDstOffset);  // pointer overload: moves the pixel to a neighbouring value
    1638             : 
    1639         640 :         if (!poWK->bWarnedAboutDstNoDataReplacement)  // warn only the first time for this kernel object
    1640             :         {
    1641          40 :             const_cast<GDALWarpKernel *>(poWK)
    1642             :                 ->bWarnedAboutDstNoDataReplacement = true;  // poWK is const here, hence the const_cast to set the flag
    1643          40 :             CPLError(CE_Warning, CPLE_AppDefined,
    1644             :                      "Value %g in the source dataset has been changed to %g "
    1645             :                      "in the destination dataset to avoid being treated as "
    1646             :                      "NoData. To avoid this, select a different NoData value "
    1647             :                      "for the destination dataset.",
    1648          40 :                      poWK->padfDstNoDataReal[iBand],
    1649          40 :                      static_cast<double>(pDst[iDstOffset]));  // pixel value after the replacement above
    1650             :         }
    1651             :     }
    1652    25539330 : }
    1653             : 
    1654             : /************************************************************************/
    1655             : /*                      GWKAvoidNoDataMultiBand()                       */
    1656             : /************************************************************************/
    1657             : 
    1658             : template <class T>
    1659      524573 : static void GWKAvoidNoDataMultiBand(const GDALWarpKernel *poWK,
    1660             :                                     GPtrDiff_t iDstOffset)
    1661             : {
    1662      524573 :     T **ppDst = reinterpret_cast<T **>(poWK->papabyDstImage);
    1663      524573 :     if (poWK->padfDstNoDataReal != nullptr)
    1664             :     {
    1665      208615 :         for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    1666             :         {
    1667      208294 :             if (poWK->padfDstNoDataReal[iBand] !=
    1668      208294 :                 static_cast<double>(ppDst[iBand][iDstOffset]))
    1669      205830 :                 return;
    1670             :         }
    1671         964 :         for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    1672             :         {
    1673         643 :             AvoidNoData(ppDst[iBand], iDstOffset);
    1674             :         }
    1675             : 
    1676         321 :         if (!poWK->bWarnedAboutDstNoDataReplacement)
    1677             :         {
    1678          21 :             const_cast<GDALWarpKernel *>(poWK)
    1679             :                 ->bWarnedAboutDstNoDataReplacement = true;
    1680          42 :             std::string valueSrc, valueDst;
    1681          64 :             for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    1682             :             {
    1683          43 :                 if (!valueSrc.empty())
    1684             :                 {
    1685          22 :                     valueSrc += ',';
    1686          22 :                     valueDst += ',';
    1687             :                 }
    1688          43 :                 valueSrc += CPLSPrintf("%g", poWK->padfDstNoDataReal[iBand]);
    1689          43 :                 valueDst += CPLSPrintf(
    1690          43 :                     "%g", static_cast<double>(ppDst[iBand][iDstOffset]));
    1691             :             }
    1692          21 :             CPLError(CE_Warning, CPLE_AppDefined,
    1693             :                      "Value %s in the source dataset has been changed to %s "
    1694             :                      "in the destination dataset to avoid being treated as "
    1695             :                      "NoData. To avoid this, select a different NoData value "
    1696             :                      "for the destination dataset.",
    1697             :                      valueSrc.c_str(), valueDst.c_str());
    1698             :         }
    1699             :     }
    1700             : }
    1701             : 
    1702             : /************************************************************************/
    1703             : /*                      GWKAvoidNoDataMultiBand()                       */
    1704             : /************************************************************************/
    1705             : 
    1706      524573 : static void GWKAvoidNoDataMultiBand(const GDALWarpKernel *poWK,
    1707             :                                     GPtrDiff_t iDstOffset)
    1708             : {
    1709      524573 :     switch (poWK->eWorkingDataType)
    1710             :     {
    1711      523997 :         case GDT_UInt8:
    1712      523997 :             GWKAvoidNoDataMultiBand<std::uint8_t>(poWK, iDstOffset);
    1713      523997 :             break;
    1714             : 
    1715          64 :         case GDT_Int8:
    1716          64 :             GWKAvoidNoDataMultiBand<std::int8_t>(poWK, iDstOffset);
    1717          64 :             break;
    1718             : 
    1719          64 :         case GDT_Int16:
    1720          64 :             GWKAvoidNoDataMultiBand<std::int16_t>(poWK, iDstOffset);
    1721          64 :             break;
    1722             : 
    1723          64 :         case GDT_UInt16:
    1724          64 :             GWKAvoidNoDataMultiBand<std::uint16_t>(poWK, iDstOffset);
    1725          64 :             break;
    1726             : 
    1727          64 :         case GDT_Int32:
    1728          64 :             GWKAvoidNoDataMultiBand<std::int32_t>(poWK, iDstOffset);
    1729          64 :             break;
    1730             : 
    1731          64 :         case GDT_UInt32:
    1732          64 :             GWKAvoidNoDataMultiBand<std::uint32_t>(poWK, iDstOffset);
    1733          64 :             break;
    1734             : 
    1735          64 :         case GDT_Int64:
    1736          64 :             GWKAvoidNoDataMultiBand<std::int64_t>(poWK, iDstOffset);
    1737          64 :             break;
    1738             : 
    1739          64 :         case GDT_UInt64:
    1740          64 :             GWKAvoidNoDataMultiBand<std::uint64_t>(poWK, iDstOffset);
    1741          64 :             break;
    1742             : 
    1743           0 :         case GDT_Float16:
    1744           0 :             GWKAvoidNoDataMultiBand<GFloat16>(poWK, iDstOffset);
    1745           0 :             break;
    1746             : 
    1747          64 :         case GDT_Float32:
    1748          64 :             GWKAvoidNoDataMultiBand<float>(poWK, iDstOffset);
    1749          64 :             break;
    1750             : 
    1751          64 :         case GDT_Float64:
    1752          64 :             GWKAvoidNoDataMultiBand<double>(poWK, iDstOffset);
    1753          64 :             break;
    1754             : 
    1755           0 :         case GDT_CInt16:
    1756             :         case GDT_CInt32:
    1757             :         case GDT_CFloat16:
    1758             :         case GDT_CFloat32:
    1759             :         case GDT_CFloat64:
    1760             :         case GDT_Unknown:
    1761             :         case GDT_TypeCount:
    1762           0 :             break;
    1763             :     }
    1764      524573 : }
    1765             : 
    1766             : /************************************************************************/
    1767             : /*                       GWKSetPixelValueRealT()                        */
    1768             : /************************************************************************/
    1769             : 
    1770             : template <class T>
    1771    14954277 : static bool GWKSetPixelValueRealT(const GDALWarpKernel *poWK, int iBand,
    1772             :                                   GPtrDiff_t iDstOffset, double dfDensity,
    1773             :                                   T value, bool bAvoidNoDataSingleBand)
    1774             : {
    1775    14954277 :     T *pDst = reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
    1776             : 
    1777             :     /* -------------------------------------------------------------------- */
    1778             :     /*      If the source density is less than 100% we need to fetch the    */
    1779             :     /*      existing destination value, and mix it with the source to       */
    1780             :     /*      get the new "to apply" value.  Also compute composite           */
    1781             :     /*      density.                                                        */
    1782             :     /*                                                                      */
    1783             :     /*      We avoid mixing if density is very near one or risk mixing      */
    1784             :     /*      in very extreme nodata values and causing odd results (#1610)   */
    1785             :     /* -------------------------------------------------------------------- */
    1786    14954277 :     if (dfDensity < 0.9999)
    1787             :     {
    1788      945508 :         if (dfDensity < 0.0001)
    1789           0 :             return true;
    1790             : 
    1791      945508 :         double dfDstDensity = 1.0;
    1792             : 
    1793      945508 :         if (poWK->pafDstDensity != nullptr)
    1794      944036 :             dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
    1795        1472 :         else if (poWK->panDstValid != nullptr &&
    1796           0 :                  !CPLMaskGet(poWK->panDstValid, iDstOffset))
    1797           0 :             dfDstDensity = 0.0;
    1798             : 
    1799             :         // It seems like we also ought to be testing panDstValid[] here!
    1800             : 
    1801      945508 :         const double dfDstReal = static_cast<double>(pDst[iDstOffset]);
    1802             : 
    1803             :         // The destination density is really only relative to the portion
    1804             :         // not occluded by the overlay.
    1805      945508 :         const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
    1806             : 
    1807      945508 :         const double dfReal =
    1808      945508 :             (double(value) * dfDensity + dfDstReal * dfDstInfluence) /
    1809      945508 :             (dfDensity + dfDstInfluence);
    1810             : 
    1811             :         /* --------------------------------------------------------------------
    1812             :          */
    1813             :         /*      Actually apply the destination value. */
    1814             :         /*                                                                      */
    1815             :         /*      Avoid using the destination nodata value for integer datatypes
    1816             :          */
    1817             :         /*      if by chance it is equal to the computed pixel value. */
    1818             :         /* --------------------------------------------------------------------
    1819             :          */
    1820      945508 :         pDst[iDstOffset] = GWKClampValueT<T>(dfReal);
    1821             :     }
    1822             :     else
    1823             :     {
    1824    14008836 :         pDst[iDstOffset] = value;
    1825             :     }
    1826             : 
    1827    14954277 :     if (bAvoidNoDataSingleBand)
    1828    13681621 :         AvoidNoData<T>(poWK, iBand, iDstOffset);
    1829             : 
    1830    14954277 :     return true;
    1831             : }
    1832             : 
    1833             : /************************************************************************/
    1834             : /*                      ClampRoundAndAvoidNoData()                      */
    1835             : /************************************************************************/
    1836             : 
    1837             : template <class T>
    1838    12158105 : inline void ClampRoundAndAvoidNoData(const GDALWarpKernel *poWK, int iBand,
    1839             :                                      GPtrDiff_t iDstOffset, double dfReal,
    1840             :                                      bool bAvoidNoDataSingleBand)
    1841             : {
    1842    12158105 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    1843    12158105 :     T *pDst = reinterpret_cast<T *>(pabyDst);
    1844             : 
    1845             :     if constexpr (cpl::NumericLimits<T>::is_integer)
    1846             :     {
    1847             :         using std::floor;
    1848    11660975 :         if (dfReal < static_cast<double>(cpl::NumericLimits<T>::lowest()))
    1849        6430 :             pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::lowest());
    1850    11654575 :         else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
    1851       23967 :             pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::max());
    1852             :         else if constexpr (cpl::NumericLimits<T>::is_signed)
    1853       10410 :             pDst[iDstOffset] = static_cast<T>(floor(dfReal + 0.5));
    1854             :         else
    1855    11620165 :             pDst[iDstOffset] = static_cast<T>(dfReal + 0.5);
    1856             :     }
    1857             :     else
    1858             :     {
    1859      497130 :         pDst[iDstOffset] = static_cast<T>(dfReal);
    1860             :     }
    1861             : 
    1862    12158105 :     if (bAvoidNoDataSingleBand)
    1863    11857709 :         AvoidNoData<T>(poWK, iBand, iDstOffset);
    1864    12158105 : }
    1865             : 
    1866             : /************************************************************************/
    1867             : /*                          GWKSetPixelValue()                          */
    1868             : /************************************************************************/
    1869             : 
    1870    11045400 : static bool GWKSetPixelValue(const GDALWarpKernel *poWK, int iBand,
    1871             :                              GPtrDiff_t iDstOffset, double dfDensity,
    1872             :                              double dfReal, double dfImag,
    1873             :                              bool bAvoidNoDataSingleBand)
    1874             : 
    1875             : {
    1876    11045400 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    1877             : 
    1878             :     /* -------------------------------------------------------------------- */
    1879             :     /*      If the source density is less than 100% we need to fetch the    */
    1880             :     /*      existing destination value, and mix it with the source to       */
    1881             :     /*      get the new "to apply" value.  Also compute composite           */
    1882             :     /*      density.                                                        */
    1883             :     /*                                                                      */
    1884             :     /*      We avoid mixing if density is very near one or risk mixing      */
    1885             :     /*      in very extreme nodata values and causing odd results (#1610)   */
    1886             :     /* -------------------------------------------------------------------- */
    1887    11045400 :     if (dfDensity < 0.9999)
    1888             :     {
    1889         800 :         if (dfDensity < 0.0001)
    1890           0 :             return true;
    1891             : 
    1892         800 :         double dfDstDensity = 1.0;
    1893         800 :         if (poWK->pafDstDensity != nullptr)
    1894         800 :             dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
    1895           0 :         else if (poWK->panDstValid != nullptr &&
    1896           0 :                  !CPLMaskGet(poWK->panDstValid, iDstOffset))
    1897           0 :             dfDstDensity = 0.0;
    1898             : 
    1899         800 :         double dfDstReal = 0.0;
    1900         800 :         double dfDstImag = 0.0;
    1901             :         // It seems like we also ought to be testing panDstValid[] here!
    1902             : 
    1903             :         // TODO(schwehr): Factor out this repreated type of set.
    1904         800 :         switch (poWK->eWorkingDataType)
    1905             :         {
    1906           0 :             case GDT_UInt8:
    1907           0 :                 dfDstReal = pabyDst[iDstOffset];
    1908           0 :                 dfDstImag = 0.0;
    1909           0 :                 break;
    1910             : 
    1911           0 :             case GDT_Int8:
    1912           0 :                 dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
    1913           0 :                 dfDstImag = 0.0;
    1914           0 :                 break;
    1915             : 
    1916         400 :             case GDT_Int16:
    1917         400 :                 dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
    1918         400 :                 dfDstImag = 0.0;
    1919         400 :                 break;
    1920             : 
    1921         400 :             case GDT_UInt16:
    1922         400 :                 dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
    1923         400 :                 dfDstImag = 0.0;
    1924         400 :                 break;
    1925             : 
    1926           0 :             case GDT_Int32:
    1927           0 :                 dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
    1928           0 :                 dfDstImag = 0.0;
    1929           0 :                 break;
    1930             : 
    1931           0 :             case GDT_UInt32:
    1932           0 :                 dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
    1933           0 :                 dfDstImag = 0.0;
    1934           0 :                 break;
    1935             : 
    1936           0 :             case GDT_Int64:
    1937           0 :                 dfDstReal = static_cast<double>(
    1938           0 :                     reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
    1939           0 :                 dfDstImag = 0.0;
    1940           0 :                 break;
    1941             : 
    1942           0 :             case GDT_UInt64:
    1943           0 :                 dfDstReal = static_cast<double>(
    1944           0 :                     reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
    1945           0 :                 dfDstImag = 0.0;
    1946           0 :                 break;
    1947             : 
    1948           0 :             case GDT_Float16:
    1949           0 :                 dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
    1950           0 :                 dfDstImag = 0.0;
    1951           0 :                 break;
    1952             : 
    1953           0 :             case GDT_Float32:
    1954           0 :                 dfDstReal =
    1955           0 :                     double(reinterpret_cast<float *>(pabyDst)[iDstOffset]);
    1956           0 :                 dfDstImag = 0.0;
    1957           0 :                 break;
    1958             : 
    1959           0 :             case GDT_Float64:
    1960           0 :                 dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
    1961           0 :                 dfDstImag = 0.0;
    1962           0 :                 break;
    1963             : 
    1964           0 :             case GDT_CInt16:
    1965           0 :                 dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2];
    1966           0 :                 dfDstImag =
    1967           0 :                     reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2 + 1];
    1968           0 :                 break;
    1969             : 
    1970           0 :             case GDT_CInt32:
    1971           0 :                 dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2];
    1972           0 :                 dfDstImag =
    1973           0 :                     reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2 + 1];
    1974           0 :                 break;
    1975             : 
    1976           0 :             case GDT_CFloat16:
    1977             :                 dfDstReal =
    1978           0 :                     reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2];
    1979             :                 dfDstImag =
    1980           0 :                     reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1];
    1981           0 :                 break;
    1982             : 
    1983           0 :             case GDT_CFloat32:
    1984           0 :                 dfDstReal =
    1985           0 :                     double(reinterpret_cast<float *>(pabyDst)[iDstOffset * 2]);
    1986           0 :                 dfDstImag = double(
    1987           0 :                     reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1]);
    1988           0 :                 break;
    1989             : 
    1990           0 :             case GDT_CFloat64:
    1991           0 :                 dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset * 2];
    1992           0 :                 dfDstImag =
    1993           0 :                     reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1];
    1994           0 :                 break;
    1995             : 
    1996           0 :             case GDT_Unknown:
    1997             :             case GDT_TypeCount:
    1998           0 :                 CPLAssert(false);
    1999             :                 return false;
    2000             :         }
    2001             : 
    2002             :         // The destination density is really only relative to the portion
    2003             :         // not occluded by the overlay.
    2004         800 :         const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
    2005             : 
    2006         800 :         dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
    2007         800 :                  (dfDensity + dfDstInfluence);
    2008             : 
    2009         800 :         dfImag = (dfImag * dfDensity + dfDstImag * dfDstInfluence) /
    2010         800 :                  (dfDensity + dfDstInfluence);
    2011             :     }
    2012             : 
    2013             :     /* -------------------------------------------------------------------- */
    2014             :     /*      Actually apply the destination value.                           */
    2015             :     /*                                                                      */
    2016             :     /*      Avoid using the destination nodata value for integer datatypes  */
    2017             :     /*      if by chance it is equal to the computed pixel value.           */
    2018             :     /* -------------------------------------------------------------------- */
    2019             : 
    2020    11045400 :     switch (poWK->eWorkingDataType)
    2021             :     {
    2022    10323000 :         case GDT_UInt8:
    2023    10323000 :             ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal,
    2024             :                                             bAvoidNoDataSingleBand);
    2025    10323000 :             break;
    2026             : 
    2027           1 :         case GDT_Int8:
    2028           1 :             ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal,
    2029             :                                             bAvoidNoDataSingleBand);
    2030           1 :             break;
    2031             : 
    2032        7471 :         case GDT_Int16:
    2033        7471 :             ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal,
    2034             :                                              bAvoidNoDataSingleBand);
    2035        7471 :             break;
    2036             : 
    2037         464 :         case GDT_UInt16:
    2038         464 :             ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal,
    2039             :                                               bAvoidNoDataSingleBand);
    2040         464 :             break;
    2041             : 
    2042          63 :         case GDT_UInt32:
    2043          63 :             ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal,
    2044             :                                               bAvoidNoDataSingleBand);
    2045          63 :             break;
    2046             : 
    2047          63 :         case GDT_Int32:
    2048          63 :             ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal,
    2049             :                                              bAvoidNoDataSingleBand);
    2050          63 :             break;
    2051             : 
    2052           0 :         case GDT_UInt64:
    2053           0 :             ClampRoundAndAvoidNoData<std::uint64_t>(
    2054             :                 poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
    2055           0 :             break;
    2056             : 
    2057           0 :         case GDT_Int64:
    2058           0 :             ClampRoundAndAvoidNoData<std::int64_t>(
    2059             :                 poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
    2060           0 :             break;
    2061             : 
    2062           0 :         case GDT_Float16:
    2063           0 :             ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal,
    2064             :                                                bAvoidNoDataSingleBand);
    2065           0 :             break;
    2066             : 
    2067      478957 :         case GDT_Float32:
    2068      478957 :             ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal,
    2069             :                                             bAvoidNoDataSingleBand);
    2070      478957 :             break;
    2071             : 
    2072         149 :         case GDT_Float64:
    2073         149 :             ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal,
    2074             :                                              bAvoidNoDataSingleBand);
    2075         149 :             break;
    2076             : 
    2077      234079 :         case GDT_CInt16:
    2078             :         {
    2079             :             typedef GInt16 T;
    2080      234079 :             if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
    2081           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    2082           0 :                     cpl::NumericLimits<T>::min();
    2083      234079 :             else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
    2084           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    2085           0 :                     cpl::NumericLimits<T>::max();
    2086             :             else
    2087      234079 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    2088      234079 :                     static_cast<T>(floor(dfReal + 0.5));
    2089      234079 :             if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
    2090           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    2091           0 :                     cpl::NumericLimits<T>::min();
    2092      234079 :             else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
    2093           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    2094           0 :                     cpl::NumericLimits<T>::max();
    2095             :             else
    2096      234079 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    2097      234079 :                     static_cast<T>(floor(dfImag + 0.5));
    2098      234079 :             break;
    2099             :         }
    2100             : 
    2101         379 :         case GDT_CInt32:
    2102             :         {
    2103             :             typedef GInt32 T;
    2104         379 :             if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
    2105           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    2106           0 :                     cpl::NumericLimits<T>::min();
    2107         379 :             else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
    2108           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    2109           0 :                     cpl::NumericLimits<T>::max();
    2110             :             else
    2111         379 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    2112         379 :                     static_cast<T>(floor(dfReal + 0.5));
    2113         379 :             if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
    2114           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    2115           0 :                     cpl::NumericLimits<T>::min();
    2116         379 :             else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
    2117           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    2118           0 :                     cpl::NumericLimits<T>::max();
    2119             :             else
    2120         379 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    2121         379 :                     static_cast<T>(floor(dfImag + 0.5));
    2122         379 :             break;
    2123             :         }
    2124             : 
    2125           0 :         case GDT_CFloat16:
    2126           0 :             reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2] =
    2127           0 :                 static_cast<GFloat16>(dfReal);
    2128           0 :             reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1] =
    2129           0 :                 static_cast<GFloat16>(dfImag);
    2130           0 :             break;
    2131             : 
    2132         394 :         case GDT_CFloat32:
    2133         394 :             reinterpret_cast<float *>(pabyDst)[iDstOffset * 2] =
    2134         394 :                 static_cast<float>(dfReal);
    2135         394 :             reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1] =
    2136         394 :                 static_cast<float>(dfImag);
    2137         394 :             break;
    2138             : 
    2139         380 :         case GDT_CFloat64:
    2140         380 :             reinterpret_cast<double *>(pabyDst)[iDstOffset * 2] = dfReal;
    2141         380 :             reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1] = dfImag;
    2142         380 :             break;
    2143             : 
    2144           0 :         case GDT_Unknown:
    2145             :         case GDT_TypeCount:
    2146           0 :             return false;
    2147             :     }
    2148             : 
    2149    11045400 :     return true;
    2150             : }
    2151             : 
    2152             : /************************************************************************/
    2153             : /*                        GWKSetPixelValueReal()                        */
    2154             : /************************************************************************/
    2155             : /* GWKSetPixelValueReal(): store one real-valued pixel into destination
                        * band iBand (poWK->papabyDstImage[iBand]) at pixel index iDstOffset.
                        *
                        * dfDensity is the weight of the incoming value:
                        *   - below 0.0001 nothing is written and true is returned;
                        *   - from 0.0001 up to (but excluding) 0.9999 the incoming dfReal is
                        *     blended with the value already present in the destination
                        *     buffer. The destination weight is pafDstDensity[iDstOffset] when
                        *     that array exists, else 0 when panDstValid exists and
                        *     CPLMaskGet(panDstValid, iDstOffset) is false, else 1;
                        *   - otherwise dfReal is used unchanged.
                        *
                        * The resulting value is handed to ClampRoundAndAvoidNoData<T>() for
                        * the working data type, together with bAvoidNoDataSingleBand. That
                        * helper is defined elsewhere in the file; presumably it performs the
                        * clamping, rounding and the actual store - confirm there.
                        *
                        * Returns true when the value was handed off or the write was skipped,
                        * false for complex, GDT_Unknown and GDT_TypeCount working types.
                        * This body only reads pafDstDensity[] / panDstValid[]; it does not
                        * update them itself.
                        */
    2156     1347980 : static bool GWKSetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
    2157             :                                  GPtrDiff_t iDstOffset, double dfDensity,
    2158             :                                  double dfReal, bool bAvoidNoDataSingleBand)
    2159             : 
    2160             : {
    2161     1347980 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    2162             : 
    2163             :     /* -------------------------------------------------------------------- */
    2164             :     /*      If the source density is less than 100% we need to fetch the    */
    2165             :     /*      existing destination value, and mix it with the source to       */
    2166             :     /*      get the new "to apply" value.  Also compute composite           */
    2167             :     /*      density.                                                        */
    2168             :     /*                                                                      */
    2169             :     /*      We avoid mixing if density is very near one or risk mixing      */
    2170             :     /*      in very extreme nodata values and causing odd results (#1610)   */
    2171             :     /* -------------------------------------------------------------------- */
    2172     1347980 :     if (dfDensity < 0.9999)
    2173             :     {
    2174         600 :         if (dfDensity < 0.0001)
    2175           0 :             return true;  // Negligible source weight: destination left as is.
    2176             : 
    2177         600 :         double dfDstReal = 0.0;
    2178         600 :         double dfDstDensity = 1.0;  // Used when neither array below applies.
    2179             : 
    2180         600 :         if (poWK->pafDstDensity != nullptr)
    2181         600 :             dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
    2182           0 :         else if (poWK->panDstValid != nullptr &&
    2183           0 :                  !CPLMaskGet(poWK->panDstValid, iDstOffset))
    2184           0 :             dfDstDensity = 0.0;
    2185             : 
    2186             :         // panDstValid[] is only consulted above when pafDstDensity is null.
    2187             : 
    2188         600 :         switch (poWK->eWorkingDataType)
    2189             :         {
    2190           0 :             case GDT_UInt8:
    2191           0 :                 dfDstReal = pabyDst[iDstOffset];
    2192           0 :                 break;
    2193             : 
    2194           0 :             case GDT_Int8:
    2195           0 :                 dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
    2196           0 :                 break;
    2197             : 
    2198         300 :             case GDT_Int16:
    2199         300 :                 dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
    2200         300 :                 break;
    2201             : 
    2202         300 :             case GDT_UInt16:
    2203         300 :                 dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
    2204         300 :                 break;
    2205             : 
    2206           0 :             case GDT_Int32:
    2207           0 :                 dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
    2208           0 :                 break;
    2209             : 
    2210           0 :             case GDT_UInt32:
    2211           0 :                 dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
    2212           0 :                 break;
    2213             : 
    2214           0 :             case GDT_Int64:
    2215           0 :                 dfDstReal = static_cast<double>(
    2216           0 :                     reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
    2217           0 :                 break;
    2218             : 
    2219           0 :             case GDT_UInt64:
    2220           0 :                 dfDstReal = static_cast<double>(
    2221           0 :                     reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
    2222           0 :                 break;
    2223             : 
    2224           0 :             case GDT_Float16:
    2225           0 :                 dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
    2226           0 :                 break;
    2227             : 
    2228           0 :             case GDT_Float32:
    2229           0 :                 dfDstReal =
    2230           0 :                     double(reinterpret_cast<float *>(pabyDst)[iDstOffset]);
    2231           0 :                 break;
    2232             : 
    2233           0 :             case GDT_Float64:
    2234           0 :                 dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
    2235           0 :                 break;
    2236             : 
    2237           0 :             case GDT_CInt16:
    2238             :             case GDT_CInt32:
    2239             :             case GDT_CFloat16:
    2240             :             case GDT_CFloat32:
    2241             :             case GDT_CFloat64:
    2242             :             case GDT_Unknown:
    2243             :             case GDT_TypeCount:
    2244           0 :                 CPLAssert(false);  // Real-only routine: these types are not expected.
    2245             :                 return false;
    2246             :         }
    2247             : 
    2248             :         // The destination density is really only relative to the portion
    2249             :         // not occluded by the overlay.
    2250         600 :         const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
    2251             : 
    2252         600 :         dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
    2253         600 :                  (dfDensity + dfDstInfluence);  // >= 0.0001 if dfDstDensity >= 0.
    2254             :     }
    2255             : 
    2256             :     /* -------------------------------------------------------------------- */
    2257             :     /*      Actually apply the destination value.                           */
    2258             :     /*                                                                      */
    2259             :     /*      Avoid using the destination nodata value for integer datatypes  */
    2260             :     /*      if by chance it is equal to the computed pixel value.           */
    2261             :     /* -------------------------------------------------------------------- */
    2262             : 
    2263     1347980 :     switch (poWK->eWorkingDataType)
    2264             :     {
    2265     1325840 :         case GDT_UInt8:
    2266     1325840 :             ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal,
    2267             :                                             bAvoidNoDataSingleBand);
    2268     1325840 :             break;
    2269             : 
    2270         112 :         case GDT_Int8:
    2271         112 :             ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal,
    2272             :                                             bAvoidNoDataSingleBand);
    2273         112 :             break;
    2274             : 
    2275        1197 :         case GDT_Int16:
    2276        1197 :             ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal,
    2277             :                                              bAvoidNoDataSingleBand);
    2278        1197 :             break;
    2279             : 
    2280         475 :         case GDT_UInt16:
    2281         475 :             ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal,
    2282             :                                               bAvoidNoDataSingleBand);
    2283         475 :             break;
    2284             : 
    2285         539 :         case GDT_UInt32:
    2286         539 :             ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal,
    2287             :                                               bAvoidNoDataSingleBand);
    2288         539 :             break;
    2289             : 
    2290        1342 :         case GDT_Int32:
    2291        1342 :             ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal,
    2292             :                                              bAvoidNoDataSingleBand);
    2293        1342 :             break;
    2294             : 
    2295         224 :         case GDT_UInt64:
    2296         224 :             ClampRoundAndAvoidNoData<std::uint64_t>(
    2297             :                 poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
    2298         224 :             break;
    2299             : 
    2300         224 :         case GDT_Int64:
    2301         224 :             ClampRoundAndAvoidNoData<std::int64_t>(
    2302             :                 poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
    2303         224 :             break;
    2304             : 
    2305           0 :         case GDT_Float16:
    2306           0 :             ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal,
    2307             :                                                bAvoidNoDataSingleBand);
    2308           0 :             break;
    2309             : 
    2310        3538 :         case GDT_Float32:
    2311        3538 :             ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal,
    2312             :                                             bAvoidNoDataSingleBand);
    2313        3538 :             break;
    2314             : 
    2315       14486 :         case GDT_Float64:
    2316       14486 :             ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal,
    2317             :                                              bAvoidNoDataSingleBand);
    2318       14486 :             break;
    2319             : 
    2320           0 :         case GDT_CInt16:
    2321             :         case GDT_CInt32:
    2322             :         case GDT_CFloat16:
    2323             :         case GDT_CFloat32:
    2324             :         case GDT_CFloat64:
    2325           0 :             return false;  // Complex types rejected (no assert, unlike the blend switch).
    2326             : 
    2327           0 :         case GDT_Unknown:
    2328             :         case GDT_TypeCount:
    2329           0 :             CPLAssert(false);
    2330             :             return false;
    2331             :     }
    2332             : 
    2333     1347980 :     return true;
    2334             : }
    2335             : 
    2336             : /************************************************************************/
    2337             : /*                          GWKGetPixelValue()                          */
    2338             : /************************************************************************/
    2339             : 
    2340             : /* GWKGetPixelValue(): fetch one source pixel of band iBand, at pixel
                        * index iSrcOffset, as a (real, imaginary) pair of doubles.
                        *
                        * It is assumed that panUnifiedSrcValid has been checked before
                        * (this function never reads it).
                        *
                        * If a per-band mask papanBandSrcValid[iBand] exists and
                        * CPLMaskGet() on it is false for iSrcOffset, *pdfDensity is set to 0
                        * and false is returned; *pdfReal and *pdfImag are left untouched.
                        *
                        * Otherwise the sample is read from papabySrcImage[iBand] according
                        * to eWorkingDataType. Non-complex types yield *pdfImag == 0. Complex
                        * types are read as interleaved pairs: real part at index
                        * iSrcOffset * 2, imaginary part at iSrcOffset * 2 + 1. 64-bit integer
                        * samples go through static_cast<double>, so magnitudes above 2^53
                        * are not guaranteed to be represented exactly.
                        *
                        * *pdfDensity receives pafUnifiedSrcDensity[iSrcOffset] when that
                        * array exists, else 1.0. The return value is (*pdfDensity != 0.0).
                        * For GDT_Unknown / GDT_TypeCount the function asserts, sets
                        * *pdfDensity to 0 and returns false.
                        */
    2341             : 
    2342    40173600 : static bool GWKGetPixelValue(const GDALWarpKernel *poWK, int iBand,
    2343             :                              GPtrDiff_t iSrcOffset, double *pdfDensity,
    2344             :                              double *pdfReal, double *pdfImag)
    2345             : 
    2346             : {
    2347    40173600 :     GByte *pabySrc = poWK->papabySrcImage[iBand];
    2348             : 
    2349    80347200 :     if (poWK->papanBandSrcValid != nullptr &&
    2350    40173600 :         poWK->papanBandSrcValid[iBand] != nullptr &&
    2351           0 :         !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
    2352             :     {
    2353           0 :         *pdfDensity = 0.0;  // Rejected by the band mask; outputs not written.
    2354           0 :         return false;
    2355             :     }
    2356             : 
    2357    40173600 :     *pdfReal = 0.0;
    2358    40173600 :     *pdfImag = 0.0;
    2359             : 
    2360             :     // TODO(schwehr): Fix casting.
    2361    40173600 :     switch (poWK->eWorkingDataType)
    2362             :     {
    2363    39096600 :         case GDT_UInt8:
    2364    39096600 :             *pdfReal = pabySrc[iSrcOffset];
    2365    39096600 :             *pdfImag = 0.0;
    2366    39096600 :             break;
    2367             : 
    2368           3 :         case GDT_Int8:
    2369           3 :             *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
    2370           3 :             *pdfImag = 0.0;
    2371           3 :             break;
    2372             : 
    2373       28229 :         case GDT_Int16:
    2374       28229 :             *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
    2375       28229 :             *pdfImag = 0.0;
    2376       28229 :             break;
    2377             : 
    2378         166 :         case GDT_UInt16:
    2379         166 :             *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
    2380         166 :             *pdfImag = 0.0;
    2381         166 :             break;
    2382             : 
    2383          63 :         case GDT_Int32:
    2384          63 :             *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
    2385          63 :             *pdfImag = 0.0;
    2386          63 :             break;
    2387             : 
    2388          63 :         case GDT_UInt32:
    2389          63 :             *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
    2390          63 :             *pdfImag = 0.0;
    2391          63 :             break;
    2392             : 
    2393           0 :         case GDT_Int64:
    2394           0 :             *pdfReal = static_cast<double>(
    2395           0 :                 reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
    2396           0 :             *pdfImag = 0.0;
    2397           0 :             break;
    2398             : 
    2399           0 :         case GDT_UInt64:
    2400           0 :             *pdfReal = static_cast<double>(
    2401           0 :                 reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
    2402           0 :             *pdfImag = 0.0;
    2403           0 :             break;
    2404             : 
    2405           0 :         case GDT_Float16:
    2406           0 :             *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
    2407           0 :             *pdfImag = 0.0;
    2408           0 :             break;
    2409             : 
    2410     1047220 :         case GDT_Float32:
    2411     1047220 :             *pdfReal = double(reinterpret_cast<float *>(pabySrc)[iSrcOffset]);
    2412     1047220 :             *pdfImag = 0.0;
    2413     1047220 :             break;
    2414             : 
    2415         587 :         case GDT_Float64:
    2416         587 :             *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
    2417         587 :             *pdfImag = 0.0;
    2418         587 :             break;
    2419             : 
    2420         133 :         case GDT_CInt16:  // Complex cases: interleaved (real, imag) pairs.
    2421         133 :             *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2];
    2422         133 :             *pdfImag = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2 + 1];
    2423         133 :             break;
    2424             : 
    2425         133 :         case GDT_CInt32:
    2426         133 :             *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2];
    2427         133 :             *pdfImag = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2 + 1];
    2428         133 :             break;
    2429             : 
    2430           0 :         case GDT_CFloat16:
    2431           0 :             *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2];
    2432           0 :             *pdfImag =
    2433           0 :                 reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2 + 1];
    2434           0 :             break;
    2435             : 
    2436         194 :         case GDT_CFloat32:
    2437         194 :             *pdfReal =
    2438         194 :                 double(reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2]);
    2439         194 :             *pdfImag =
    2440         194 :                 double(reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2 + 1]);
    2441         194 :             break;
    2442             : 
    2443         138 :         case GDT_CFloat64:
    2444         138 :             *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2];
    2445         138 :             *pdfImag = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2 + 1];
    2446         138 :             break;
    2447             : 
    2448           0 :         case GDT_Unknown:
    2449             :         case GDT_TypeCount:
    2450           0 :             CPLAssert(false);
    2451             :             *pdfDensity = 0.0;
    2452             :             return false;
    2453             :     }
    2454             : 
    2455    40173600 :     if (poWK->pafUnifiedSrcDensity != nullptr)
    2456    12745700 :         *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    2457             :     else
    2458    27427800 :         *pdfDensity = 1.0;  // No density array: pixel counts fully.
    2459             : 
    2460    40173600 :     return *pdfDensity != 0.0;
    2461             : }
    2462             : 
    2463             : /************************************************************************/
    2464             : /*                        GWKGetPixelValueReal()                        */
    2465             : /************************************************************************/
    2466             : /* GWKGetPixelValueReal(): real-only counterpart of GWKGetPixelValue().
                        * Fetches one source sample of band iBand, at pixel index iSrcOffset,
                        * into *pdfReal as a double.
                        *
                        * If a per-band mask papanBandSrcValid[iBand] exists and
                        * CPLMaskGet() on it is false for iSrcOffset, *pdfDensity is set to 0
                        * and false is returned without writing *pdfReal.
                        *
                        * Complex working types, GDT_Unknown and GDT_TypeCount trigger
                        * CPLAssert(false) and return false. On that path neither *pdfReal
                        * nor *pdfDensity is written (GWKGetPixelValue() zeroes the density
                        * in its equivalent branch), so callers must not read them after a
                        * false return.
                        *
                        * Otherwise *pdfDensity receives pafUnifiedSrcDensity[iSrcOffset] when
                        * that array exists, else 1.0, and the return value is
                        * (*pdfDensity != 0.0). This function does not read
                        * panUnifiedSrcValid.
                        */
    2467       15516 : static bool GWKGetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
    2468             :                                  GPtrDiff_t iSrcOffset, double *pdfDensity,
    2469             :                                  double *pdfReal)
    2470             : 
    2471             : {
    2472       15516 :     GByte *pabySrc = poWK->papabySrcImage[iBand];
    2473             : 
    2474       31034 :     if (poWK->papanBandSrcValid != nullptr &&
    2475       15518 :         poWK->papanBandSrcValid[iBand] != nullptr &&
    2476           2 :         !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
    2477             :     {
    2478           0 :         *pdfDensity = 0.0;  // Rejected by the band mask; *pdfReal not written.
    2479           0 :         return false;
    2480             :     }
    2481             : 
    2482       15516 :     switch (poWK->eWorkingDataType)
    2483             :     {
    2484           1 :         case GDT_UInt8:
    2485           1 :             *pdfReal = pabySrc[iSrcOffset];
    2486           1 :             break;
    2487             : 
    2488           0 :         case GDT_Int8:
    2489           0 :             *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
    2490           0 :             break;
    2491             : 
    2492           1 :         case GDT_Int16:
    2493           1 :             *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
    2494           1 :             break;
    2495             : 
    2496           1 :         case GDT_UInt16:
    2497           1 :             *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
    2498           1 :             break;
    2499             : 
    2500         982 :         case GDT_Int32:
    2501         982 :             *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
    2502         982 :             break;
    2503             : 
    2504         179 :         case GDT_UInt32:
    2505         179 :             *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
    2506         179 :             break;
    2507             : 
    2508         112 :         case GDT_Int64:
    2509         112 :             *pdfReal = static_cast<double>(
    2510         112 :                 reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
    2511         112 :             break;
    2512             : 
    2513         112 :         case GDT_UInt64:
    2514         112 :             *pdfReal = static_cast<double>(
    2515         112 :                 reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
    2516         112 :             break;
    2517             : 
    2518           0 :         case GDT_Float16:
    2519           0 :             *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
    2520           0 :             break;
    2521             : 
    2522           2 :         case GDT_Float32:
    2523           2 :             *pdfReal = double(reinterpret_cast<float *>(pabySrc)[iSrcOffset]);
    2524           2 :             break;
    2525             : 
    2526       14126 :         case GDT_Float64:
    2527       14126 :             *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
    2528       14126 :             break;
    2529             : 
    2530           0 :         case GDT_CInt16:
    2531             :         case GDT_CInt32:
    2532             :         case GDT_CFloat16:
    2533             :         case GDT_CFloat32:
    2534             :         case GDT_CFloat64:
    2535             :         case GDT_Unknown:
    2536             :         case GDT_TypeCount:
    2537           0 :             CPLAssert(false);  // Real-only routine: these types are not expected.
    2538             :             return false;  // Note: *pdfDensity and *pdfReal are not written here.
    2539             :     }
    2540             : 
    2541       15516 :     if (poWK->pafUnifiedSrcDensity != nullptr)
    2542           0 :         *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    2543             :     else
    2544       15516 :         *pdfDensity = 1.0;  // No density array: pixel counts fully.
    2545             : 
    2546       15516 :     return *pdfDensity != 0.0;
    2547             : }
    2548             : 
    2549             : /************************************************************************/
    2550             : /*                           GWKGetPixelRow()                           */
    2551             : /************************************************************************/
    2552             : 
    2553             : /* It is assumed that adfImag[] is set to 0 by caller code for non-complex */
    2554             : /* data-types. */
    2555             : 
    2556     2369710 : static bool GWKGetPixelRow(const GDALWarpKernel *poWK, int iBand,
    2557             :                            GPtrDiff_t iSrcOffset, int nHalfSrcLen,
    2558             :                            double *padfDensity, double adfReal[],
    2559             :                            double *padfImag)
    2560             : {
    2561             :     // We know that nSrcLen is even, so we can *always* unroll loops 2x.
    2562     2369710 :     const int nSrcLen = nHalfSrcLen * 2;
    2563     2369710 :     bool bHasValid = false;
    2564             : 
    2565     2369710 :     if (padfDensity != nullptr)
    2566             :     {
    2567             :         // Init the density.
    2568     3384030 :         for (int i = 0; i < nSrcLen; i += 2)
    2569             :         {
    2570     2211910 :             padfDensity[i] = 1.0;
    2571     2211910 :             padfDensity[i + 1] = 1.0;
    2572             :         }
    2573             : 
    2574     1172120 :         if (poWK->panUnifiedSrcValid != nullptr)
    2575             :         {
    2576     3281460 :             for (int i = 0; i < nSrcLen; i += 2)
    2577             :             {
    2578     2142070 :                 if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i))
    2579     2067740 :                     bHasValid = true;
    2580             :                 else
    2581       74323 :                     padfDensity[i] = 0.0;
    2582             : 
    2583     2142070 :                 if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i + 1))
    2584     2068400 :                     bHasValid = true;
    2585             :                 else
    2586       73668 :                     padfDensity[i + 1] = 0.0;
    2587             :             }
    2588             : 
    2589             :             // Reset or fail as needed.
    2590     1139400 :             if (bHasValid)
    2591     1116590 :                 bHasValid = false;
    2592             :             else
    2593       22806 :                 return false;
    2594             :         }
    2595             : 
    2596     1149320 :         if (poWK->papanBandSrcValid != nullptr &&
    2597           0 :             poWK->papanBandSrcValid[iBand] != nullptr)
    2598             :         {
    2599           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2600             :             {
    2601           0 :                 if (CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset + i))
    2602           0 :                     bHasValid = true;
    2603             :                 else
    2604           0 :                     padfDensity[i] = 0.0;
    2605             : 
    2606           0 :                 if (CPLMaskGet(poWK->papanBandSrcValid[iBand],
    2607           0 :                                iSrcOffset + i + 1))
    2608           0 :                     bHasValid = true;
    2609             :                 else
    2610           0 :                     padfDensity[i + 1] = 0.0;
    2611             :             }
    2612             : 
    2613             :             // Reset or fail as needed.
    2614           0 :             if (bHasValid)
    2615           0 :                 bHasValid = false;
    2616             :             else
    2617           0 :                 return false;
    2618             :         }
    2619             :     }
    2620             : 
    2621             :     // TODO(schwehr): Fix casting.
    2622             :     // Fetch data.
    2623     2346910 :     switch (poWK->eWorkingDataType)
    2624             :     {
    2625     1136680 :         case GDT_UInt8:
    2626             :         {
    2627     1136680 :             GByte *pSrc =
    2628     1136680 :                 reinterpret_cast<GByte *>(poWK->papabySrcImage[iBand]);
    2629     1136680 :             pSrc += iSrcOffset;
    2630     3281570 :             for (int i = 0; i < nSrcLen; i += 2)
    2631             :             {
    2632     2144890 :                 adfReal[i] = pSrc[i];
    2633     2144890 :                 adfReal[i + 1] = pSrc[i + 1];
    2634             :             }
    2635     1136680 :             break;
    2636             :         }
    2637             : 
    2638         196 :         case GDT_Int8:
    2639             :         {
    2640         196 :             GInt8 *pSrc =
    2641         196 :                 reinterpret_cast<GInt8 *>(poWK->papabySrcImage[iBand]);
    2642         196 :             pSrc += iSrcOffset;
    2643         392 :             for (int i = 0; i < nSrcLen; i += 2)
    2644             :             {
    2645         196 :                 adfReal[i] = pSrc[i];
    2646         196 :                 adfReal[i + 1] = pSrc[i + 1];
    2647             :             }
    2648         196 :             break;
    2649             :         }
    2650             : 
    2651        5754 :         case GDT_Int16:
    2652             :         {
    2653        5754 :             GInt16 *pSrc =
    2654        5754 :                 reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
    2655        5754 :             pSrc += iSrcOffset;
    2656       21772 :             for (int i = 0; i < nSrcLen; i += 2)
    2657             :             {
    2658       16018 :                 adfReal[i] = pSrc[i];
    2659       16018 :                 adfReal[i + 1] = pSrc[i + 1];
    2660             :             }
    2661        5754 :             break;
    2662             :         }
    2663             : 
    2664        4310 :         case GDT_UInt16:
    2665             :         {
    2666        4310 :             GUInt16 *pSrc =
    2667        4310 :                 reinterpret_cast<GUInt16 *>(poWK->papabySrcImage[iBand]);
    2668        4310 :             pSrc += iSrcOffset;
    2669       18884 :             for (int i = 0; i < nSrcLen; i += 2)
    2670             :             {
    2671       14574 :                 adfReal[i] = pSrc[i];
    2672       14574 :                 adfReal[i + 1] = pSrc[i + 1];
    2673             :             }
    2674        4310 :             break;
    2675             :         }
    2676             : 
    2677         946 :         case GDT_Int32:
    2678             :         {
    2679         946 :             GInt32 *pSrc =
    2680         946 :                 reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
    2681         946 :             pSrc += iSrcOffset;
    2682        2624 :             for (int i = 0; i < nSrcLen; i += 2)
    2683             :             {
    2684        1678 :                 adfReal[i] = pSrc[i];
    2685        1678 :                 adfReal[i + 1] = pSrc[i + 1];
    2686             :             }
    2687         946 :             break;
    2688             :         }
    2689             : 
    2690         946 :         case GDT_UInt32:
    2691             :         {
    2692         946 :             GUInt32 *pSrc =
    2693         946 :                 reinterpret_cast<GUInt32 *>(poWK->papabySrcImage[iBand]);
    2694         946 :             pSrc += iSrcOffset;
    2695        2624 :             for (int i = 0; i < nSrcLen; i += 2)
    2696             :             {
    2697        1678 :                 adfReal[i] = pSrc[i];
    2698        1678 :                 adfReal[i + 1] = pSrc[i + 1];
    2699             :             }
    2700         946 :             break;
    2701             :         }
    2702             : 
    2703         196 :         case GDT_Int64:
    2704             :         {
    2705         196 :             auto pSrc =
    2706         196 :                 reinterpret_cast<std::int64_t *>(poWK->papabySrcImage[iBand]);
    2707         196 :             pSrc += iSrcOffset;
    2708         392 :             for (int i = 0; i < nSrcLen; i += 2)
    2709             :             {
    2710         196 :                 adfReal[i] = static_cast<double>(pSrc[i]);
    2711         196 :                 adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
    2712             :             }
    2713         196 :             break;
    2714             :         }
    2715             : 
    2716         196 :         case GDT_UInt64:
    2717             :         {
    2718         196 :             auto pSrc =
    2719         196 :                 reinterpret_cast<std::uint64_t *>(poWK->papabySrcImage[iBand]);
    2720         196 :             pSrc += iSrcOffset;
    2721         392 :             for (int i = 0; i < nSrcLen; i += 2)
    2722             :             {
    2723         196 :                 adfReal[i] = static_cast<double>(pSrc[i]);
    2724         196 :                 adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
    2725             :             }
    2726         196 :             break;
    2727             :         }
    2728             : 
    2729           0 :         case GDT_Float16:
    2730             :         {
    2731           0 :             GFloat16 *pSrc =
    2732           0 :                 reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
    2733           0 :             pSrc += iSrcOffset;
    2734           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2735             :             {
    2736           0 :                 adfReal[i] = pSrc[i];
    2737           0 :                 adfReal[i + 1] = pSrc[i + 1];
    2738             :             }
    2739           0 :             break;
    2740             :         }
    2741             : 
    2742       25270 :         case GDT_Float32:
    2743             :         {
    2744       25270 :             float *pSrc =
    2745       25270 :                 reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
    2746       25270 :             pSrc += iSrcOffset;
    2747      121739 :             for (int i = 0; i < nSrcLen; i += 2)
    2748             :             {
    2749       96469 :                 adfReal[i] = double(pSrc[i]);
    2750       96469 :                 adfReal[i + 1] = double(pSrc[i + 1]);
    2751             :             }
    2752       25270 :             break;
    2753             :         }
    2754             : 
    2755         946 :         case GDT_Float64:
    2756             :         {
    2757         946 :             double *pSrc =
    2758         946 :                 reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
    2759         946 :             pSrc += iSrcOffset;
    2760        2624 :             for (int i = 0; i < nSrcLen; i += 2)
    2761             :             {
    2762        1678 :                 adfReal[i] = pSrc[i];
    2763        1678 :                 adfReal[i + 1] = pSrc[i + 1];
    2764             :             }
    2765         946 :             break;
    2766             :         }
    2767             : 
    2768     1169220 :         case GDT_CInt16:
    2769             :         {
    2770     1169220 :             GInt16 *pSrc =
    2771     1169220 :                 reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
    2772     1169220 :             pSrc += 2 * iSrcOffset;
    2773     4676020 :             for (int i = 0; i < nSrcLen; i += 2)
    2774             :             {
    2775     3506800 :                 adfReal[i] = pSrc[2 * i];
    2776     3506800 :                 padfImag[i] = pSrc[2 * i + 1];
    2777             : 
    2778     3506800 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2779     3506800 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2780             :             }
    2781     1169220 :             break;
    2782             :         }
    2783             : 
    2784         750 :         case GDT_CInt32:
    2785             :         {
    2786         750 :             GInt32 *pSrc =
    2787         750 :                 reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
    2788         750 :             pSrc += 2 * iSrcOffset;
    2789        2232 :             for (int i = 0; i < nSrcLen; i += 2)
    2790             :             {
    2791        1482 :                 adfReal[i] = pSrc[2 * i];
    2792        1482 :                 padfImag[i] = pSrc[2 * i + 1];
    2793             : 
    2794        1482 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2795        1482 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2796             :             }
    2797         750 :             break;
    2798             :         }
    2799             : 
    2800           0 :         case GDT_CFloat16:
    2801             :         {
    2802           0 :             GFloat16 *pSrc =
    2803           0 :                 reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
    2804           0 :             pSrc += 2 * iSrcOffset;
    2805           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2806             :             {
    2807           0 :                 adfReal[i] = pSrc[2 * i];
    2808           0 :                 padfImag[i] = pSrc[2 * i + 1];
    2809             : 
    2810           0 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2811           0 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2812             :             }
    2813           0 :             break;
    2814             :         }
    2815             : 
    2816         750 :         case GDT_CFloat32:
    2817             :         {
    2818         750 :             float *pSrc =
    2819         750 :                 reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
    2820         750 :             pSrc += 2 * iSrcOffset;
    2821        2232 :             for (int i = 0; i < nSrcLen; i += 2)
    2822             :             {
    2823        1482 :                 adfReal[i] = double(pSrc[2 * i]);
    2824        1482 :                 padfImag[i] = double(pSrc[2 * i + 1]);
    2825             : 
    2826        1482 :                 adfReal[i + 1] = double(pSrc[2 * i + 2]);
    2827        1482 :                 padfImag[i + 1] = double(pSrc[2 * i + 3]);
    2828             :             }
    2829         750 :             break;
    2830             :         }
    2831             : 
    2832         750 :         case GDT_CFloat64:
    2833             :         {
    2834         750 :             double *pSrc =
    2835         750 :                 reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
    2836         750 :             pSrc += 2 * iSrcOffset;
    2837        2232 :             for (int i = 0; i < nSrcLen; i += 2)
    2838             :             {
    2839        1482 :                 adfReal[i] = pSrc[2 * i];
    2840        1482 :                 padfImag[i] = pSrc[2 * i + 1];
    2841             : 
    2842        1482 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2843        1482 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2844             :             }
    2845         750 :             break;
    2846             :         }
    2847             : 
    2848           0 :         case GDT_Unknown:
    2849             :         case GDT_TypeCount:
    2850           0 :             CPLAssert(false);
    2851             :             if (padfDensity)
    2852             :                 memset(padfDensity, 0, nSrcLen * sizeof(double));
    2853             :             return false;
    2854             :     }
    2855             : 
    2856     2346910 :     if (padfDensity == nullptr)
    2857     1197590 :         return true;
    2858             : 
    2859     1149320 :     if (poWK->pafUnifiedSrcDensity == nullptr)
    2860             :     {
    2861     3256740 :         for (int i = 0; i < nSrcLen; i += 2)
    2862             :         {
    2863             :             // Take into account earlier calcs.
    2864     2127390 :             if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2865             :             {
    2866     2087480 :                 padfDensity[i] = 1.0;
    2867     2087480 :                 bHasValid = true;
    2868             :             }
    2869             : 
    2870     2127390 :             if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2871             :             {
    2872     2088140 :                 padfDensity[i + 1] = 1.0;
    2873     2088140 :                 bHasValid = true;
    2874             :             }
    2875             :         }
    2876             :     }
    2877             :     else
    2878             :     {
    2879       70068 :         for (int i = 0; i < nSrcLen; i += 2)
    2880             :         {
    2881       50103 :             if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2882       50103 :                 padfDensity[i] =
    2883       50103 :                     double(poWK->pafUnifiedSrcDensity[iSrcOffset + i]);
    2884       50103 :             if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2885       49252 :                 bHasValid = true;
    2886             : 
    2887       50103 :             if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2888       50103 :                 padfDensity[i + 1] =
    2889       50103 :                     double(poWK->pafUnifiedSrcDensity[iSrcOffset + i + 1]);
    2890       50103 :             if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2891       49166 :                 bHasValid = true;
    2892             :         }
    2893             :     }
    2894             : 
    2895     1149320 :     return bHasValid;
    2896             : }
    2897             : 
    2898             : /************************************************************************/
    2899             : /*                            GWKGetPixelT()                            */
    2900             : /************************************************************************/
    2901             : 
    2902             : template <class T>
    2903    14964659 : static bool GWKGetPixelT(const GDALWarpKernel *poWK, int iBand,
    2904             :                          GPtrDiff_t iSrcOffset, double *pdfDensity, T *pValue)
    2905             : 
    2906             : {
    2907    14964659 :     T *pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
    2908             : 
    2909    33172043 :     if ((poWK->panUnifiedSrcValid != nullptr &&
    2910    29929218 :          !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset)) ||
    2911    14964659 :         (poWK->papanBandSrcValid != nullptr &&
    2912      589863 :          poWK->papanBandSrcValid[iBand] != nullptr &&
    2913      589863 :          !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset)))
    2914             :     {
    2915           9 :         *pdfDensity = 0.0;
    2916           9 :         return false;
    2917             :     }
    2918             : 
    2919    14964559 :     *pValue = pSrc[iSrcOffset];
    2920             : 
    2921    14964559 :     if (poWK->pafUnifiedSrcDensity == nullptr)
    2922    13842266 :         *pdfDensity = 1.0;
    2923             :     else
    2924     1122362 :         *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    2925             : 
    2926    14964559 :     return *pdfDensity != 0.0;
    2927             : }
    2928             : 
    2929             : /************************************************************************/
    2930             : /*                        GWKBilinearResample()                         */
    2931             : /*     Set of bilinear interpolators                                    */
    2932             : /************************************************************************/
    2933             : 
    2934       77448 : static bool GWKBilinearResample4Sample(const GDALWarpKernel *poWK, int iBand,
    2935             :                                        double dfSrcX, double dfSrcY,
    2936             :                                        double *pdfDensity, double *pdfReal,
    2937             :                                        double *pdfImag)
    2938             : 
    2939             : {
    2940             :     // Save as local variables to avoid following pointers.
    2941       77448 :     const int nSrcXSize = poWK->nSrcXSize;
    2942       77448 :     const int nSrcYSize = poWK->nSrcYSize;
    2943             : 
    2944       77448 :     int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    2945       77448 :     int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    2946       77448 :     double dfRatioX = 1.5 - (dfSrcX - iSrcX);
    2947       77448 :     double dfRatioY = 1.5 - (dfSrcY - iSrcY);
    2948       77448 :     bool bShifted = false;
    2949             : 
    2950       77448 :     if (iSrcX == -1)
    2951             :     {
    2952        1534 :         iSrcX = 0;
    2953        1534 :         dfRatioX = 1;
    2954             :     }
    2955       77448 :     if (iSrcY == -1)
    2956             :     {
    2957        7734 :         iSrcY = 0;
    2958        7734 :         dfRatioY = 1;
    2959             :     }
    2960       77448 :     GPtrDiff_t iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    2961             : 
    2962             :     // Shift so we don't overrun the array.
    2963       77448 :     if (static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize == iSrcOffset + 1 ||
    2964       77330 :         static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize ==
    2965       77330 :             iSrcOffset + nSrcXSize + 1)
    2966             :     {
    2967         230 :         bShifted = true;
    2968         230 :         --iSrcOffset;
    2969             :     }
    2970             : 
    2971       77448 :     double adfDensity[2] = {0.0, 0.0};
    2972       77448 :     double adfReal[2] = {0.0, 0.0};
    2973       77448 :     double adfImag[2] = {0.0, 0.0};
    2974       77448 :     double dfAccumulatorReal = 0.0;
    2975       77448 :     double dfAccumulatorImag = 0.0;
    2976       77448 :     double dfAccumulatorDensity = 0.0;
    2977       77448 :     double dfAccumulatorDivisor = 0.0;
    2978             : 
    2979       77448 :     const GPtrDiff_t nSrcPixels =
    2980       77448 :         static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize;
    2981             :     // Get pixel row.
    2982       77448 :     if (iSrcY >= 0 && iSrcY < nSrcYSize && iSrcOffset >= 0 &&
    2983      154896 :         iSrcOffset < nSrcPixels &&
    2984       77448 :         GWKGetPixelRow(poWK, iBand, iSrcOffset, 1, adfDensity, adfReal,
    2985             :                        adfImag))
    2986             :     {
    2987       71504 :         double dfMult1 = dfRatioX * dfRatioY;
    2988       71504 :         double dfMult2 = (1.0 - dfRatioX) * dfRatioY;
    2989             : 
    2990             :         // Shifting corrected.
    2991       71504 :         if (bShifted)
    2992             :         {
    2993         230 :             adfReal[0] = adfReal[1];
    2994         230 :             adfImag[0] = adfImag[1];
    2995         230 :             adfDensity[0] = adfDensity[1];
    2996             :         }
    2997             : 
    2998             :         // Upper Left Pixel.
    2999       71504 :         if (iSrcX >= 0 && iSrcX < nSrcXSize &&
    3000       71504 :             adfDensity[0] > SRC_DENSITY_THRESHOLD_DOUBLE)
    3001             :         {
    3002       66050 :             dfAccumulatorDivisor += dfMult1;
    3003             : 
    3004       66050 :             dfAccumulatorReal += adfReal[0] * dfMult1;
    3005       66050 :             dfAccumulatorImag += adfImag[0] * dfMult1;
    3006       66050 :             dfAccumulatorDensity += adfDensity[0] * dfMult1;
    3007             :         }
    3008             : 
    3009             :         // Upper Right Pixel.
    3010       71504 :         if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
    3011       70609 :             adfDensity[1] > SRC_DENSITY_THRESHOLD_DOUBLE)
    3012             :         {
    3013       65335 :             dfAccumulatorDivisor += dfMult2;
    3014             : 
    3015       65335 :             dfAccumulatorReal += adfReal[1] * dfMult2;
    3016       65335 :             dfAccumulatorImag += adfImag[1] * dfMult2;
    3017       65335 :             dfAccumulatorDensity += adfDensity[1] * dfMult2;
    3018             :         }
    3019             :     }
    3020             : 
    3021             :     // Get pixel row.
    3022       77448 :     if (iSrcY + 1 >= 0 && iSrcY + 1 < nSrcYSize &&
    3023      228032 :         iSrcOffset + nSrcXSize >= 0 && iSrcOffset + nSrcXSize < nSrcPixels &&
    3024       73136 :         GWKGetPixelRow(poWK, iBand, iSrcOffset + nSrcXSize, 1, adfDensity,
    3025             :                        adfReal, adfImag))
    3026             :     {
    3027       67577 :         double dfMult1 = dfRatioX * (1.0 - dfRatioY);
    3028       67577 :         double dfMult2 = (1.0 - dfRatioX) * (1.0 - dfRatioY);
    3029             : 
    3030             :         // Shifting corrected
    3031       67577 :         if (bShifted)
    3032             :         {
    3033         112 :             adfReal[0] = adfReal[1];
    3034         112 :             adfImag[0] = adfImag[1];
    3035         112 :             adfDensity[0] = adfDensity[1];
    3036             :         }
    3037             : 
    3038             :         // Lower Left Pixel
    3039       67577 :         if (iSrcX >= 0 && iSrcX < nSrcXSize &&
    3040       67577 :             adfDensity[0] > SRC_DENSITY_THRESHOLD_DOUBLE)
    3041             :         {
    3042       62298 :             dfAccumulatorDivisor += dfMult1;
    3043             : 
    3044       62298 :             dfAccumulatorReal += adfReal[0] * dfMult1;
    3045       62298 :             dfAccumulatorImag += adfImag[0] * dfMult1;
    3046       62298 :             dfAccumulatorDensity += adfDensity[0] * dfMult1;
    3047             :         }
    3048             : 
    3049             :         // Lower Right Pixel.
    3050       67577 :         if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
    3051       66800 :             adfDensity[1] > SRC_DENSITY_THRESHOLD_DOUBLE)
    3052             :         {
    3053       61823 :             dfAccumulatorDivisor += dfMult2;
    3054             : 
    3055       61823 :             dfAccumulatorReal += adfReal[1] * dfMult2;
    3056       61823 :             dfAccumulatorImag += adfImag[1] * dfMult2;
    3057       61823 :             dfAccumulatorDensity += adfDensity[1] * dfMult2;
    3058             :         }
    3059             :     }
    3060             : 
    3061             :     /* -------------------------------------------------------------------- */
    3062             :     /*      Return result.                                                  */
    3063             :     /* -------------------------------------------------------------------- */
    3064       77448 :     if (dfAccumulatorDivisor == 1.0)
    3065             :     {
    3066       45929 :         *pdfReal = dfAccumulatorReal;
    3067       45929 :         *pdfImag = dfAccumulatorImag;
    3068       45929 :         *pdfDensity = dfAccumulatorDensity;
    3069       45929 :         return false;
    3070             :     }
    3071       31519 :     else if (dfAccumulatorDivisor < 0.00001)
    3072             :     {
    3073           0 :         *pdfReal = 0.0;
    3074           0 :         *pdfImag = 0.0;
    3075           0 :         *pdfDensity = 0.0;
    3076           0 :         return false;
    3077             :     }
    3078             :     else
    3079             :     {
    3080       31519 :         *pdfReal = dfAccumulatorReal / dfAccumulatorDivisor;
    3081       31519 :         *pdfImag = dfAccumulatorImag / dfAccumulatorDivisor;
    3082       31519 :         *pdfDensity = dfAccumulatorDensity / dfAccumulatorDivisor;
    3083       31519 :         return true;
    3084             :     }
    3085             : }
    3086             : 
    3087             : template <class T>
    3088     8979122 : static bool GWKBilinearResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
    3089             :                                                int iBand, double dfSrcX,
    3090             :                                                double dfSrcY, T *pValue)
    3091             : 
    3092             : {
    3093             : 
    3094     8979122 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    3095     8979122 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    3096     8979122 :     GPtrDiff_t iSrcOffset =
    3097     8979122 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3098     8979122 :     const double dfRatioX = 1.5 - (dfSrcX - iSrcX);
    3099     8979122 :     const double dfRatioY = 1.5 - (dfSrcY - iSrcY);
    3100             : 
    3101     8979122 :     const T *const pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
    3102             : 
    3103     8979122 :     if (iSrcX >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
    3104     6224079 :         iSrcY + 1 < poWK->nSrcYSize)
    3105             :     {
    3106     6032592 :         const double dfAccumulator =
    3107     6032592 :             (double(pSrc[iSrcOffset]) * dfRatioX +
    3108     6032592 :              double(pSrc[iSrcOffset + 1]) * (1.0 - dfRatioX)) *
    3109             :                 dfRatioY +
    3110     6032592 :             (double(pSrc[iSrcOffset + poWK->nSrcXSize]) * dfRatioX +
    3111     6032592 :              double(pSrc[iSrcOffset + 1 + poWK->nSrcXSize]) *
    3112     6032592 :                  (1.0 - dfRatioX)) *
    3113     6032592 :                 (1.0 - dfRatioY);
    3114             : 
    3115     6032592 :         *pValue = GWKRoundValueT<T>(dfAccumulator);
    3116             : 
    3117     6032592 :         return true;
    3118             :     }
    3119             : 
    3120     2946530 :     double dfAccumulatorDivisor = 0.0;
    3121     2946530 :     double dfAccumulator = 0.0;
    3122             : 
    3123             :     // Upper Left Pixel.
    3124     2946530 :     if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY >= 0 &&
    3125      564876 :         iSrcY < poWK->nSrcYSize)
    3126             :     {
    3127      564876 :         const double dfMult = dfRatioX * dfRatioY;
    3128             : 
    3129      564876 :         dfAccumulatorDivisor += dfMult;
    3130             : 
    3131      564876 :         dfAccumulator += double(pSrc[iSrcOffset]) * dfMult;
    3132             :     }
    3133             : 
    3134             :     // Upper Right Pixel.
    3135     2946530 :     if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
    3136     2261926 :         iSrcY < poWK->nSrcYSize)
    3137             :     {
    3138     2261926 :         const double dfMult = (1.0 - dfRatioX) * dfRatioY;
    3139             : 
    3140     2261926 :         dfAccumulatorDivisor += dfMult;
    3141             : 
    3142     2261926 :         dfAccumulator += double(pSrc[iSrcOffset + 1]) * dfMult;
    3143             :     }
    3144             : 
    3145             :     // Lower Right Pixel.
    3146     2946530 :     if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
    3147     2512924 :         iSrcY + 1 < poWK->nSrcYSize)
    3148             :     {
    3149     2261243 :         const double dfMult = (1.0 - dfRatioX) * (1.0 - dfRatioY);
    3150             : 
    3151     2261243 :         dfAccumulatorDivisor += dfMult;
    3152             : 
    3153     2261243 :         dfAccumulator +=
    3154     2261243 :             double(pSrc[iSrcOffset + 1 + poWK->nSrcXSize]) * dfMult;
    3155             :     }
    3156             : 
    3157             :     // Lower Left Pixel.
    3158     2946530 :     if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
    3159      815601 :         iSrcY + 1 < poWK->nSrcYSize)
    3160             :     {
    3161      563917 :         const double dfMult = dfRatioX * (1.0 - dfRatioY);
    3162             : 
    3163      563917 :         dfAccumulatorDivisor += dfMult;
    3164             : 
    3165      563917 :         dfAccumulator += double(pSrc[iSrcOffset + poWK->nSrcXSize]) * dfMult;
    3166             :     }
    3167             : 
    3168             :     /* -------------------------------------------------------------------- */
    3169             :     /*      Return result.                                                  */
    3170             :     /* -------------------------------------------------------------------- */
    3171     2946530 :     double dfValue = 0.0;
    3172             : 
    3173     2946530 :     if (dfAccumulatorDivisor < 0.00001)
    3174             :     {
    3175           0 :         *pValue = 0;
    3176           0 :         return false;
    3177             :     }
    3178     2946530 :     else if (dfAccumulatorDivisor == 1.0)
    3179             :     {
    3180       22176 :         dfValue = dfAccumulator;
    3181             :     }
    3182             :     else
    3183             :     {
    3184     2924358 :         dfValue = dfAccumulator / dfAccumulatorDivisor;
    3185             :     }
    3186             : 
    3187     2946530 :     *pValue = GWKRoundValueT<T>(dfValue);
    3188             : 
    3189     2946530 :     return true;
    3190             : }
    3191             : 
    3192             : /************************************************************************/
    3193             : /*                        GWKCubicResample()                            */
    3194             : /*     Set of bicubic interpolators using cubic convolution.            */
    3195             : /************************************************************************/
    3196             : 
    3197             : // http://verona.fi-p.unam.mx/boris/practicas/CubConvInterp.pdf Formula 18
    3198             : // or http://en.wikipedia.org/wiki/Cubic_Hermite_spline : CINTx(p_1,p0,p1,p2)
    3199             : // http://en.wikipedia.org/wiki/Bicubic_interpolation: matrix notation
    3200             : 
    3201             : template <typename T>
    3202     1810720 : static inline T CubicConvolution(T distance1, T distance2, T distance3, T f0,
    3203             :                                  T f1, T f2, T f3)
    3204             : {
    3205     1810720 :     return (f1 + T(0.5) * (distance1 * (f2 - f0) +
    3206     1810720 :                            distance2 * (2 * f0 - 5 * f1 + 4 * f2 - f3) +
    3207     1810720 :                            distance3 * (3 * (f1 - f2) + f3 - f0)));
    3208             : }
    3209             : 
    3210             : /************************************************************************/
    3211             : /*                       GWKCubicComputeWeights()                       */
    3212             : /************************************************************************/
    3213             : 
// The four weights sum to 1, hence:
// adfCoeffs[2] = 1.0 - (adfCoeffs[0] + adfCoeffs[1] + adfCoeffs[3]);
    3215             : 
    3216             : template <typename T>
    3217    97781060 : static inline void GWKCubicComputeWeights(T x, T coeffs[4])
    3218             : {
    3219    97781060 :     const T halfX = T(0.5) * x;
    3220    97781060 :     const T threeX = T(3.0) * x;
    3221    97781060 :     const T halfX2 = halfX * x;
    3222             : 
    3223    97781060 :     coeffs[0] = halfX * (-1 + x * (2 - x));
    3224    97781060 :     coeffs[1] = 1 + halfX2 * (-5 + threeX);
    3225    97781060 :     coeffs[2] = halfX * (1 + x * (4 - threeX));
    3226    97781060 :     coeffs[3] = halfX2 * (-1 + x);
    3227    97781060 : }
    3228             : 
    3229    14682546 : template <typename T> inline double CONVOL4(const double v1[4], const T v2[4])
    3230             : {
    3231    14682546 :     return v1[0] * double(v2[0]) + v1[1] * double(v2[1]) +
    3232    14682546 :            v1[2] * double(v2[2]) + v1[3] * double(v2[3]);
    3233             : }
    3234             : 
#if 0
// Disabled alternative to GWKCubicComputeWeights() / CONVOL4(): the weights
// are stored without their common 0.5 * x factor and without the constant 1
// of the second weight; CONVOL4_Optim2MAX() re-applies both through dfHalfX
// and the leading (v)[1] term.
//
// Optimal (in theory...) for max 2 convolutions: 14 multiplications
// instead of 17.
// TODO(schwehr): Use an inline function.
#define GWKCubicComputeWeights_Optim2MAX(dfX_, adfCoeffs, dfHalfX)             \
    {                                                                          \
        const double dfX = dfX_;                                               \
        dfHalfX = 0.5 * dfX;                                                   \
        const double dfThreeX = 3.0 * dfX;                                     \
        const double dfXMinus1 = dfX - 1;                                      \
                                                                               \
        adfCoeffs[0] = -1 + dfX * (2 - dfX);                                   \
        adfCoeffs[1] = dfX * (-5 + dfThreeX);                                  \
        /*adfCoeffs[2] = 1 + dfX * (4 - dfThreeX);*/                           \
        adfCoeffs[2] = -dfXMinus1 - adfCoeffs[1];                              \
        /*adfCoeffs[3] = dfX * (-1 + dfX); */                                  \
        adfCoeffs[3] = dfXMinus1 - adfCoeffs[0];                               \
    }

// Convolution matching the factored weights above: the result is
// v[1] + dfHalfX * (sum of adfCoeffs[i] * v[i]).
// TODO(schwehr): Use an inline function.
#define CONVOL4_Optim2MAX(adfCoeffs, v, dfHalfX)                               \
    ((v)[1] + (dfHalfX) * ((adfCoeffs)[0] * (v)[0] + (adfCoeffs)[1] * (v)[1] + \
                           (adfCoeffs)[2] * (v)[2] + (adfCoeffs)[3] * (v)[3]))
#endif
    3259             : 
    3260      302045 : static bool GWKCubicResample4Sample(const GDALWarpKernel *poWK, int iBand,
    3261             :                                     double dfSrcX, double dfSrcY,
    3262             :                                     double *pdfDensity, double *pdfReal,
    3263             :                                     double *pdfImag)
    3264             : 
    3265             : {
    3266      302045 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3267      302045 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3268      302045 :     GPtrDiff_t iSrcOffset =
    3269      302045 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3270      302045 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3271      302045 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3272      302045 :     double adfDensity[4] = {};
    3273      302045 :     double adfReal[4] = {};
    3274      302045 :     double adfImag[4] = {};
    3275             : 
    3276             :     // Get the bilinear interpolation at the image borders.
    3277      302045 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3278      286140 :         iSrcY + 2 >= poWK->nSrcYSize)
    3279       24670 :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3280       24670 :                                           pdfDensity, pdfReal, pdfImag);
    3281             : 
    3282      277375 :     double adfValueDens[4] = {};
    3283      277375 :     double adfValueReal[4] = {};
    3284      277375 :     double adfValueImag[4] = {};
    3285             : 
    3286      277375 :     double adfCoeffsX[4] = {};
    3287      277375 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
    3288             : 
    3289     1240570 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3290             :     {
    3291     1009640 :         if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
    3292      998035 :                             2, adfDensity, adfReal, adfImag) ||
    3293      998035 :             adfDensity[0] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3294      980395 :             adfDensity[1] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3295     2979770 :             adfDensity[2] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3296      972094 :             adfDensity[3] < SRC_DENSITY_THRESHOLD_DOUBLE)
    3297             :         {
    3298       46449 :             return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3299       46449 :                                               pdfDensity, pdfReal, pdfImag);
    3300             :         }
    3301             : 
    3302      963196 :         adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
    3303      963196 :         adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
    3304      963196 :         adfValueImag[i + 1] = CONVOL4(adfCoeffsX, adfImag);
    3305             :     }
    3306             : 
    3307             :     /* -------------------------------------------------------------------- */
    3308             :     /*      For now, if we have any pixels missing in the kernel area,      */
    3309             :     /*      we fallback on using bilinear interpolation.  Ideally we        */
    3310             :     /*      should do "weight adjustment" of our results similarly to       */
    3311             :     /*      what is done for the cubic spline and lanc. interpolators.      */
    3312             :     /* -------------------------------------------------------------------- */
    3313             : 
    3314      230926 :     double adfCoeffsY[4] = {};
    3315      230926 :     GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
    3316             : 
    3317      230926 :     *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
    3318      230926 :     *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
    3319      230926 :     *pdfImag = CONVOL4(adfCoeffsY, adfValueImag);
    3320             : 
    3321      230926 :     return true;
    3322             : }
    3323             : 
    3324             : #ifdef USE_SSE2
    3325             : 
    3326             : /************************************************************************/
    3327             : /*                           XMMLoad4Values()                           */
    3328             : /*                                                                      */
    3329             : /*  Load 4 packed byte or uint16, cast them to float and put them in a  */
    3330             : /*  m128 register.                                                      */
    3331             : /************************************************************************/
    3332             : 
    3333   567016000 : static CPL_INLINE __m128 XMMLoad4Values(const GByte *ptr)
    3334             : {
    3335             :     unsigned int i;
    3336   567016000 :     memcpy(&i, ptr, 4);
    3337  1134030000 :     __m128i xmm_i = _mm_cvtsi32_si128(i);
    3338             :     // Zero extend 4 packed unsigned 8-bit integers in a to packed
    3339             :     // 32-bit integers.
    3340             : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
    3341             :     xmm_i = _mm_cvtepu8_epi32(xmm_i);
    3342             : #else
    3343  1134030000 :     xmm_i = _mm_unpacklo_epi8(xmm_i, _mm_setzero_si128());
    3344  1134030000 :     xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
    3345             : #endif
    3346  1134030000 :     return _mm_cvtepi32_ps(xmm_i);
    3347             : }
    3348             : 
    3349     1108340 : static CPL_INLINE __m128 XMMLoad4Values(const GUInt16 *ptr)
    3350             : {
    3351             :     GUInt64 i;
    3352     1108340 :     memcpy(&i, ptr, 8);
    3353     2216690 :     __m128i xmm_i = _mm_cvtsi64_si128(i);
    3354             :     // Zero extend 4 packed unsigned 16-bit integers in a to packed
    3355             :     // 32-bit integers.
    3356             : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
    3357             :     xmm_i = _mm_cvtepu16_epi32(xmm_i);
    3358             : #else
    3359     2216690 :     xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
    3360             : #endif
    3361     2216690 :     return _mm_cvtepi32_ps(xmm_i);
    3362             : }
    3363             : 
    3364             : /************************************************************************/
    3365             : /*                           XMMHorizontalAdd()                         */
    3366             : /*                                                                      */
    3367             : /*  Return the sum of the 4 floating points of the register.            */
    3368             : /************************************************************************/
    3369             : 
    3370             : #if defined(__SSE3__) || defined(USE_NEON_OPTIMIZATIONS)
    3371             : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
    3372             : {
    3373             :     __m128 shuf = _mm_movehdup_ps(v);   // (v3   , v3   , v1   , v1)
    3374             :     __m128 sums = _mm_add_ps(v, shuf);  // (v3+v3, v3+v2, v1+v1, v1+v0)
    3375             :     shuf = _mm_movehl_ps(shuf, sums);   // (v3   , v3   , v3+v3, v3+v2)
    3376             :     sums = _mm_add_ss(sums, shuf);      // (v1+v0)+(v3+v2)
    3377             :     return _mm_cvtss_f32(sums);
    3378             : }
    3379             : #else
    3380   142031000 : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
    3381             : {
    3382   142031000 :     __m128 shuf = _mm_movehl_ps(v, v);     // (v3   , v2   , v3   , v2)
    3383   142031000 :     __m128 sums = _mm_add_ps(v, shuf);     // (v3+v3, v2+v2, v3+v1, v2+v0)
    3384   142031000 :     shuf = _mm_shuffle_ps(sums, sums, 1);  // (v2+v0, v2+v0, v2+v0, v3+v1)
    3385   142031000 :     sums = _mm_add_ss(sums, shuf);         // (v2+v0)+(v3+v1)
    3386   142031000 :     return _mm_cvtss_f32(sums);
    3387             : }
    3388             : #endif
    3389             : 
    3390             : #endif  // define USE_SSE2
    3391             : 
    3392             : /************************************************************************/
    3393             : /*            GWKCubicResampleSrcMaskIsDensity4SampleRealT()            */
    3394             : /************************************************************************/
    3395             : 
    3396             : // Note: if USE_SSE_CUBIC_IMPL, only instantiate that for Byte and UInt16,
    3397             : // because there are a few assumptions about those types.
    3398             : // We do not define USE_SSE_CUBIC_IMPL since in practice, it gives zero
    3399             : // perf benefit.
    3400             : 
    3401             : template <class T>
    3402      389755 : static CPL_INLINE bool GWKCubicResampleSrcMaskIsDensity4SampleRealT(
    3403             :     const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
    3404             :     double *pdfDensity, double *pdfReal)
    3405             : {
    3406      389755 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3407      389755 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3408      389755 :     const GPtrDiff_t iSrcOffset =
    3409      389755 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3410             : 
    3411             :     // Get the bilinear interpolation at the image borders.
    3412      389755 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3413      387271 :         iSrcY + 2 >= poWK->nSrcYSize)
    3414             :     {
    3415        2484 :         double adfImagIgnored[4] = {};
    3416        2484 :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3417        2484 :                                           pdfDensity, pdfReal, adfImagIgnored);
    3418             :     }
    3419             : 
    3420             : #if defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
    3421             :     const float fDeltaX = static_cast<float>(dfSrcX) - 0.5f - iSrcX;
    3422             :     const float fDeltaY = static_cast<float>(dfSrcY) - 0.5f - iSrcY;
    3423             : 
    3424             :     // TODO(schwehr): Explain the magic numbers.
    3425             :     float afTemp[4 + 4 + 4 + 1];
    3426             :     float *pafAligned =
    3427             :         reinterpret_cast<float *>(afTemp + ((size_t)afTemp & 0xf));
    3428             :     float *pafCoeffs = pafAligned;
    3429             :     float *pafDensity = pafAligned + 4;
    3430             :     float *pafValue = pafAligned + 8;
    3431             : 
    3432             :     const float fHalfDeltaX = 0.5f * fDeltaX;
    3433             :     const float fThreeDeltaX = 3.0f * fDeltaX;
    3434             :     const float fHalfDeltaX2 = fHalfDeltaX * fDeltaX;
    3435             : 
    3436             :     pafCoeffs[0] = fHalfDeltaX * (-1 + fDeltaX * (2 - fDeltaX));
    3437             :     pafCoeffs[1] = 1 + fHalfDeltaX2 * (-5 + fThreeDeltaX);
    3438             :     pafCoeffs[2] = fHalfDeltaX * (1 + fDeltaX * (4 - fThreeDeltaX));
    3439             :     pafCoeffs[3] = fHalfDeltaX2 * (-1 + fDeltaX);
    3440             :     __m128 xmmCoeffs = _mm_load_ps(pafCoeffs);
    3441             :     const __m128 xmmThreshold = _mm_load1_ps(&SRC_DENSITY_THRESHOLD_FLOAT);
    3442             : 
    3443             :     __m128 xmmMaskLowDensity = _mm_setzero_ps();
    3444             :     for (GPtrDiff_t i = -1, iOffset = iSrcOffset - poWK->nSrcXSize - 1; i < 3;
    3445             :          i++, iOffset += poWK->nSrcXSize)
    3446             :     {
    3447             :         const __m128 xmmDensity =
    3448             :             _mm_loadu_ps(poWK->pafUnifiedSrcDensity + iOffset);
    3449             :         xmmMaskLowDensity = _mm_or_ps(xmmMaskLowDensity,
    3450             :                                       _mm_cmplt_ps(xmmDensity, xmmThreshold));
    3451             :         pafDensity[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
    3452             : 
    3453             :         const __m128 xmmValues =
    3454             :             XMMLoad4Values(((T *)poWK->papabySrcImage[iBand]) + iOffset);
    3455             :         pafValue[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValues));
    3456             :     }
    3457             :     if (_mm_movemask_ps(xmmMaskLowDensity))
    3458             :     {
    3459             :         double adfImagIgnored[4] = {};
    3460             :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3461             :                                           pdfDensity, pdfReal, adfImagIgnored);
    3462             :     }
    3463             : 
    3464             :     const float fHalfDeltaY = 0.5f * fDeltaY;
    3465             :     const float fThreeDeltaY = 3.0f * fDeltaY;
    3466             :     const float fHalfDeltaY2 = fHalfDeltaY * fDeltaY;
    3467             : 
    3468             :     pafCoeffs[0] = fHalfDeltaY * (-1 + fDeltaY * (2 - fDeltaY));
    3469             :     pafCoeffs[1] = 1 + fHalfDeltaY2 * (-5 + fThreeDeltaY);
    3470             :     pafCoeffs[2] = fHalfDeltaY * (1 + fDeltaY * (4 - fThreeDeltaY));
    3471             :     pafCoeffs[3] = fHalfDeltaY2 * (-1 + fDeltaY);
    3472             : 
    3473             :     xmmCoeffs = _mm_load_ps(pafCoeffs);
    3474             : 
    3475             :     const __m128 xmmDensity = _mm_load_ps(pafDensity);
    3476             :     const __m128 xmmValue = _mm_load_ps(pafValue);
    3477             :     *pdfDensity = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
    3478             :     *pdfReal = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValue));
    3479             : 
    3480             :     // We did all above computations on float32 whereas the general case is
    3481             :     // float64. Not sure if one is fundamentally more correct than the other
    3482             :     // one, but we want our optimization to give the same result as the
    3483             :     // general case as much as possible, so if the resulting value is
    3484             :     // close to some_int_value + 0.5, redo the computation with the general
    3485             :     // case.
    3486             :     // Note: If other types than Byte or UInt16, will need changes.
    3487             :     if (fabs(*pdfReal - static_cast<int>(*pdfReal) - 0.5) > .007)
    3488             :         return true;
    3489             : 
    3490             : #endif  // defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
    3491             : 
    3492      387271 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3493      387271 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3494             : 
    3495      387271 :     double adfValueDens[4] = {};
    3496      387271 :     double adfValueReal[4] = {};
    3497             : 
    3498      387271 :     double adfCoeffsX[4] = {};
    3499      387271 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
    3500             : 
    3501      387271 :     double adfCoeffsY[4] = {};
    3502      387271 :     GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
    3503             : 
    3504     1930200 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3505             :     {
    3506     1544480 :         const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
    3507             : #if !(defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2))
    3508     1544480 :         if (poWK->pafUnifiedSrcDensity[iOffset + 0] <
    3509     1542940 :                 SRC_DENSITY_THRESHOLD_FLOAT ||
    3510     1542940 :             poWK->pafUnifiedSrcDensity[iOffset + 1] <
    3511     1542940 :                 SRC_DENSITY_THRESHOLD_FLOAT ||
    3512     1542940 :             poWK->pafUnifiedSrcDensity[iOffset + 2] <
    3513     1542940 :                 SRC_DENSITY_THRESHOLD_FLOAT ||
    3514     1542940 :             poWK->pafUnifiedSrcDensity[iOffset + 3] <
    3515             :                 SRC_DENSITY_THRESHOLD_FLOAT)
    3516             :         {
    3517        1551 :             double adfImagIgnored[4] = {};
    3518        1551 :             return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3519             :                                               pdfDensity, pdfReal,
    3520        1551 :                                               adfImagIgnored);
    3521             :         }
    3522             : #endif
    3523             : 
    3524     3085860 :         adfValueDens[i + 1] =
    3525     1542930 :             CONVOL4(adfCoeffsX, poWK->pafUnifiedSrcDensity + iOffset);
    3526             : 
    3527     1542930 :         adfValueReal[i + 1] = CONVOL4(
    3528             :             adfCoeffsX,
    3529     1542930 :             reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
    3530             :     }
    3531             : 
    3532      385720 :     *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
    3533      385720 :     *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
    3534             : 
    3535      385720 :     return true;
    3536             : }
    3537             : 
    3538             : /************************************************************************/
    3539             : /*              GWKCubicResampleSrcMaskIsDensity4SampleReal()             */
    3540             : /*     Bi-cubic when source has and only has pafUnifiedSrcDensity.      */
    3541             : /************************************************************************/
    3542             : 
    3543           0 : static bool GWKCubicResampleSrcMaskIsDensity4SampleReal(
    3544             :     const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
    3545             :     double *pdfDensity, double *pdfReal)
    3546             : 
    3547             : {
    3548           0 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3549           0 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3550           0 :     const GPtrDiff_t iSrcOffset =
    3551           0 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3552           0 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3553           0 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3554             : 
    3555             :     // Get the bilinear interpolation at the image borders.
    3556           0 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3557           0 :         iSrcY + 2 >= poWK->nSrcYSize)
    3558             :     {
    3559           0 :         double adfImagIgnored[4] = {};
    3560           0 :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3561           0 :                                           pdfDensity, pdfReal, adfImagIgnored);
    3562             :     }
    3563             : 
    3564           0 :     double adfCoeffsX[4] = {};
    3565           0 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
    3566             : 
    3567           0 :     double adfCoeffsY[4] = {};
    3568           0 :     GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
    3569             : 
    3570           0 :     double adfValueDens[4] = {};
    3571           0 :     double adfValueReal[4] = {};
    3572           0 :     double adfDensity[4] = {};
    3573           0 :     double adfReal[4] = {};
    3574           0 :     double adfImagIgnored[4] = {};
    3575             : 
    3576           0 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3577             :     {
    3578           0 :         if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
    3579           0 :                             2, adfDensity, adfReal, adfImagIgnored) ||
    3580           0 :             adfDensity[0] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3581           0 :             adfDensity[1] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3582           0 :             adfDensity[2] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3583           0 :             adfDensity[3] < SRC_DENSITY_THRESHOLD_DOUBLE)
    3584             :         {
    3585           0 :             return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3586             :                                               pdfDensity, pdfReal,
    3587           0 :                                               adfImagIgnored);
    3588             :         }
    3589             : 
    3590           0 :         adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
    3591           0 :         adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
    3592             :     }
    3593             : 
    3594           0 :     *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
    3595           0 :     *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
    3596             : 
    3597           0 :     return true;
    3598             : }
    3599             : 
    3600             : template <class T>
    3601     2300964 : static bool GWKCubicResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
    3602             :                                             int iBand, double dfSrcX,
    3603             :                                             double dfSrcY, T *pValue)
    3604             : 
    3605             : {
    3606     2300964 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3607     2300964 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3608     2300964 :     const GPtrDiff_t iSrcOffset =
    3609     2300964 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3610     2300964 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3611     2300964 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3612     2300964 :     const double dfDeltaY2 = dfDeltaY * dfDeltaY;
    3613     2300964 :     const double dfDeltaY3 = dfDeltaY2 * dfDeltaY;
    3614             : 
    3615             :     // Get the bilinear interpolation at the image borders.
    3616     2300964 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3617     1883033 :         iSrcY + 2 >= poWK->nSrcYSize)
    3618      490244 :         return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    3619      490244 :                                                   pValue);
    3620             : 
    3621     1810720 :     double adfCoeffs[4] = {};
    3622     1810720 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffs);
    3623             : 
    3624     1810720 :     double adfValue[4] = {};
    3625             : 
    3626     9053590 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3627             :     {
    3628     7242876 :         const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
    3629             : 
    3630     7242876 :         adfValue[i + 1] = CONVOL4(
    3631             :             adfCoeffs,
    3632     7242876 :             reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
    3633             :     }
    3634             : 
    3635             :     const double dfValue =
    3636     1810720 :         CubicConvolution(dfDeltaY, dfDeltaY2, dfDeltaY3, adfValue[0],
    3637             :                          adfValue[1], adfValue[2], adfValue[3]);
    3638             : 
    3639     1810720 :     *pValue = GWKClampValueT<T>(dfValue);
    3640             : 
    3641     1810720 :     return true;
    3642             : }
    3643             : 
    3644             : /************************************************************************/
    3645             : /*                           GWKLanczosSinc()                           */
    3646             : /************************************************************************/
    3647             : 
    3648             : /*
    3649             :  * Lanczos windowed sinc interpolation kernel with radius r.
    3650             :  *        /
    3651             :  *        | sinc(x) * sinc(x/r), if |x| < r
    3652             :  * L(x) = | 1, if x = 0                     ,
    3653             :  *        | 0, otherwise
    3654             :  *        \
    3655             :  *
    3656             :  * where sinc(x) = sin(PI * x) / (PI * x).
    3657             :  */
    3658             : 
    3659        1632 : static double GWKLanczosSinc(double dfX)
    3660             : {
    3661        1632 :     if (dfX == 0.0)
    3662           0 :         return 1.0;
    3663             : 
    3664        1632 :     const double dfPIX = M_PI * dfX;
    3665        1632 :     const double dfPIXoverR = dfPIX / 3;
    3666        1632 :     const double dfPIX2overR = dfPIX * dfPIXoverR;
    3667             :     // Given that sin(3x) = 3 sin(x) - 4 sin^3 (x)
    3668             :     // we can compute sin(dfSinPIX) from sin(dfPIXoverR)
    3669        1632 :     const double dfSinPIXoverR = sin(dfPIXoverR);
    3670        1632 :     const double dfSinPIXoverRSquared = dfSinPIXoverR * dfSinPIXoverR;
    3671        1632 :     const double dfSinPIXMulSinPIXoverR =
    3672        1632 :         (3 - 4 * dfSinPIXoverRSquared) * dfSinPIXoverRSquared;
    3673        1632 :     return dfSinPIXMulSinPIXoverR / dfPIX2overR;
    3674             : }
    3675             : 
    3676      106692 : static double GWKLanczosSinc4Values(double *padfValues)
    3677             : {
    3678      533460 :     for (int i = 0; i < 4; i++)
    3679             :     {
    3680      426768 :         if (padfValues[i] == 0.0)
    3681             :         {
    3682           0 :             padfValues[i] = 1.0;
    3683             :         }
    3684             :         else
    3685             :         {
    3686      426768 :             const double dfPIX = M_PI * padfValues[i];
    3687      426768 :             const double dfPIXoverR = dfPIX / 3;
    3688      426768 :             const double dfPIX2overR = dfPIX * dfPIXoverR;
    3689             :             // Given that sin(3x) = 3 sin(x) - 4 sin^3 (x)
    3690             :             // we can compute sin(dfSinPIX) from sin(dfPIXoverR)
    3691      426768 :             const double dfSinPIXoverR = sin(dfPIXoverR);
    3692      426768 :             const double dfSinPIXoverRSquared = dfSinPIXoverR * dfSinPIXoverR;
    3693      426768 :             const double dfSinPIXMulSinPIXoverR =
    3694      426768 :                 (3 - 4 * dfSinPIXoverRSquared) * dfSinPIXoverRSquared;
    3695      426768 :             padfValues[i] = dfSinPIXMulSinPIXoverR / dfPIX2overR;
    3696             :         }
    3697             :     }
    3698      106692 :     return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
    3699             : }
    3700             : 
    3701             : /************************************************************************/
    3702             : /*                            GWKBilinear()                             */
    3703             : /************************************************************************/
    3704             : 
    3705     6336240 : static double GWKBilinear(double dfX)
    3706             : {
    3707     6336240 :     double dfAbsX = fabs(dfX);
    3708     6336240 :     if (dfAbsX <= 1.0)
    3709     5866920 :         return 1 - dfAbsX;
    3710             :     else
    3711      469322 :         return 0.0;
    3712             : }
    3713             : 
    3714      106410 : static double GWKBilinear4Values(double *padfValues)
    3715             : {
    3716      106410 :     double dfAbsX0 = fabs(padfValues[0]);
    3717      106410 :     double dfAbsX1 = fabs(padfValues[1]);
    3718      106410 :     double dfAbsX2 = fabs(padfValues[2]);
    3719      106410 :     double dfAbsX3 = fabs(padfValues[3]);
    3720      106410 :     if (dfAbsX0 <= 1.0)
    3721      106410 :         padfValues[0] = 1 - dfAbsX0;
    3722             :     else
    3723           0 :         padfValues[0] = 0.0;
    3724      106410 :     if (dfAbsX1 <= 1.0)
    3725      106410 :         padfValues[1] = 1 - dfAbsX1;
    3726             :     else
    3727           0 :         padfValues[1] = 0.0;
    3728      106410 :     if (dfAbsX2 <= 1.0)
    3729      106410 :         padfValues[2] = 1 - dfAbsX2;
    3730             :     else
    3731           0 :         padfValues[2] = 0.0;
    3732      106410 :     if (dfAbsX3 <= 1.0)
    3733      106394 :         padfValues[3] = 1 - dfAbsX3;
    3734             :     else
    3735          16 :         padfValues[3] = 0.0;
    3736      106410 :     return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
    3737             : }
    3738             : 
    3739             : /************************************************************************/
    3740             : /*                              GWKCubic()                              */
    3741             : /************************************************************************/
    3742             : 
    3743       82838 : static double GWKCubic(double dfX)
    3744             : {
    3745       82838 :     return CubicKernel(dfX);
    3746             : }
    3747             : 
    3748     2442490 : static double GWKCubic4Values(double *padfValues)
    3749             : {
    3750     2442490 :     const double dfAbsX_0 = fabs(padfValues[0]);
    3751     2442490 :     const double dfAbsX_1 = fabs(padfValues[1]);
    3752     2442490 :     const double dfAbsX_2 = fabs(padfValues[2]);
    3753     2442490 :     const double dfAbsX_3 = fabs(padfValues[3]);
    3754     2442490 :     const double dfX2_0 = padfValues[0] * padfValues[0];
    3755     2442490 :     const double dfX2_1 = padfValues[1] * padfValues[1];
    3756     2442490 :     const double dfX2_2 = padfValues[2] * padfValues[2];
    3757     2442490 :     const double dfX2_3 = padfValues[3] * padfValues[3];
    3758             : 
    3759     2442490 :     double dfVal0 = 0.0;
    3760     2442490 :     if (dfAbsX_0 <= 1.0)
    3761      855505 :         dfVal0 = dfX2_0 * (1.5 * dfAbsX_0 - 2.5) + 1.0;
    3762     1586990 :     else if (dfAbsX_0 <= 2.0)
    3763     1586810 :         dfVal0 = dfX2_0 * (-0.5 * dfAbsX_0 + 2.5) - 4.0 * dfAbsX_0 + 2.0;
    3764             : 
    3765     2442490 :     double dfVal1 = 0.0;
    3766     2442490 :     if (dfAbsX_1 <= 1.0)
    3767     1583220 :         dfVal1 = dfX2_1 * (1.5 * dfAbsX_1 - 2.5) + 1.0;
    3768      859273 :     else if (dfAbsX_1 <= 2.0)
    3769      859273 :         dfVal1 = dfX2_1 * (-0.5 * dfAbsX_1 + 2.5) - 4.0 * dfAbsX_1 + 2.0;
    3770             : 
    3771     2442490 :     double dfVal2 = 0.0;
    3772     2442490 :     if (dfAbsX_2 <= 1.0)
    3773     1594220 :         dfVal2 = dfX2_2 * (1.5 * dfAbsX_2 - 2.5) + 1.0;
    3774      848269 :     else if (dfAbsX_2 <= 2.0)
    3775      848269 :         dfVal2 = dfX2_2 * (-0.5 * dfAbsX_2 + 2.5) - 4.0 * dfAbsX_2 + 2.0;
    3776             : 
    3777     2442490 :     double dfVal3 = 0.0;
    3778     2442490 :     if (dfAbsX_3 <= 1.0)
    3779      866232 :         dfVal3 = dfX2_3 * (1.5 * dfAbsX_3 - 2.5) + 1.0;
    3780     1576260 :     else if (dfAbsX_3 <= 2.0)
    3781     1576100 :         dfVal3 = dfX2_3 * (-0.5 * dfAbsX_3 + 2.5) - 4.0 * dfAbsX_3 + 2.0;
    3782             : 
    3783     2442490 :     padfValues[0] = dfVal0;
    3784     2442490 :     padfValues[1] = dfVal1;
    3785     2442490 :     padfValues[2] = dfVal2;
    3786     2442490 :     padfValues[3] = dfVal3;
    3787     2442490 :     return dfVal0 + dfVal1 + dfVal2 + dfVal3;
    3788             : }
    3789             : 
    3790             : /************************************************************************/
    3791             : /*                             GWKBSpline()                             */
    3792             : /************************************************************************/
    3793             : 
    3794             : // https://www.cs.utexas.edu/~fussell/courses/cs384g-fall2013/lectures/mitchell/Mitchell.pdf
    3795             : // Equation 8 with (B,C)=(1,0)
    3796             : // 1/6 * ( 3 * |x|^3 -  6 * |x|^2 + 4) |x| < 1
    3797             : // 1/6 * ( -|x|^3 + 6 |x|^2  - 12|x| + 8) |x| >= 1 and |x| < 2
    3798             : 
    3799      136640 : static double GWKBSpline(double x)
    3800             : {
    3801      136640 :     const double xp2 = x + 2.0;
    3802      136640 :     const double xp1 = x + 1.0;
    3803      136640 :     const double xm1 = x - 1.0;
    3804             : 
    3805             :     // This will most likely be used, so we'll compute it ahead of time to
    3806             :     // avoid stalling the processor.
    3807      136640 :     const double xp2c = xp2 * xp2 * xp2;
    3808             : 
    3809             :     // Note that the test is computed only if it is needed.
    3810             :     // TODO(schwehr): Make this easier to follow.
    3811             :     return xp2 > 0.0
    3812      273280 :                ? ((xp1 > 0.0)
    3813      136640 :                       ? ((x > 0.0)
    3814      122246 :                              ? ((xm1 > 0.0) ? -4.0 * xm1 * xm1 * xm1 : 0.0) +
    3815       87748 :                                    6.0 * x * x * x
    3816             :                              : 0.0) +
    3817      122246 :                             -4.0 * xp1 * xp1 * xp1
    3818             :                       : 0.0) +
    3819             :                      xp2c
    3820      136640 :                : 0.0;  // * 0.166666666666666666666
    3821             : }
    3822             : // Applies the same expression as GWKBSpline() in place to padfValues[0..3] and returns the sum of the four results.
    3823     1895050 : static double GWKBSpline4Values(double *padfValues)
    3824             : {
    3825     9475260 :     for (int i = 0; i < 4; i++)
    3826             :     {
    3827     7580210 :         const double x = padfValues[i];
    3828     7580210 :         const double xp2 = x + 2.0;
    3829     7580210 :         const double xp1 = x + 1.0;
    3830     7580210 :         const double xm1 = x - 1.0;
    3831             : 
    3832             :         // This will most likely be used, so we'll compute it ahead of time to
    3833             :         // avoid stalling the processor.
    3834     7580210 :         const double xp2c = xp2 * xp2 * xp2;
    3835             : 
    3836             :         // Note that the test is computed only if it is needed.
    3837             :         // TODO(schwehr): Make this easier to follow.
    3838     7580210 :         padfValues[i] =  // Overwrites the input abscissa with its (un-normalised) kernel value.
    3839             :             (xp2 > 0.0)
    3840    15103600 :                 ? ((xp1 > 0.0)
    3841     7523380 :                        ? ((x > 0.0)
    3842     5656250 :                               ? ((xm1 > 0.0) ? -4.0 * xm1 * xm1 * xm1 : 0.0) +
    3843     3788270 :                                     6.0 * x * x * x
    3844             :                               : 0.0) +
    3845     5656250 :                              -4.0 * xp1 * xp1 * xp1
    3846             :                        : 0.0) +
    3847             :                       xp2c
    3848             :                 : 0.0;  // * 0.166666666666666666666 (the 1/6 factor is not applied here)
    3849             :     }
    3850     1895050 :     return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
    3851             : }
    3852             : /************************************************************************/
    3853             : /*                         GWKResampleWrkStruct                         */
    3854             : /************************************************************************/
    3855             : 
    3856             : typedef struct _GWKResampleWrkStruct GWKResampleWrkStruct;
    3857             : // Signature shared by GWKResample() and GWKResampleOptimizedLanczos(); one of them is stored in pfnGWKResample.
    3858             : typedef bool (*pfnGWKResampleType)(const GDALWarpKernel *poWK, int iBand,
    3859             :                                    double dfSrcX, double dfSrcY,
    3860             :                                    double *pdfDensity, double *pdfReal,
    3861             :                                    double *pdfImag,
    3862             :                                    GWKResampleWrkStruct *psWrkStruct);
    3863             : // Scratch state built by GWKResampleCreateWrkStruct() and released by GWKResampleDeleteWrkStruct().
    3864             : struct _GWKResampleWrkStruct
    3865             : {
    3866             :     pfnGWKResampleType pfnGWKResample;  // GWKResampleOptimizedLanczos for GRA_Lanczos, otherwise GWKResample.
    3867             : 
    3868             :     // Space for saved X weights.
    3869             :     double *padfWeightsX;  // (nXRadius + 1) * 2 doubles.
    3870             :     bool *pabCalcX;        // Same length; GWKResample() sets pabCalcX[k] once padfWeightsX[k] has been computed.
    3871             :     // Lanczos-only state: Y weights, cache keys (iLast*/dfLast*) and cos/sin of PI*scale and PI*scale/3.
    3872             :     double *padfWeightsY;       // Only used by GWKResampleOptimizedLanczos.
    3873             :     int iLastSrcX;              // Only used by GWKResampleOptimizedLanczos.
    3874             :     int iLastSrcY;              // Only used by GWKResampleOptimizedLanczos.
    3875             :     double dfLastDeltaX;        // Only used by GWKResampleOptimizedLanczos.
    3876             :     double dfLastDeltaY;        // Only used by GWKResampleOptimizedLanczos.
    3877             :     double dfCosPiXScale;       // Only used by GWKResampleOptimizedLanczos.
    3878             :     double dfSinPiXScale;       // Only used by GWKResampleOptimizedLanczos.
    3879             :     double dfCosPiXScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3880             :     double dfSinPiXScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3881             :     double dfCosPiYScale;       // Only used by GWKResampleOptimizedLanczos.
    3882             :     double dfSinPiYScale;       // Only used by GWKResampleOptimizedLanczos.
    3883             :     double dfCosPiYScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3884             :     double dfSinPiYScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3885             : 
    3886             :     // Space for saving a row of pixels ((nXRadius + 1) * 2 doubles each).
    3887             :     double *padfRowDensity;  // nullptr when the kernel has no source density / validity mask at all.
    3888             :     double *padfRowReal;
    3889             :     double *padfRowImag;
    3890             : };
    3891             : 
    3892             : /************************************************************************/
    3893             : /*                     GWKResampleCreateWrkStruct()                     */
    3894             : /************************************************************************/
    3895             : // Forward declarations: GWKResampleCreateWrkStruct() stores the address of one of these two resamplers.
    3896             : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
    3897             :                         double dfSrcY, double *pdfDensity, double *pdfReal,
    3898             :                         double *pdfImag, GWKResampleWrkStruct *psWrkStruct);
    3899             : 
    3900             : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
    3901             :                                         double dfSrcX, double dfSrcY,
    3902             :                                         double *pdfDensity, double *pdfReal,
    3903             :                                         double *pdfImag,
    3904             :                                         GWKResampleWrkStruct *psWrkStruct);
    3905             : // Allocates and initialises the scratch structure for poWK; release it with GWKResampleDeleteWrkStruct().
    3906         401 : static GWKResampleWrkStruct *GWKResampleCreateWrkStruct(GDALWarpKernel *poWK)
    3907             : {
    3908         401 :     const int nXDist = (poWK->nXRadius + 1) * 2;
    3909         401 :     const int nYDist = (poWK->nYRadius + 1) * 2;
    3910             :     // CPLCalloc zero-fills, so every field not assigned below starts zeroed.
    3911             :     GWKResampleWrkStruct *psWrkStruct = static_cast<GWKResampleWrkStruct *>(
    3912         401 :         CPLCalloc(1, sizeof(GWKResampleWrkStruct)));
    3913             : 
    3914             :     // Alloc space for saved X weights.
    3915         401 :     psWrkStruct->padfWeightsX =
    3916         401 :         static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3917         401 :     psWrkStruct->pabCalcX =
    3918         401 :         static_cast<bool *>(CPLMalloc(nXDist * sizeof(bool)));  // Not zeroed here; GWKResample() clears it on every call.
    3919             :     // Y weights and the keys of the Lanczos weight cache.
    3920         401 :     psWrkStruct->padfWeightsY =
    3921         401 :         static_cast<double *>(CPLCalloc(nYDist, sizeof(double)));
    3922         401 :     psWrkStruct->iLastSrcX = -10;
    3923         401 :     psWrkStruct->iLastSrcY = -10;
    3924         401 :     psWrkStruct->dfLastDeltaX = -10;  // Presumably a sentinel: real deltas are fractional parts, so the first X cache test fails.
    3925         401 :     psWrkStruct->dfLastDeltaY = -10;
    3926             : 
    3927             :     // Alloc space for saving a row of pixels. The density row is only needed when some density/validity mask exists.
    3928         401 :     if (poWK->pafUnifiedSrcDensity == nullptr &&
    3929         365 :         poWK->panUnifiedSrcValid == nullptr &&
    3930         342 :         poWK->papanBandSrcValid == nullptr)
    3931             :     {
    3932         342 :         psWrkStruct->padfRowDensity = nullptr;
    3933             :     }
    3934             :     else
    3935             :     {
    3936          59 :         psWrkStruct->padfRowDensity =
    3937          59 :             static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3938             :     }
    3939         401 :     psWrkStruct->padfRowReal =
    3940         401 :         static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3941         401 :     psWrkStruct->padfRowImag =
    3942         401 :         static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3943             :     // Select the resampler; Lanczos also precomputes cos/sin of PI*scale and PI*scale/3 for each axis whose scale is < 1.
    3944         401 :     if (poWK->eResample == GRA_Lanczos)
    3945             :     {
    3946          65 :         psWrkStruct->pfnGWKResample = GWKResampleOptimizedLanczos;
    3947             : 
    3948          65 :         if (poWK->dfXScale < 1)  // Otherwise the four X cos/sin fields stay 0.
    3949             :         {
    3950           4 :             psWrkStruct->dfCosPiXScaleOver3 = cos(M_PI / 3 * poWK->dfXScale);
    3951           4 :             psWrkStruct->dfSinPiXScaleOver3 =  // sin taken as +sqrt(1 - cos^2).
    3952           4 :                 sqrt(1 - psWrkStruct->dfCosPiXScaleOver3 *
    3953           4 :                              psWrkStruct->dfCosPiXScaleOver3);
    3954             :             // "Naive":
    3955             :             // const double dfCosPiXScale = cos(  M_PI * dfXScale );
    3956             :             // const double dfSinPiXScale = sin(  M_PI * dfXScale );
    3957             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
    3958           4 :             psWrkStruct->dfCosPiXScale = (4 * psWrkStruct->dfCosPiXScaleOver3 *
    3959           4 :                                               psWrkStruct->dfCosPiXScaleOver3 -
    3960           4 :                                           3) *
    3961           4 :                                          psWrkStruct->dfCosPiXScaleOver3;
    3962           4 :             psWrkStruct->dfSinPiXScale = sqrt(
    3963           4 :                 1 - psWrkStruct->dfCosPiXScale * psWrkStruct->dfCosPiXScale);
    3964             :         }
    3965             : 
    3966          65 :         if (poWK->dfYScale < 1)  // Otherwise the four Y cos/sin fields stay 0.
    3967             :         {
    3968          12 :             psWrkStruct->dfCosPiYScaleOver3 = cos(M_PI / 3 * poWK->dfYScale);
    3969          12 :             psWrkStruct->dfSinPiYScaleOver3 =  // sin taken as +sqrt(1 - cos^2).
    3970          12 :                 sqrt(1 - psWrkStruct->dfCosPiYScaleOver3 *
    3971          12 :                              psWrkStruct->dfCosPiYScaleOver3);
    3972             :             // "Naive":
    3973             :             // const double dfCosPiYScale = cos(  M_PI * dfYScale );
    3974             :             // const double dfSinPiYScale = sin(  M_PI * dfYScale );
    3975             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
    3976          12 :             psWrkStruct->dfCosPiYScale = (4 * psWrkStruct->dfCosPiYScaleOver3 *
    3977          12 :                                               psWrkStruct->dfCosPiYScaleOver3 -
    3978          12 :                                           3) *
    3979          12 :                                          psWrkStruct->dfCosPiYScaleOver3;
    3980          12 :             psWrkStruct->dfSinPiYScale = sqrt(
    3981          12 :                 1 - psWrkStruct->dfCosPiYScale * psWrkStruct->dfCosPiYScale);
    3982             :         }
    3983             :     }
    3984             :     else
    3985         336 :         psWrkStruct->pfnGWKResample = GWKResample;  // Every other eResample value uses the generic resampler.
    3986             : 
    3987         401 :     return psWrkStruct;
    3988             : }
    3989             : 
    3990             : /************************************************************************/
    3991             : /*                     GWKResampleDeleteWrkStruct()                     */
    3992             : /************************************************************************/
    3993             : // Frees every buffer held by psWrkStruct, then the structure itself. psWrkStruct is dereferenced unconditionally.
    3994         401 : static void GWKResampleDeleteWrkStruct(GWKResampleWrkStruct *psWrkStruct)
    3995             : {
    3996         401 :     CPLFree(psWrkStruct->padfWeightsX);
    3997         401 :     CPLFree(psWrkStruct->padfWeightsY);
    3998         401 :     CPLFree(psWrkStruct->pabCalcX);
    3999         401 :     CPLFree(psWrkStruct->padfRowDensity);  // May be nullptr when no density row was allocated.
    4000         401 :     CPLFree(psWrkStruct->padfRowReal);
    4001         401 :     CPLFree(psWrkStruct->padfRowImag);
    4002         401 :     CPLFree(psWrkStruct);
    4003         401 : }
    4004             : 
    4005             : /************************************************************************/
    4006             : /*                            GWKResample()                             */
    4007             : /************************************************************************/
    4008             : // Weighted average of the source pixels around (dfSrcX, dfSrcY), using apfGWKFilter[poWK->eResample] as the X and Y weight function.
    4009      239383 : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
    4010             :                         double dfSrcY, double *pdfDensity, double *pdfReal,
    4011             :                         double *pdfImag, GWKResampleWrkStruct *psWrkStruct)
    4012             : // Returns false with *pdfDensity = 0 when the total weight (or, with a density row, the total density) is < 1e-6; true otherwise.
    4013             : {
    4014             :     // Save as local variables to avoid following pointers in loops.
    4015      239383 :     const int nSrcXSize = poWK->nSrcXSize;
    4016      239383 :     const int nSrcYSize = poWK->nSrcYSize;
    4017             : 
    4018      239383 :     double dfAccumulatorReal = 0.0;
    4019      239383 :     double dfAccumulatorImag = 0.0;
    4020      239383 :     double dfAccumulatorDensity = 0.0;
    4021      239383 :     double dfAccumulatorWeight = 0.0;
    4022      239383 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    4023      239383 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    4024      239383 :     const GPtrDiff_t iSrcOffset =
    4025      239383 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    4026      239383 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    4027      239383 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    4028             :     // dfDeltaX / dfDeltaY above are the fractional remainders left by the floor() calls.
    4029      239383 :     const double dfXScale = poWK->dfXScale;
    4030      239383 :     const double dfYScale = poWK->dfYScale;
    4031             : 
    4032      239383 :     const int nXDist = (poWK->nXRadius + 1) * 2;  // Same element count GWKResampleCreateWrkStruct() used for pabCalcX.
    4033             : 
    4034             :     // Space for saved X weights.
    4035      239383 :     double *padfWeightsX = psWrkStruct->padfWeightsX;
    4036      239383 :     bool *pabCalcX = psWrkStruct->pabCalcX;
    4037             : 
    4038             :     // Space for saving a row of pixels.
    4039      239383 :     double *padfRowDensity = psWrkStruct->padfRowDensity;
    4040      239383 :     double *padfRowReal = psWrkStruct->padfRowReal;
    4041      239383 :     double *padfRowImag = psWrkStruct->padfRowImag;
    4042             : 
    4043             :     // Mark as needing calculation (don't calculate the weights yet,
    4044             :     // because a mask may render it unnecessary).
    4045      239383 :     memset(pabCalcX, false, nXDist * sizeof(bool));
    4046             : 
    4047      239383 :     FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample];
    4048      239383 :     CPLAssert(pfnGetWeight);
    4049             : 
    4050             :     // Skip sampling over edge of image: clamp the kernel's row and column ranges to the source raster.
    4051      239383 :     int j = poWK->nFiltInitY;
    4052      239383 :     int jMax = poWK->nYRadius;
    4053      239383 :     if (iSrcY + j < 0)
    4054         566 :         j = -iSrcY;
    4055      239383 :     if (iSrcY + jMax >= nSrcYSize)
    4056         662 :         jMax = nSrcYSize - iSrcY - 1;
    4057             : 
    4058      239383 :     int iMin = poWK->nFiltInitX;
    4059      239383 :     int iMax = poWK->nXRadius;
    4060      239383 :     if (iSrcX + iMin < 0)
    4061         566 :         iMin = -iSrcX;
    4062      239383 :     if (iSrcX + iMax >= nSrcXSize)
    4063         659 :         iMax = nSrcXSize - iSrcX - 1;
    4064             : 
    4065      239383 :     const int bXScaleBelow1 = (dfXScale < 1.0);
    4066      239383 :     const int bYScaleBelow1 = (dfYScale < 1.0);
    4067             :     // Start one row above row j: the loop adds nSrcXSize before each use.
    4068      239383 :     GPtrDiff_t iRowOffset =
    4069      239383 :         iSrcOffset + static_cast<GPtrDiff_t>(j - 1) * nSrcXSize + iMin;
    4070             : 
    4071             :     // Loop over pixel rows in the kernel.
    4072     1445930 :     for (; j <= jMax; ++j)
    4073             :     {
    4074     1206540 :         iRowOffset += nSrcXSize;
    4075             :         // NOTE(review): the length passed below is (pixel count + 1) / 2; presumably GWKGetPixelRow() counts pixel pairs - confirm.
    4076             :         // Get pixel values.
    4077             :         // We can potentially read extra elements after the "normal" end of the
    4078             :         // source arrays, but the contract of papabySrcImage[iBand],
    4079             :         // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
    4080             :         // is to have WARP_EXTRA_ELTS reserved at their end.
    4081     1206540 :         if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
    4082             :                             padfRowDensity, padfRowReal, padfRowImag))
    4083          72 :             continue;  // The row contributes nothing when GWKGetPixelRow() returns false.
    4084             : 
    4085             :         // Calculate the Y weight (the filter argument is multiplied by dfYScale when dfYScale < 1).
    4086             :         double dfWeight1 = (bYScaleBelow1)
    4087     1206470 :                                ? pfnGetWeight((j - dfDeltaY) * dfYScale)
    4088        1600 :                                : pfnGetWeight(j - dfDeltaY);
    4089             : 
    4090             :         // Iterate over pixels in row, accumulating X-weighted sums before the row's Y weight is applied.
    4091     1206470 :         double dfAccumulatorRealLocal = 0.0;
    4092     1206470 :         double dfAccumulatorImagLocal = 0.0;
    4093     1206470 :         double dfAccumulatorDensityLocal = 0.0;
    4094     1206470 :         double dfAccumulatorWeightLocal = 0.0;
    4095             : 
    4096     7317420 :         for (int i = iMin; i <= iMax; ++i)
    4097             :         {
    4098             :             // Skip sampling if pixel density is below SRC_DENSITY_THRESHOLD_DOUBLE.
    4099     6110940 :             if (padfRowDensity != nullptr &&
    4100       77277 :                 padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD_DOUBLE)
    4101         546 :                 continue;
    4102             : 
    4103     6110400 :             double dfWeight2 = 0.0;
    4104             : 
    4105             :             // Make or use the cached X weight for this column (the cache is shared by all rows of this call).
    4106     6110400 :             if (pabCalcX[i - iMin])
    4107             :             {
    4108             :                 // Use saved weight value instead of recomputing it.
    4109     4903920 :                 dfWeight2 = padfWeightsX[i - iMin];
    4110             :             }
    4111             :             else
    4112             :             {
    4113             :                 // Calculate & save the X weight.
    4114     1206480 :                 padfWeightsX[i - iMin] = dfWeight2 =
    4115     1206480 :                     (bXScaleBelow1) ? pfnGetWeight((i - dfDeltaX) * dfXScale)
    4116        1600 :                                     : pfnGetWeight(i - dfDeltaX);
    4117             : 
    4118     1206480 :                 pabCalcX[i - iMin] = true;
    4119             :             }
    4120             : 
    4121             :             // Accumulate!
    4122     6110400 :             dfAccumulatorRealLocal += padfRowReal[i - iMin] * dfWeight2;
    4123     6110400 :             dfAccumulatorImagLocal += padfRowImag[i - iMin] * dfWeight2;
    4124     6110400 :             if (padfRowDensity != nullptr)
    4125       76731 :                 dfAccumulatorDensityLocal +=
    4126       76731 :                     padfRowDensity[i - iMin] * dfWeight2;
    4127     6110400 :             dfAccumulatorWeightLocal += dfWeight2;
    4128             :         }
    4129             : 
    4130     1206470 :         dfAccumulatorReal += dfAccumulatorRealLocal * dfWeight1;
    4131     1206470 :         dfAccumulatorImag += dfAccumulatorImagLocal * dfWeight1;
    4132     1206470 :         dfAccumulatorDensity += dfAccumulatorDensityLocal * dfWeight1;
    4133     1206470 :         dfAccumulatorWeight += dfAccumulatorWeightLocal * dfWeight1;
    4134             :     }
    4135             :     // Reject the sample when the total weight is below 1e-6 (negative totals included) or too little density was gathered.
    4136      239383 :     if (dfAccumulatorWeight < 0.000001 ||
    4137        1887 :         (padfRowDensity != nullptr && dfAccumulatorDensity < 0.000001))
    4138             :     {
    4139           0 :         *pdfDensity = 0.0;
    4140           0 :         return false;
    4141             :     }
    4142             : 
    4143             :     // Calculate the output taking into account weighting: normalise by the total weight unless it is within 1e-5 of 1.
    4144      239383 :     if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
    4145             :     {
    4146      239380 :         *pdfReal = dfAccumulatorReal / dfAccumulatorWeight;
    4147      239380 :         *pdfImag = dfAccumulatorImag / dfAccumulatorWeight;
    4148      239380 :         if (padfRowDensity != nullptr)
    4149        1884 :             *pdfDensity = dfAccumulatorDensity / dfAccumulatorWeight;
    4150             :         else
    4151      237496 :             *pdfDensity = 1.0;
    4152             :     }
    4153             :     else
    4154             :     {
    4155           3 :         *pdfReal = dfAccumulatorReal;
    4156           3 :         *pdfImag = dfAccumulatorImag;
    4157           3 :         if (padfRowDensity != nullptr)
    4158           3 :             *pdfDensity = dfAccumulatorDensity;
    4159             :         else
    4160           0 :             *pdfDensity = 1.0;
    4161             :     }
    4162             : 
    4163      239383 :     return true;
    4164             : }
    4165             : 
    4166             : /************************************************************************/
    4167             : /*                    GWKResampleOptimizedLanczos()                     */
    4168             : /************************************************************************/
    4169             : 
    4170      634574 : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
    4171             :                                         double dfSrcX, double dfSrcY,
    4172             :                                         double *pdfDensity, double *pdfReal,
    4173             :                                         double *pdfImag,
    4174             :                                         GWKResampleWrkStruct *psWrkStruct)
    4175             : 
    4176             : {
    4177             :     // Save as local variables to avoid following pointers in loops.
    4178      634574 :     const int nSrcXSize = poWK->nSrcXSize;
    4179      634574 :     const int nSrcYSize = poWK->nSrcYSize;
    4180             : 
    4181      634574 :     double dfAccumulatorReal = 0.0;
    4182      634574 :     double dfAccumulatorImag = 0.0;
    4183      634574 :     double dfAccumulatorDensity = 0.0;
    4184      634574 :     double dfAccumulatorWeight = 0.0;
    4185      634574 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    4186      634574 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    4187      634574 :     const GPtrDiff_t iSrcOffset =
    4188      634574 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    4189      634574 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    4190      634574 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    4191             : 
    4192      634574 :     const double dfXScale = poWK->dfXScale;
    4193      634574 :     const double dfYScale = poWK->dfYScale;
    4194             : 
    4195             :     // Space for saved X weights.
    4196      634574 :     double *const padfWeightsXShifted =
    4197      634574 :         psWrkStruct->padfWeightsX - poWK->nFiltInitX;
    4198      634574 :     double *const padfWeightsYShifted =
    4199      634574 :         psWrkStruct->padfWeightsY - poWK->nFiltInitY;
    4200             : 
    4201             :     // Space for saving a row of pixels.
    4202      634574 :     double *const padfRowDensity = psWrkStruct->padfRowDensity;
    4203      634574 :     double *const padfRowReal = psWrkStruct->padfRowReal;
    4204      634574 :     double *const padfRowImag = psWrkStruct->padfRowImag;
    4205             : 
    4206             :     // Skip sampling over edge of image.
    4207      634574 :     int jMin = poWK->nFiltInitY;
    4208      634574 :     int jMax = poWK->nYRadius;
    4209      634574 :     if (iSrcY + jMin < 0)
    4210       17334 :         jMin = -iSrcY;
    4211      634574 :     if (iSrcY + jMax >= nSrcYSize)
    4212        5638 :         jMax = nSrcYSize - iSrcY - 1;
    4213             : 
    4214      634574 :     int iMin = poWK->nFiltInitX;
    4215      634574 :     int iMax = poWK->nXRadius;
    4216      634574 :     if (iSrcX + iMin < 0)
    4217       19595 :         iMin = -iSrcX;
    4218      634574 :     if (iSrcX + iMax >= nSrcXSize)
    4219        6817 :         iMax = nSrcXSize - iSrcX - 1;
    4220             : 
    4221      634574 :     if (dfXScale < 1.0)
    4222             :     {
    4223      462945 :         while ((iMin - dfDeltaX) * dfXScale < -3.0)
    4224      260083 :             iMin++;
    4225      263534 :         while ((iMax - dfDeltaX) * dfXScale > 3.0)
    4226       60672 :             iMax--;
    4227             : 
    4228             :         // clang-format off
    4229             :         /*
    4230             :         Naive version:
    4231             :         for (int i = iMin; i <= iMax; ++i)
    4232             :         {
    4233             :             psWrkStruct->padfWeightsXShifted[i] =
    4234             :                 GWKLanczosSinc((i - dfDeltaX) * dfXScale);
    4235             :         }
    4236             : 
    4237             :         but given that:
    4238             : 
    4239             :         GWKLanczosSinc(x):
    4240             :             if (dfX == 0.0)
    4241             :                 return 1.0;
    4242             : 
    4243             :             const double dfPIX = M_PI * dfX;
    4244             :             const double dfPIXoverR = dfPIX / 3;
    4245             :             const double dfPIX2overR = dfPIX * dfPIXoverR;
    4246             :             return sin(dfPIX) * sin(dfPIXoverR) / dfPIX2overR;
    4247             : 
    4248             :         and
    4249             :             sin (a + b) = sin a cos b + cos a sin b.
    4250             :             cos (a + b) = cos a cos b - sin a sin b.
    4251             : 
    4252             :         we can skip any sin() computation within the loop
    4253             :         */
    4254             :         // clang-format on
    4255             : 
    4256      202862 :         if (iSrcX != psWrkStruct->iLastSrcX ||
    4257      131072 :             dfDeltaX != psWrkStruct->dfLastDeltaX)
    4258             :         {
    4259       71790 :             double dfX = (iMin - dfDeltaX) * dfXScale;
    4260             : 
    4261       71790 :             double dfPIXover3 = M_PI / 3 * dfX;
    4262       71790 :             double dfCosOver3 = cos(dfPIXover3);
    4263       71790 :             double dfSinOver3 = sin(dfPIXover3);
    4264             : 
    4265             :             // "Naive":
    4266             :             // double dfSin = sin( M_PI * dfX );
    4267             :             // double dfCos = cos( M_PI * dfX );
    4268             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
    4269       71790 :             double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
    4270       71790 :             double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
    4271             : 
    4272       71790 :             const double dfCosPiXScaleOver3 = psWrkStruct->dfCosPiXScaleOver3;
    4273       71790 :             const double dfSinPiXScaleOver3 = psWrkStruct->dfSinPiXScaleOver3;
    4274       71790 :             const double dfCosPiXScale = psWrkStruct->dfCosPiXScale;
    4275       71790 :             const double dfSinPiXScale = psWrkStruct->dfSinPiXScale;
    4276       71790 :             constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
    4277       71790 :             padfWeightsXShifted[iMin] =
    4278       71790 :                 dfX == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfX * dfX);
    4279      683646 :             for (int i = iMin + 1; i <= iMax; ++i)
    4280             :             {
    4281      611856 :                 dfX += dfXScale;
    4282      611856 :                 const double dfNewSin =
    4283      611856 :                     dfSin * dfCosPiXScale + dfCos * dfSinPiXScale;
    4284      611856 :                 const double dfNewSinOver3 = dfSinOver3 * dfCosPiXScaleOver3 +
    4285      611856 :                                              dfCosOver3 * dfSinPiXScaleOver3;
    4286      611856 :                 padfWeightsXShifted[i] =
    4287             :                     dfX == 0
    4288      611856 :                         ? 1.0
    4289      611856 :                         : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfX * dfX);
    4290      611856 :                 const double dfNewCos =
    4291      611856 :                     dfCos * dfCosPiXScale - dfSin * dfSinPiXScale;
    4292      611856 :                 const double dfNewCosOver3 = dfCosOver3 * dfCosPiXScaleOver3 -
    4293      611856 :                                              dfSinOver3 * dfSinPiXScaleOver3;
    4294      611856 :                 dfSin = dfNewSin;
    4295      611856 :                 dfCos = dfNewCos;
    4296      611856 :                 dfSinOver3 = dfNewSinOver3;
    4297      611856 :                 dfCosOver3 = dfNewCosOver3;
    4298             :             }
    4299             : 
    4300       71790 :             psWrkStruct->iLastSrcX = iSrcX;
    4301       71790 :             psWrkStruct->dfLastDeltaX = dfDeltaX;
    4302             :         }
    4303             :     }
    4304             :     else
    4305             :     {
    4306      789372 :         while (iMin - dfDeltaX < -3.0)
    4307      357660 :             iMin++;
    4308      431712 :         while (iMax - dfDeltaX > 3.0)
    4309           0 :             iMax--;
    4310             : 
    4311      431712 :         if (iSrcX != psWrkStruct->iLastSrcX ||
    4312      225330 :             dfDeltaX != psWrkStruct->dfLastDeltaX)
    4313             :         {
    4314             :             // Optimisation of GWKLanczosSinc(i - dfDeltaX) based on the
    4315             :             // following trigonometric formulas.
    4316             : 
    4317             :             // TODO(schwehr): Move this somewhere where it can be rendered at
    4318             :             // LaTeX.
    4319             :             // clang-format off
    4320             :             // sin(M_PI * (dfBase + k)) = sin(M_PI * dfBase) * cos(M_PI * k) +
    4321             :             //                            cos(M_PI * dfBase) * sin(M_PI * k)
    4322             :             // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k) + dfCosPIBase * sin(M_PI * k)
    4323             :             // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k)
    4324             :             // sin(M_PI * (dfBase + k)) = dfSinPIBase * (((k % 2) == 0) ? 1 : -1)
    4325             : 
    4326             :             // sin(M_PI / dfR * (dfBase + k)) = sin(M_PI / dfR * dfBase) * cos(M_PI / dfR * k) +
    4327             :             //                                  cos(M_PI / dfR * dfBase) * sin(M_PI / dfR * k)
    4328             :             // sin(M_PI / dfR * (dfBase + k)) = dfSinPIBaseOverR * cos(M_PI / dfR * k) + dfCosPIBaseOverR * sin(M_PI / dfR * k)
    4329             :             // clang-format on
    4330             : 
    4331      420092 :             const double dfSinPIDeltaXOver3 = sin((-M_PI / 3.0) * dfDeltaX);
    4332      420092 :             const double dfSin2PIDeltaXOver3 =
    4333             :                 dfSinPIDeltaXOver3 * dfSinPIDeltaXOver3;
    4334             :             // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaX < PI/2.
    4335      420092 :             const double dfCosPIDeltaXOver3 = sqrt(1.0 - dfSin2PIDeltaXOver3);
    4336      420092 :             const double dfSinPIDeltaX =
    4337      420092 :                 (3.0 - 4 * dfSin2PIDeltaXOver3) * dfSinPIDeltaXOver3;
    4338      420092 :             const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
    4339      420092 :             const double dfInvPI2Over3xSinPIDeltaX =
    4340             :                 dfInvPI2Over3 * dfSinPIDeltaX;
    4341      420092 :             const double dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 =
    4342      420092 :                 -0.5 * dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3;
    4343      420092 :             const double dfSinPIOver3 = 0.8660254037844386;
    4344      420092 :             const double dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3 =
    4345      420092 :                 dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaX * dfCosPIDeltaXOver3;
    4346             :             const double padfCst[] = {
    4347      420092 :                 dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3,
    4348      420092 :                 dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 -
    4349             :                     dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3,
    4350      420092 :                 dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 +
    4351      420092 :                     dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3};
    4352             : 
    4353     2974940 :             for (int i = iMin; i <= iMax; ++i)
    4354             :             {
    4355     2554850 :                 const double dfX = i - dfDeltaX;
    4356     2554850 :                 if (dfX == 0.0)
    4357       58282 :                     padfWeightsXShifted[i] = 1.0;
    4358             :                 else
    4359     2496570 :                     padfWeightsXShifted[i] = padfCst[(i + 3) % 3] / (dfX * dfX);
    4360             : #if DEBUG_VERBOSE
    4361             :                 // TODO(schwehr): AlmostEqual.
    4362             :                 // CPLAssert(fabs(padfWeightsX[i-poWK->nFiltInitX] -
    4363             :                 //               GWKLanczosSinc(dfX, 3.0)) < 1e-10);
    4364             : #endif
    4365             :             }
    4366             : 
    4367      420092 :             psWrkStruct->iLastSrcX = iSrcX;
    4368      420092 :             psWrkStruct->dfLastDeltaX = dfDeltaX;
    4369             :         }
    4370             :     }
    4371             : 
    4372      634574 :     if (dfYScale < 1.0)
    4373             :     {
    4374       15754 :         while ((jMin - dfDeltaY) * dfYScale < -3.0)
    4375        9500 :             jMin++;
    4376        9854 :         while ((jMax - dfDeltaY) * dfYScale > 3.0)
    4377        3600 :             jMax--;
    4378             : 
    4379             :         // clang-format off
    4380             :         /*
    4381             :         Naive version:
    4382             :         for (int j = jMin; j <= jMax; ++j)
    4383             :         {
    4384             :             padfWeightsYShifted[j] =
    4385             :                 GWKLanczosSinc((j - dfDeltaY) * dfYScale);
    4386             :         }
    4387             :         */
    4388             :         // clang-format on
    4389             : 
    4390        6254 :         if (iSrcY != psWrkStruct->iLastSrcY ||
    4391        6127 :             dfDeltaY != psWrkStruct->dfLastDeltaY)
    4392             :         {
    4393         127 :             double dfY = (jMin - dfDeltaY) * dfYScale;
    4394             : 
    4395         127 :             double dfPIYover3 = M_PI / 3 * dfY;
    4396         127 :             double dfCosOver3 = cos(dfPIYover3);
    4397         127 :             double dfSinOver3 = sin(dfPIYover3);
    4398             : 
    4399             :             // "Naive":
    4400             :             // double dfSin = sin( M_PI * dfY );
    4401             :             // double dfCos = cos( M_PI * dfY );
    4402             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
    4403         127 :             double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
    4404         127 :             double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
    4405             : 
    4406         127 :             const double dfCosPiYScaleOver3 = psWrkStruct->dfCosPiYScaleOver3;
    4407         127 :             const double dfSinPiYScaleOver3 = psWrkStruct->dfSinPiYScaleOver3;
    4408         127 :             const double dfCosPiYScale = psWrkStruct->dfCosPiYScale;
    4409         127 :             const double dfSinPiYScale = psWrkStruct->dfSinPiYScale;
    4410         127 :             constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
    4411         127 :             padfWeightsYShifted[jMin] =
    4412         127 :                 dfY == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfY * dfY);
    4413        1210 :             for (int j = jMin + 1; j <= jMax; ++j)
    4414             :             {
    4415        1083 :                 dfY += dfYScale;
    4416        1083 :                 const double dfNewSin =
    4417        1083 :                     dfSin * dfCosPiYScale + dfCos * dfSinPiYScale;
    4418        1083 :                 const double dfNewSinOver3 = dfSinOver3 * dfCosPiYScaleOver3 +
    4419        1083 :                                              dfCosOver3 * dfSinPiYScaleOver3;
    4420        1083 :                 padfWeightsYShifted[j] =
    4421             :                     dfY == 0
    4422        1083 :                         ? 1.0
    4423        1083 :                         : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfY * dfY);
    4424        1083 :                 const double dfNewCos =
    4425        1083 :                     dfCos * dfCosPiYScale - dfSin * dfSinPiYScale;
    4426        1083 :                 const double dfNewCosOver3 = dfCosOver3 * dfCosPiYScaleOver3 -
    4427        1083 :                                              dfSinOver3 * dfSinPiYScaleOver3;
    4428        1083 :                 dfSin = dfNewSin;
    4429        1083 :                 dfCos = dfNewCos;
    4430        1083 :                 dfSinOver3 = dfNewSinOver3;
    4431        1083 :                 dfCosOver3 = dfNewCosOver3;
    4432             :             }
    4433             : 
    4434         127 :             psWrkStruct->iLastSrcY = iSrcY;
    4435         127 :             psWrkStruct->dfLastDeltaY = dfDeltaY;
    4436             :         }
    4437             :     }
    4438             :     else
    4439             :     {
    4440     1106550 :         while (jMin - dfDeltaY < -3.0)
    4441      478232 :             jMin++;
    4442      628320 :         while (jMax - dfDeltaY > 3.0)
    4443           0 :             jMax--;
    4444             : 
    4445      628320 :         if (iSrcY != psWrkStruct->iLastSrcY ||
    4446      627488 :             dfDeltaY != psWrkStruct->dfLastDeltaY)
    4447             :         {
    4448        7198 :             const double dfSinPIDeltaYOver3 = sin((-M_PI / 3.0) * dfDeltaY);
    4449        7198 :             const double dfSin2PIDeltaYOver3 =
    4450             :                 dfSinPIDeltaYOver3 * dfSinPIDeltaYOver3;
    4451             :             // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaY < PI/2.
    4452        7198 :             const double dfCosPIDeltaYOver3 = sqrt(1.0 - dfSin2PIDeltaYOver3);
    4453        7198 :             const double dfSinPIDeltaY =
    4454        7198 :                 (3.0 - 4.0 * dfSin2PIDeltaYOver3) * dfSinPIDeltaYOver3;
    4455        7198 :             const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
    4456        7198 :             const double dfInvPI2Over3xSinPIDeltaY =
    4457             :                 dfInvPI2Over3 * dfSinPIDeltaY;
    4458        7198 :             const double dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 =
    4459        7198 :                 -0.5 * dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3;
    4460        7198 :             const double dfSinPIOver3 = 0.8660254037844386;
    4461        7198 :             const double dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3 =
    4462        7198 :                 dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaY * dfCosPIDeltaYOver3;
    4463             :             const double padfCst[] = {
    4464        7198 :                 dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3,
    4465        7198 :                 dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 -
    4466             :                     dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3,
    4467        7198 :                 dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 +
    4468        7198 :                     dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3};
    4469             : 
    4470       47777 :             for (int j = jMin; j <= jMax; ++j)
    4471             :             {
    4472       40579 :                 const double dfY = j - dfDeltaY;
    4473       40579 :                 if (dfY == 0.0)
    4474         468 :                     padfWeightsYShifted[j] = 1.0;
    4475             :                 else
    4476       40111 :                     padfWeightsYShifted[j] = padfCst[(j + 3) % 3] / (dfY * dfY);
    4477             : #if DEBUG_VERBOSE
    4478             :                 // TODO(schwehr): AlmostEqual.
    4479             :                 // CPLAssert(fabs(padfWeightsYShifted[j] -
    4480             :                 //               GWKLanczosSinc(dfY, 3.0)) < 1e-10);
    4481             : #endif
    4482             :             }
    4483             : 
    4484        7198 :             psWrkStruct->iLastSrcY = iSrcY;
    4485        7198 :             psWrkStruct->dfLastDeltaY = dfDeltaY;
    4486             :         }
    4487             :     }
    4488             : 
    4489             :     // If we have no density information, we can simply compute the
    4490             :     // accumulated weight.
    4491      634574 :     if (padfRowDensity == nullptr)
    4492             :     {
    4493      634574 :         double dfRowAccWeight = 0.0;
    4494     5159250 :         for (int i = iMin; i <= iMax; ++i)
    4495             :         {
    4496     4524680 :             dfRowAccWeight += padfWeightsXShifted[i];
    4497             :         }
    4498      634574 :         double dfColAccWeight = 0.0;
    4499     4564130 :         for (int j = jMin; j <= jMax; ++j)
    4500             :         {
    4501     3929550 :             dfColAccWeight += padfWeightsYShifted[j];
    4502             :         }
    4503      634574 :         dfAccumulatorWeight = dfRowAccWeight * dfColAccWeight;
    4504             :     }
    4505             : 
    4506             :     // Loop over pixel rows in the kernel.
    4507             : 
    4508      634574 :     if (poWK->eWorkingDataType == GDT_UInt8 && !poWK->panUnifiedSrcValid &&
    4509      633954 :         !poWK->papanBandSrcValid && !poWK->pafUnifiedSrcDensity &&
    4510             :         !padfRowDensity)
    4511             :     {
    4512             :         // Optimization for Byte case without any masking/alpha
    4513             : 
    4514      633954 :         if (dfAccumulatorWeight < 0.000001)
    4515             :         {
    4516           0 :             *pdfDensity = 0.0;
    4517           0 :             return false;
    4518             :         }
    4519             : 
    4520      633954 :         const GByte *pSrc =
    4521      633954 :             reinterpret_cast<const GByte *>(poWK->papabySrcImage[iBand]);
    4522      633954 :         pSrc += iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
    4523             : 
    4524             : #if defined(USE_SSE2)
    4525      633954 :         if (iMax - iMin + 1 == 6)
    4526             :         {
    4527             :             // This is just an optimized version of the general case in
    4528             :             // the else clause.
    4529             : 
    4530      359916 :             pSrc += iMin;
    4531      359916 :             int j = jMin;
    4532             :             const auto fourXWeights =
    4533      359916 :                 XMMReg4Double::Load4Val(padfWeightsXShifted + iMin);
    4534             : 
    4535             :             // Process 2 lines at the same time.
    4536     1424180 :             for (; j < jMax; j += 2)
    4537             :             {
    4538             :                 const XMMReg4Double v_acc =
    4539     1064270 :                     XMMReg4Double::Load4Val(pSrc) * fourXWeights;
    4540             :                 const XMMReg4Double v_acc2 =
    4541     1064270 :                     XMMReg4Double::Load4Val(pSrc + nSrcXSize) * fourXWeights;
    4542     1064270 :                 const double dfRowAcc = v_acc.GetHorizSum();
    4543     1064270 :                 const double dfRowAccEnd =
    4544     1064270 :                     pSrc[4] * padfWeightsXShifted[iMin + 4] +
    4545     1064270 :                     pSrc[5] * padfWeightsXShifted[iMin + 5];
    4546     1064270 :                 dfAccumulatorReal +=
    4547     1064270 :                     (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
    4548     1064270 :                 const double dfRowAcc2 = v_acc2.GetHorizSum();
    4549     1064270 :                 const double dfRowAcc2End =
    4550     1064270 :                     pSrc[nSrcXSize + 4] * padfWeightsXShifted[iMin + 4] +
    4551     1064270 :                     pSrc[nSrcXSize + 5] * padfWeightsXShifted[iMin + 5];
    4552     1064270 :                 dfAccumulatorReal +=
    4553     1064270 :                     (dfRowAcc2 + dfRowAcc2End) * padfWeightsYShifted[j + 1];
    4554     1064270 :                 pSrc += 2 * nSrcXSize;
    4555             :             }
    4556      359916 :             if (j == jMax)
    4557             :             {
    4558             :                 // Process last line if there's an odd number of them.
    4559             : 
    4560             :                 const XMMReg4Double v_acc =
    4561       90039 :                     XMMReg4Double::Load4Val(pSrc) * fourXWeights;
    4562       90039 :                 const double dfRowAcc = v_acc.GetHorizSum();
    4563       90039 :                 const double dfRowAccEnd =
    4564       90039 :                     pSrc[4] * padfWeightsXShifted[iMin + 4] +
    4565       90039 :                     pSrc[5] * padfWeightsXShifted[iMin + 5];
    4566       90039 :                 dfAccumulatorReal +=
    4567       90039 :                     (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
    4568             :             }
    4569             :         }
    4570             :         else
    4571             : #endif
    4572             :         {
    4573     1982080 :             for (int j = jMin; j <= jMax; ++j)
    4574             :             {
    4575     1708040 :                 int i = iMin;
    4576     1708040 :                 double dfRowAcc1 = 0.0;
    4577     1708040 :                 double dfRowAcc2 = 0.0;
    4578             :                 // A bit of loop unrolling
    4579     8474620 :                 for (; i < iMax; i += 2)
    4580             :                 {
    4581     6766580 :                     dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
    4582     6766580 :                     dfRowAcc2 += pSrc[i + 1] * padfWeightsXShifted[i + 1];
    4583             :                 }
    4584     1708040 :                 if (i == iMax)
    4585             :                 {
    4586             :                     // Process last column if there's an odd number of them.
    4587     1188570 :                     dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
    4588             :                 }
    4589             : 
    4590     1708040 :                 dfAccumulatorReal +=
    4591     1708040 :                     (dfRowAcc1 + dfRowAcc2) * padfWeightsYShifted[j];
    4592     1708040 :                 pSrc += nSrcXSize;
    4593             :             }
    4594             :         }
    4595             : 
    4596             :         // Calculate the output taking into account weighting.
    4597      633954 :         if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
    4598             :         {
    4599      579748 :             const double dfInvAcc = 1.0 / dfAccumulatorWeight;
    4600      579748 :             *pdfReal = dfAccumulatorReal * dfInvAcc;
    4601      579748 :             *pdfDensity = 1.0;
    4602             :         }
    4603             :         else
    4604             :         {
    4605       54206 :             *pdfReal = dfAccumulatorReal;
    4606       54206 :             *pdfDensity = 1.0;
    4607             :         }
    4608             : 
    4609      633954 :         return true;
    4610             :     }
    4611             : 
    4612         620 :     GPtrDiff_t iRowOffset =
    4613         620 :         iSrcOffset + static_cast<GPtrDiff_t>(jMin - 1) * nSrcXSize + iMin;
    4614             : 
    4615         620 :     int nCountValid = 0;
    4616         620 :     const bool bIsNonComplex = !GDALDataTypeIsComplex(poWK->eWorkingDataType);
    4617             : 
    4618        3560 :     for (int j = jMin; j <= jMax; ++j)
    4619             :     {
    4620        2940 :         iRowOffset += nSrcXSize;
    4621             : 
    4622             :         // Get pixel values.
    4623             :         // We can potentially read extra elements after the "normal" end of the
    4624             :         // source arrays, but the contract of papabySrcImage[iBand],
    4625             :         // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
    4626             :         // is to have WARP_EXTRA_ELTS reserved at their end.
    4627        2940 :         if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
    4628             :                             padfRowDensity, padfRowReal, padfRowImag))
    4629           0 :             continue;
    4630             : 
    4631        2940 :         const double dfWeight1 = padfWeightsYShifted[j];
    4632             : 
    4633             :         // Iterate over pixels in row.
    4634        2940 :         if (padfRowDensity != nullptr)
    4635             :         {
    4636           0 :             for (int i = iMin; i <= iMax; ++i)
    4637             :             {
    4638             :                 // Skip sampling if pixel has zero density.
    4639           0 :                 if (padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD_DOUBLE)
    4640           0 :                     continue;
    4641             : 
    4642           0 :                 nCountValid++;
    4643             : 
    4644             :                 //  Use a cached set of weights for this row.
    4645           0 :                 const double dfWeight2 = dfWeight1 * padfWeightsXShifted[i];
    4646             : 
    4647             :                 // Accumulate!
    4648           0 :                 dfAccumulatorReal += padfRowReal[i - iMin] * dfWeight2;
    4649           0 :                 dfAccumulatorImag += padfRowImag[i - iMin] * dfWeight2;
    4650           0 :                 dfAccumulatorDensity += padfRowDensity[i - iMin] * dfWeight2;
    4651           0 :                 dfAccumulatorWeight += dfWeight2;
    4652             :             }
    4653             :         }
    4654        2940 :         else if (bIsNonComplex)
    4655             :         {
    4656        1764 :             double dfRowAccReal = 0.0;
    4657       10560 :             for (int i = iMin; i <= iMax; ++i)
    4658             :             {
    4659        8796 :                 const double dfWeight2 = padfWeightsXShifted[i];
    4660             : 
    4661             :                 // Accumulate!
    4662        8796 :                 dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
    4663             :             }
    4664             : 
    4665        1764 :             dfAccumulatorReal += dfRowAccReal * dfWeight1;
    4666             :         }
    4667             :         else
    4668             :         {
    4669        1176 :             double dfRowAccReal = 0.0;
    4670        1176 :             double dfRowAccImag = 0.0;
    4671        7040 :             for (int i = iMin; i <= iMax; ++i)
    4672             :             {
    4673        5864 :                 const double dfWeight2 = padfWeightsXShifted[i];
    4674             : 
    4675             :                 // Accumulate!
    4676        5864 :                 dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
    4677        5864 :                 dfRowAccImag += padfRowImag[i - iMin] * dfWeight2;
    4678             :             }
    4679             : 
    4680        1176 :             dfAccumulatorReal += dfRowAccReal * dfWeight1;
    4681        1176 :             dfAccumulatorImag += dfRowAccImag * dfWeight1;
    4682             :         }
    4683             :     }
    4684             : 
    4685         620 :     if (dfAccumulatorWeight < 0.000001 ||
    4686           0 :         (padfRowDensity != nullptr &&
    4687           0 :          (dfAccumulatorDensity < 0.000001 ||
    4688           0 :           nCountValid < (jMax - jMin + 1) * (iMax - iMin + 1) / 2)))
    4689             :     {
    4690           0 :         *pdfDensity = 0.0;
    4691           0 :         return false;
    4692             :     }
    4693             : 
    4694             :     // Calculate the output taking into account weighting.
    4695         620 :     if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
    4696             :     {
    4697           0 :         const double dfInvAcc = 1.0 / dfAccumulatorWeight;
    4698           0 :         *pdfReal = dfAccumulatorReal * dfInvAcc;
    4699           0 :         *pdfImag = dfAccumulatorImag * dfInvAcc;
    4700           0 :         if (padfRowDensity != nullptr)
    4701           0 :             *pdfDensity = dfAccumulatorDensity * dfInvAcc;
    4702             :         else
    4703           0 :             *pdfDensity = 1.0;
    4704             :     }
    4705             :     else
    4706             :     {
    4707         620 :         *pdfReal = dfAccumulatorReal;
    4708         620 :         *pdfImag = dfAccumulatorImag;
    4709         620 :         if (padfRowDensity != nullptr)
    4710           0 :             *pdfDensity = dfAccumulatorDensity;
    4711             :         else
    4712         620 :             *pdfDensity = 1.0;
    4713             :     }
    4714             : 
    4715         620 :     return true;
    4716             : }
    4717             : 
    4718             : /************************************************************************/
    4719             : /*                         GWKComputeWeights()                          */
    4720             : /************************************************************************/
    4721             : 
                       // Computes the separable horizontal and vertical filter weights of a
                       // kernel window, plus the inverse of their combined sum.
                       //
                       // eResample selects the filter: it indexes apfGWKFilter (one value per
                       // call) and apfGWKFilter4Values (four values per call). Both entries are
                       // asserted to be non-null.
                       //
                       // iMin..iMax and jMin..jMax are the inclusive column and row offsets of
                       // the window. For each offset k the filter is evaluated at
                       // (k - dfDeltaX) * dfXScale horizontally and (k - dfDeltaY) * dfYScale
                       // vertically.
                       //
                       // padfWeightsHorizontal and padfWeightsVertical are outputs indexed from
                       // 0: entry [k - iMin] (resp. [k - jMin]) belongs to offset k, so they must
                       // hold at least iMax - iMin + 1 (resp. jMax - jMin + 1) doubles.
                       //
                       // dfInvWeights receives 1 / (horizontal sum * vertical sum).
    4722     1091070 : static void GWKComputeWeights(GDALResampleAlg eResample, int iMin, int iMax,
    4723             :                               double dfDeltaX, double dfXScale, int jMin,
    4724             :                               int jMax, double dfDeltaY, double dfYScale,
    4725             :                               double *padfWeightsHorizontal,
    4726             :                               double *padfWeightsVertical, double &dfInvWeights)
    4727             : {
    4728             : 
    4729     1091070 :     const FilterFuncType pfnGetWeight = apfGWKFilter[eResample];
    4730     1091070 :     CPLAssert(pfnGetWeight);
    4731     1091070 :     const FilterFunc4ValuesType pfnGetWeight4Values =
    4732     1091070 :         apfGWKFilter4Values[eResample];
    4733     1091070 :     CPLAssert(pfnGetWeight4Values);
    4734             : 
    4735     1091070 :     int i = iMin;  // Used after for.
    4736     1091070 :     int iC = 0;    // Used after for.
    4737             :     // Not zero, but as close as possible to it, to avoid potential division by
    4738             :     // zero at end of function
    4739     1091070 :     double dfAccumulatorWeightHorizontal = cpl::NumericLimits<double>::min();
                           // Chunks of four columns. "i + 2 < iMax" is "i + 3 <= iMax" for ints,
                           // i.e. a full chunk of four still fits inside the window. The four
                           // filter arguments are written into the output array and handed to the
                           // 4-value filter, whose return value is added to the running sum.
                           // NOTE(review): presumably that filter overwrites the four arguments in
                           // place with their weights and returns their sum - confirm against the
                           // apfGWKFilter4Values implementations.
    4740     2403700 :     for (; i + 2 < iMax; i += 4, iC += 4)
    4741             :     {
    4742     1312620 :         padfWeightsHorizontal[iC] = (i - dfDeltaX) * dfXScale;
    4743     1312620 :         padfWeightsHorizontal[iC + 1] = padfWeightsHorizontal[iC] + dfXScale;
    4744     1312620 :         padfWeightsHorizontal[iC + 2] =
    4745     1312620 :             padfWeightsHorizontal[iC + 1] + dfXScale;
    4746     1312620 :         padfWeightsHorizontal[iC + 3] =
    4747     1312620 :             padfWeightsHorizontal[iC + 2] + dfXScale;
    4748     1312620 :         dfAccumulatorWeightHorizontal +=
    4749     1312620 :             pfnGetWeight4Values(padfWeightsHorizontal + iC);
    4750             :     }
                           // At most three columns remain; evaluate them one at a time with the
                           // scalar filter.
    4751     1145700 :     for (; i <= iMax; ++i, ++iC)
    4752             :     {
    4753       54623 :         const double dfWeight = pfnGetWeight((i - dfDeltaX) * dfXScale);
    4754       54623 :         padfWeightsHorizontal[iC] = dfWeight;
    4755       54623 :         dfAccumulatorWeightHorizontal += dfWeight;
    4756             :     }
    4757             : 
                           // Same two-stage scheme for the rows of the window.
    4758     1091070 :     int j = jMin;  // Used after for.
    4759     1091070 :     int jC = 0;    // Used after for.
    4760             :     // Not zero, but as close as possible to it, to avoid potential division by
    4761             :     // zero at end of function
    4762     1091070 :     double dfAccumulatorWeightVertical = cpl::NumericLimits<double>::min();
    4763     2332840 :     for (; j + 2 < jMax; j += 4, jC += 4)
    4764             :     {
    4765     1241770 :         padfWeightsVertical[jC] = (j - dfDeltaY) * dfYScale;
    4766     1241770 :         padfWeightsVertical[jC + 1] = padfWeightsVertical[jC] + dfYScale;
    4767     1241770 :         padfWeightsVertical[jC + 2] = padfWeightsVertical[jC + 1] + dfYScale;
    4768     1241770 :         padfWeightsVertical[jC + 3] = padfWeightsVertical[jC + 2] + dfYScale;
    4769     1241770 :         dfAccumulatorWeightVertical +=
    4770     1241770 :             pfnGetWeight4Values(padfWeightsVertical + jC);
    4771             :     }
    4772     1152230 :     for (; j <= jMax; ++j, ++jC)
    4773             :     {
    4774       61154 :         const double dfWeight = pfnGetWeight((j - dfDeltaY) * dfYScale);
    4775       61154 :         padfWeightsVertical[jC] = dfWeight;
    4776       61154 :         dfAccumulatorWeightVertical += dfWeight;
    4777             :     }
    4778             : 
                           // One combined normalisation factor: the reciprocal of the product of
                           // the two per-axis sums.
    4779     1091070 :     dfInvWeights =
    4780     1091070 :         1. / (dfAccumulatorWeightHorizontal * dfAccumulatorWeightVertical);
    4781     1091070 : }
    4782             : 
    4783             : /************************************************************************/
    4784             : /*                        GWKResampleNoMasksT()                         */
    4785             : /************************************************************************/
    4786             : 
    4787             : template <class T>
    4788             : static bool
    4789             : GWKResampleNoMasksT(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
    4790             :                     double dfSrcY, T *pValue, double *padfWeightsHorizontal,
    4791             :                     double *padfWeightsVertical, double &dfInvWeights)
    4792             : 
    4793             : {
    4794             :     // Commonly used; save locally.
    4795             :     const int nSrcXSize = poWK->nSrcXSize;
    4796             :     const int nSrcYSize = poWK->nSrcYSize;
    4797             : 
    4798             :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    4799             :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    4800             :     const GPtrDiff_t iSrcOffset =
    4801             :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    4802             : 
    4803             :     const int nXRadius = poWK->nXRadius;
    4804             :     const int nYRadius = poWK->nYRadius;
    4805             : 
    4806             :     // Politely refuse to process invalid coordinates or obscenely small image.
    4807             :     if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
    4808             :         nYRadius > nSrcYSize)
    4809             :         return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    4810             :                                                   pValue);
    4811             : 
    4812             :     T *pSrcBand = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
    4813             :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    4814             :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    4815             : 
    4816             :     const double dfXScale = std::min(poWK->dfXScale, 1.0);
    4817             :     const double dfYScale = std::min(poWK->dfYScale, 1.0);
    4818             : 
    4819             :     int iMin = 1 - nXRadius;
    4820             :     if (iSrcX + iMin < 0)
    4821             :         iMin = -iSrcX;
    4822             :     int iMax = nXRadius;
    4823             :     if (iSrcX + iMax >= nSrcXSize - 1)
    4824             :         iMax = nSrcXSize - 1 - iSrcX;
    4825             : 
    4826             :     int jMin = 1 - nYRadius;
    4827             :     if (iSrcY + jMin < 0)
    4828             :         jMin = -iSrcY;
    4829             :     int jMax = nYRadius;
    4830             :     if (iSrcY + jMax >= nSrcYSize - 1)
    4831             :         jMax = nSrcYSize - 1 - iSrcY;
    4832             : 
    4833             :     if (iBand == 0)
    4834             :     {
    4835             :         GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
    4836             :                           jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
    4837             :                           padfWeightsVertical, dfInvWeights);
    4838             :     }
    4839             : 
    4840             :     // Loop over all rows in the kernel.
    4841             :     double dfAccumulator = 0.0;
    4842             :     for (int jC = 0, j = jMin; j <= jMax; ++j, ++jC)
    4843             :     {
    4844             :         const GPtrDiff_t iSampJ =
    4845             :             iSrcOffset + static_cast<GPtrDiff_t>(j) * nSrcXSize;
    4846             : 
    4847             :         // Loop over all pixels in the row.
    4848             :         double dfAccumulatorLocal = 0.0;
    4849             :         double dfAccumulatorLocal2 = 0.0;
    4850             :         int iC = 0;
    4851             :         int i = iMin;
    4852             :         // Process by chunk of 4 cols.
    4853             :         for (; i + 2 < iMax; i += 4, iC += 4)
    4854             :         {
    4855             :             // Retrieve the pixel & accumulate.
    4856             :             dfAccumulatorLocal +=
    4857             :                 double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
    4858             :             dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
    4859             :                                   padfWeightsHorizontal[iC + 1];
    4860             :             dfAccumulatorLocal2 += double(pSrcBand[i + 2 + iSampJ]) *
    4861             :                                    padfWeightsHorizontal[iC + 2];
    4862             :             dfAccumulatorLocal2 += double(pSrcBand[i + 3 + iSampJ]) *
    4863             :                                    padfWeightsHorizontal[iC + 3];
    4864             :         }
    4865             :         dfAccumulatorLocal += dfAccumulatorLocal2;
    4866             :         if (i < iMax)
    4867             :         {
    4868             :             dfAccumulatorLocal +=
    4869             :                 double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
    4870             :             dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
    4871             :                                   padfWeightsHorizontal[iC + 1];
    4872             :             i += 2;
    4873             :             iC += 2;
    4874             :         }
    4875             :         if (i == iMax)
    4876             :         {
    4877             :             dfAccumulatorLocal +=
    4878             :                 double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
    4879             :         }
    4880             : 
    4881             :         dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
    4882             :     }
    4883             : 
    4884             :     *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
    4885             : 
    4886             :     return true;
    4887             : }
    4888             : 
    4889             : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
    4890             : /* Could possibly be used too on 32bit, but we would need to check at runtime */
    4891             : #if defined(USE_SSE2)
    4892             : 
    4893             : /************************************************************************/
    4894             : /*                     GWKResampleNoMasks_SSE2_T()                      */
    4895             : /************************************************************************/
    4896             : 
                       // Computes one resampled value for band iBand at source position
                       // (dfSrcX, dfSrcY) as a separable weighted sum of source pixels over a
                       // kernel window, using the XMMReg4Double / XMMReg2Double helpers for the
                       // per-row inner products.
                       //
                       // The window is anchored on pixel (floor(dfSrcX - 0.5), floor(dfSrcY - 0.5))
                       // and nominally spans offsets [1 - radius, radius] in each direction; the
                       // offsets are clipped so that the addressed pixels stay within
                       // [0, nSrcXSize - 1] x [0, nSrcYSize - 1].
                       //
                       // padfWeightsHorizontal, padfWeightsVertical and dfInvWeights are only
                       // (re)computed here when iBand == 0; for any other band they are read
                       // as-is, so presumably the caller processes band 0 first for each output
                       // pixel and keeps the buffers alive across bands -- TODO confirm.
                       //
                       // Returns the result of GWKBilinearResampleNoMasks4SampleT() when the
                       // anchor pixel is at or beyond the right/bottom edge or when a kernel
                       // radius exceeds the source size; otherwise stores the clamped result in
                       // *pValue and returns true.
    4897             : template <class T>
    4898     1382149 : static bool GWKResampleNoMasks_SSE2_T(const GDALWarpKernel *poWK, int iBand,
    4899             :                                       double dfSrcX, double dfSrcY, T *pValue,
    4900             :                                       double *padfWeightsHorizontal,
    4901             :                                       double *padfWeightsVertical,
    4902             :                                       double &dfInvWeights)
    4903             : {
    4904             :     // Commonly used; save locally.
    4905     1382149 :     const int nSrcXSize = poWK->nSrcXSize;
    4906     1382149 :     const int nSrcYSize = poWK->nSrcYSize;
    4907             : 
    4908     1382149 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    4909     1382149 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    4910     1382149 :     const GPtrDiff_t iSrcOffset =
    4911     1382149 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    4912     1382149 :     const int nXRadius = poWK->nXRadius;
    4913     1382149 :     const int nYRadius = poWK->nYRadius;
    4914             : 
    4915             :     // Politely refuse to process invalid coordinates or obscenely small image.
    4916     1382149 :     if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
    4917             :         nYRadius > nSrcYSize)
    4918           3 :         return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    4919           3 :                                                   pValue);
    4920             : 
    4921     1382146 :     const T *pSrcBand =
    4922     1382146 :         reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
    4923             : 
                           // Fractional position of the sample inside the anchor pixel, and the
                           // scale factors (capped at 1.0) handed to the weight computation.
    4924     1382146 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    4925     1382146 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    4926     1382146 :     const double dfXScale = std::min(poWK->dfXScale, 1.0);
    4927     1382146 :     const double dfYScale = std::min(poWK->dfYScale, 1.0);
    4928             : 
                           // Horizontal kernel extent as offsets relative to iSrcX, clipped to the
                           // first and last source columns.
    4929     1382146 :     int iMin = 1 - nXRadius;
    4930     1382146 :     if (iSrcX + iMin < 0)
    4931       20312 :         iMin = -iSrcX;
    4932     1382146 :     int iMax = nXRadius;
    4933     1382146 :     if (iSrcX + iMax >= nSrcXSize - 1)
    4934        7970 :         iMax = nSrcXSize - 1 - iSrcX;
    4935             : 
                           // Vertical kernel extent as offsets relative to iSrcY, clipped to the
                           // first and last source rows.
    4936     1382146 :     int jMin = 1 - nYRadius;
    4937     1382146 :     if (iSrcY + jMin < 0)
    4938       22209 :         jMin = -iSrcY;
    4939     1382146 :     int jMax = nYRadius;
    4940     1382146 :     if (iSrcY + jMax >= nSrcYSize - 1)
    4941        9295 :         jMax = nSrcYSize - 1 - iSrcY;
    4942             : 
                           // Weights are refreshed for band 0 only; other bands reuse whatever the
                           // caller-provided buffers and dfInvWeights currently contain.
    4943     1382146 :     if (iBand == 0)
    4944             :     {
    4945     1091074 :         GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
    4946             :                           jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
    4947             :                           padfWeightsVertical, dfInvWeights);
    4948             :     }
    4949             : 
                           // iSampJ is the linear offset of the anchor column on the current
                           // kernel row; it starts at row jMin and advances with j.
    4950     1382146 :     GPtrDiff_t iSampJ = iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
    4951             :     // Process by chunk of 4 rows.
    4952     1382146 :     int jC = 0;
    4953     1382146 :     int j = jMin;
    4954     1382146 :     double dfAccumulator = 0.0;
                           // j + 2 < jMax guarantees that rows j .. j + 3 are all <= jMax.
    4955     3068580 :     for (; j + 2 < jMax; j += 4, iSampJ += 4 * nSrcXSize, jC += 4)
    4956             :     {
    4957             :         // Loop over all pixels in the row.
    4958     1686436 :         int iC = 0;
    4959     1686436 :         int i = iMin;
    4960             :         // Process by chunk of 4 cols.
                               // One accumulator per row of the 4-row chunk.
    4961     1686436 :         XMMReg4Double v_acc_1 = XMMReg4Double::Zero();
    4962     1686436 :         XMMReg4Double v_acc_2 = XMMReg4Double::Zero();
    4963     1686436 :         XMMReg4Double v_acc_3 = XMMReg4Double::Zero();
    4964     1686436 :         XMMReg4Double v_acc_4 = XMMReg4Double::Zero();
    4965     4251632 :         for (; i + 2 < iMax; i += 4, iC += 4)
    4966             :         {
    4967             :             // Retrieve the pixel & accumulate.
    4968     2565196 :             XMMReg4Double v_pixels_1 =
    4969     2565196 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
    4970     2565196 :             XMMReg4Double v_pixels_2 =
    4971     2565196 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + nSrcXSize);
    4972     2565196 :             XMMReg4Double v_pixels_3 =
    4973     2565196 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
    4974     2565196 :             XMMReg4Double v_pixels_4 =
    4975     2565196 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
    4976             : 
                                   // The same four horizontal weights apply to all four rows.
    4977     2565196 :             XMMReg4Double v_padfWeight =
    4978     2565196 :                 XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
    4979             : 
    4980     2565196 :             v_acc_1 += v_pixels_1 * v_padfWeight;
    4981     2565196 :             v_acc_2 += v_pixels_2 * v_padfWeight;
    4982     2565196 :             v_acc_3 += v_pixels_3 * v_padfWeight;
    4983     2565196 :             v_acc_4 += v_pixels_4 * v_padfWeight;
    4984             :         }
    4985             : 
                               // Here i + 2 >= iMax, so if i < iMax exactly columns i and i + 1
                               // can be handled as a pair without passing iMax.
    4986     1686436 :         if (i < iMax)
    4987             :         {
    4988       25512 :             XMMReg2Double v_pixels_1 =
    4989       25512 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ);
    4990       25512 :             XMMReg2Double v_pixels_2 =
    4991       25512 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + nSrcXSize);
    4992       25512 :             XMMReg2Double v_pixels_3 =
    4993       25512 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
    4994       25512 :             XMMReg2Double v_pixels_4 =
    4995       25512 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
    4996             : 
    4997       25512 :             XMMReg2Double v_padfWeight =
    4998       25512 :                 XMMReg2Double::Load2Val(padfWeightsHorizontal + iC);
    4999             : 
    5000       25512 :             v_acc_1.AddToLow(v_pixels_1 * v_padfWeight);
    5001       25512 :             v_acc_2.AddToLow(v_pixels_2 * v_padfWeight);
    5002       25512 :             v_acc_3.AddToLow(v_pixels_3 * v_padfWeight);
    5003       25512 :             v_acc_4.AddToLow(v_pixels_4 * v_padfWeight);
    5004             : 
    5005       25512 :             i += 2;
    5006       25512 :             iC += 2;
    5007             :         }
    5008             : 
                               // Collapse each vector accumulator into a scalar row sum.
    5009     1686436 :         double dfAccumulatorLocal_1 = v_acc_1.GetHorizSum();
    5010     1686436 :         double dfAccumulatorLocal_2 = v_acc_2.GetHorizSum();
    5011     1686436 :         double dfAccumulatorLocal_3 = v_acc_3.GetHorizSum();
    5012     1686436 :         double dfAccumulatorLocal_4 = v_acc_4.GetHorizSum();
    5013             : 
                               // A single trailing column (index iMax) may remain; add it in
                               // scalar form for each of the four rows.
    5014     1686436 :         if (i == iMax)
    5015             :         {
    5016       27557 :             dfAccumulatorLocal_1 += static_cast<double>(pSrcBand[i + iSampJ]) *
    5017       27557 :                                     padfWeightsHorizontal[iC];
    5018       27557 :             dfAccumulatorLocal_2 +=
    5019       27557 :                 static_cast<double>(pSrcBand[i + iSampJ + nSrcXSize]) *
    5020       27557 :                 padfWeightsHorizontal[iC];
    5021       27557 :             dfAccumulatorLocal_3 +=
    5022       27557 :                 static_cast<double>(pSrcBand[i + iSampJ + 2 * nSrcXSize]) *
    5023       27557 :                 padfWeightsHorizontal[iC];
    5024       27557 :             dfAccumulatorLocal_4 +=
    5025       27557 :                 static_cast<double>(pSrcBand[i + iSampJ + 3 * nSrcXSize]) *
    5026       27557 :                 padfWeightsHorizontal[iC];
    5027             :         }
    5028             : 
                               // Apply the vertical weight of each row to its horizontal sum.
    5029     1686436 :         dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal_1;
    5030     1686436 :         dfAccumulator += padfWeightsVertical[jC + 1] * dfAccumulatorLocal_2;
    5031     1686436 :         dfAccumulator += padfWeightsVertical[jC + 2] * dfAccumulatorLocal_3;
    5032     1686436 :         dfAccumulator += padfWeightsVertical[jC + 3] * dfAccumulatorLocal_4;
    5033             :     }
                           // Remaining rows (fewer than four), handled one at a time with the same
                           // 4 / 2 / 1 column split.
    5034     1456100 :     for (; j <= jMax; ++j, iSampJ += nSrcXSize, ++jC)
    5035             :     {
    5036             :         // Loop over all pixels in the row.
    5037       73954 :         int iC = 0;
    5038       73954 :         int i = iMin;
    5039             :         // Process by chunk of 4 cols.
    5040       73954 :         XMMReg4Double v_acc = XMMReg4Double::Zero();
    5041      172926 :         for (; i + 2 < iMax; i += 4, iC += 4)
    5042             :         {
    5043             :             // Retrieve the pixel & accumulate.
    5044       98972 :             XMMReg4Double v_pixels =
    5045       98972 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
    5046       98972 :             XMMReg4Double v_padfWeight =
    5047       98972 :                 XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
    5048             : 
    5049       98972 :             v_acc += v_pixels * v_padfWeight;
    5050             :         }
    5051             : 
    5052       73954 :         double dfAccumulatorLocal = v_acc.GetHorizSum();
    5053             : 
                               // Trailing pair of columns, in scalar form.
    5054       73954 :         if (i < iMax)
    5055             :         {
    5056        1862 :             dfAccumulatorLocal +=
    5057        1862 :                 double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
    5058        1862 :             dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
    5059        1862 :                                   padfWeightsHorizontal[iC + 1];
    5060        1862 :             i += 2;
    5061        1862 :             iC += 2;
    5062             :         }
                               // Trailing single column.
    5063       73954 :         if (i == iMax)
    5064             :         {
    5065        1803 :             dfAccumulatorLocal += static_cast<double>(pSrcBand[i + iSampJ]) *
    5066        1803 :                                   padfWeightsHorizontal[iC];
    5067             :         }
    5068             : 
    5069       73954 :         dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
    5070             :     }
    5071             : 
                           // Scale by dfInvWeights and clamp to the range of T before storing.
    5072     1382146 :     *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
    5073             : 
    5074     1382146 :     return true;
    5075             : }
    5076             : 
    5077             : /************************************************************************/
    5078             : /*                     GWKResampleNoMasksT<GByte>()                     */
    5079             : /************************************************************************/
    5080             : 
                       // GByte specialization of GWKResampleNoMasksT: forwards all arguments
                       // unchanged to GWKResampleNoMasks_SSE2_T() and returns its result.
                       // Only compiled when USE_SSE2 is defined.
    5081             : template <>
    5082      877023 : bool GWKResampleNoMasksT<GByte>(const GDALWarpKernel *poWK, int iBand,
    5083             :                                 double dfSrcX, double dfSrcY, GByte *pValue,
    5084             :                                 double *padfWeightsHorizontal,
    5085             :                                 double *padfWeightsVertical,
    5086             :                                 double &dfInvWeights)
    5087             : {
    5088      877023 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    5089             :                                      padfWeightsHorizontal, padfWeightsVertical,
    5090      877023 :                                      dfInvWeights);
    5091             : }
    5092             : 
    5093             : /************************************************************************/
    5094             : /*                    GWKResampleNoMasksT<GInt16>()                     */
    5095             : /************************************************************************/
    5096             : 
                       // GInt16 specialization of GWKResampleNoMasksT: forwards all arguments
                       // unchanged to GWKResampleNoMasks_SSE2_T() and returns its result.
                       // Only compiled when USE_SSE2 is defined.
    5097             : template <>
    5098      252563 : bool GWKResampleNoMasksT<GInt16>(const GDALWarpKernel *poWK, int iBand,
    5099             :                                  double dfSrcX, double dfSrcY, GInt16 *pValue,
    5100             :                                  double *padfWeightsHorizontal,
    5101             :                                  double *padfWeightsVertical,
    5102             :                                  double &dfInvWeights)
    5103             : {
    5104      252563 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    5105             :                                      padfWeightsHorizontal, padfWeightsVertical,
    5106      252563 :                                      dfInvWeights);
    5107             : }
    5108             : 
    5109             : /************************************************************************/
    5110             : /*                    GWKResampleNoMasksT<GUInt16>()                    */
    5111             : /************************************************************************/
    5112             : 
                       // GUInt16 specialization of GWKResampleNoMasksT: forwards all arguments
                       // unchanged to GWKResampleNoMasks_SSE2_T() and returns its result.
                       // Only compiled when USE_SSE2 is defined.
    5113             : template <>
    5114      250063 : bool GWKResampleNoMasksT<GUInt16>(const GDALWarpKernel *poWK, int iBand,
    5115             :                                   double dfSrcX, double dfSrcY, GUInt16 *pValue,
    5116             :                                   double *padfWeightsHorizontal,
    5117             :                                   double *padfWeightsVertical,
    5118             :                                   double &dfInvWeights)
    5119             : {
    5120      250063 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    5121             :                                      padfWeightsHorizontal, padfWeightsVertical,
    5122      250063 :                                      dfInvWeights);
    5123             : }
    5124             : 
    5125             : /************************************************************************/
    5126             : /*                     GWKResampleNoMasksT<float>()                     */
    5127             : /************************************************************************/
    5128             : 
                       // float specialization of GWKResampleNoMasksT: forwards all arguments
                       // unchanged to GWKResampleNoMasks_SSE2_T() and returns its result.
                       // Only compiled when USE_SSE2 is defined.
    5129             : template <>
    5130        2500 : bool GWKResampleNoMasksT<float>(const GDALWarpKernel *poWK, int iBand,
    5131             :                                 double dfSrcX, double dfSrcY, float *pValue,
    5132             :                                 double *padfWeightsHorizontal,
    5133             :                                 double *padfWeightsVertical,
    5134             :                                 double &dfInvWeights)
    5135             : {
    5136        2500 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    5137             :                                      padfWeightsHorizontal, padfWeightsVertical,
    5138        2500 :                                      dfInvWeights);
    5139             : }
    5140             : 
    5141             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    5142             : 
    5143             : /************************************************************************/
    5144             : /*                    GWKResampleNoMasksT<double>()                     */
    5145             : /************************************************************************/
    5146             : 
                       // double specialization of GWKResampleNoMasksT: forwards all arguments
                       // unchanged to GWKResampleNoMasks_SSE2_T() and returns its result.
                       // Only compiled when both USE_SSE2 and INSTANTIATE_FLOAT64_SSE2_IMPL are
                       // defined.
    5147             : template <>
    5148             : bool GWKResampleNoMasksT<double>(const GDALWarpKernel *poWK, int iBand,
    5149             :                                  double dfSrcX, double dfSrcY, double *pValue,
    5150             :                                  double *padfWeightsHorizontal,
    5151             :                                  double *padfWeightsVertical,
    5152             :                                  double &dfInvWeights)
    5153             : {
    5154             :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    5155             :                                      padfWeightsHorizontal, padfWeightsVertical,
    5156             :                                      dfInvWeights);
    5157             : }
    5158             : 
    5159             : #endif /* INSTANTIATE_FLOAT64_SSE2_IMPL */
    5160             : 
    5161             : #endif /* defined(USE_SSE2) */
    5162             : 
    5163             : /************************************************************************/
    5164             : /*                     GWKRoundSourceCoordinates()                      */
    5165             : /************************************************************************/
    5166             : 
                       // Snaps the source coordinates of nDstXSize destination points, all
                       // sharing the destination ordinate dfDstY, to multiples of
                       // dfSrcCoordPrecision. padfX and padfY are modified in place.
                       //
                       // When rounding moves either coordinate of a point by more than
                       // dfExactTransformThreshold, that point is re-transformed on its own from
                       // (iDstX + dfDstXOff, dfDstY, 0) through pfnTransformer and rounded again;
                       // that call also overwrites padfZ[iDstX] and pabSuccess[iDstX].
                       //
                       // dfExactTransformThreshold is 0.5 * dfPct * dfSrcCoordPrecision, where
                       // dfPct is 0.8 unless dfErrorThreshold > 0 and
                       // dfSrcCoordPrecision / dfErrorThreshold >= 10, in which case it is
                       // 1 - 2 / (dfSrcCoordPrecision / dfErrorThreshold).
    5167        1000 : static void GWKRoundSourceCoordinates(
    5168             :     int nDstXSize, double *padfX, double *padfY, double *padfZ, int *pabSuccess,
    5169             :     double dfSrcCoordPrecision, double dfErrorThreshold,
    5170             :     GDALTransformerFunc pfnTransformer, void *pTransformerArg, double dfDstXOff,
    5171             :     double dfDstY)
    5172             : {
    5173        1000 :     double dfPct = 0.8;
    5174        1000 :     if (dfErrorThreshold > 0 && dfSrcCoordPrecision / dfErrorThreshold >= 10.0)
    5175             :     {
    5176        1000 :         dfPct = 1.0 - 2 * 1.0 / (dfSrcCoordPrecision / dfErrorThreshold);
    5177             :     }
    5178        1000 :     const double dfExactTransformThreshold = 0.5 * dfPct * dfSrcCoordPrecision;
    5179             : 
    5180      501000 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5181             :     {
                               // Keep the incoming values so the rounding displacement can be
                               // measured below.
    5182      500000 :         const double dfXBefore = padfX[iDstX];
    5183      500000 :         const double dfYBefore = padfY[iDstX];
                               // Round to the nearest multiple of dfSrcCoordPrecision.
    5184      500000 :         padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
    5185             :                        dfSrcCoordPrecision;
    5186      500000 :         padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
    5187             :                        dfSrcCoordPrecision;
    5188             : 
    5189             :         // If we are in an uncertainty zone, go to non-approximated
    5190             :         // transformation.
    5191             :         // Due to the 80% of half-precision threshold, dfSrcCoordPrecision must
    5192             :         // be at least 10 times greater than the approximation error.
    5193      500000 :         if (fabs(dfXBefore - padfX[iDstX]) > dfExactTransformThreshold ||
    5194      399914 :             fabs(dfYBefore - padfY[iDstX]) > dfExactTransformThreshold)
    5195             :         {
                                   // Rebuild the destination coordinate of this point and transform
                                   // it alone.
    5196      180090 :             padfX[iDstX] = iDstX + dfDstXOff;
    5197      180090 :             padfY[iDstX] = dfDstY;
    5198      180090 :             padfZ[iDstX] = 0.0;
    5199      180090 :             pfnTransformer(pTransformerArg, TRUE, 1, padfX + iDstX,
    5200      180090 :                            padfY + iDstX, padfZ + iDstX, pabSuccess + iDstX);
                                   // NOTE(review): the re-rounding below runs regardless of the
                                   // value the transformer stored in pabSuccess[iDstX]; presumably
                                   // callers test pabSuccess afterwards -- confirm.
    5201      180090 :             padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
    5202             :                            dfSrcCoordPrecision;
    5203      180090 :             padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
    5204             :                            dfSrcCoordPrecision;
    5205             :         }
    5206             :     }
    5207        1000 : }
    5208             : 
    5209             : /************************************************************************/
    5210             : /*                    GWKCheckAndComputeSrcOffsets()                    */
    5211             : /************************************************************************/
                       // Validates the source coordinate held in _padfX[_iDstX] / _padfY[_iDstX]
                       // and, if it falls inside the source window, converts it to a linear pixel
                       // offset.
                       //
                       // The window spans [nSrcXOff, nSrcXOff + _nSrcXSize] in X and
                       // [nSrcYOff, nSrcYOff + _nSrcYSize] in Y. A coordinate lying less than one
                       // pixel outside it on the first pass is re-transformed alone from the
                       // destination pixel centre (_iDstX + 0.5 + nDstXOff,
                       // _iDstY + 0.5 + nDstYOff) through _poWK->pfnTransformer and checked once
                       // more; this overwrites _padfX[_iDstX], _padfY[_iDstX] and
                       // _pabSuccess[_iDstX].
                       //
                       // Returns false when _pabSuccess[_iDstX] is zero, when a coordinate is NaN
                       // or when the point is outside the window. Otherwise sets
                       // iSrcOffset = iSrcX + iSrcY * _nSrcXSize, with iSrcX / iSrcY relative to
                       // nSrcXOff / nSrcYOff, and returns true. iSrcOffset is left untouched on
                       // failure.
    5212             : static CPL_INLINE bool
    5213   187159000 : GWKCheckAndComputeSrcOffsets(GWKJobStruct *psJob, int *_pabSuccess, int _iDstX,
    5214             :                              int _iDstY, double *_padfX, double *_padfY,
    5215             :                              int _nSrcXSize, int _nSrcYSize,
    5216             :                              GPtrDiff_t &iSrcOffset)
    5217             : {
    5218   187159000 :     const GDALWarpKernel *_poWK = psJob->poWK;
                           // At most two passes. On the second pass (iTry == 1) every path either
                           // returns or breaks, because each `continue` is guarded by iTry == 0.
    5219   193762000 :     for (int iTry = 0; iTry < 2; ++iTry)
    5220             :     {
    5221   193762000 :         if (iTry == 1)
    5222             :         {
    5223             :             // If the source coordinate is slightly outside of the source raster
    5224             :             // retry to transform it alone, so that the exact coordinate
    5225             :             // transformer is used.
    5226             : 
    5227     6603180 :             _padfX[_iDstX] = _iDstX + 0.5 + _poWK->nDstXOff;
    5228     6603180 :             _padfY[_iDstX] = _iDstY + 0.5 + _poWK->nDstYOff;
    5229     6603180 :             double dfZ = 0;
    5230     6603180 :             _poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1,
    5231     6603180 :                                   _padfX + _iDstX, _padfY + _iDstX, &dfZ,
    5232     6603180 :                                   _pabSuccess + _iDstX);
    5233             :         }
    5234   193762000 :         if (!_pabSuccess[_iDstX])
    5235     3615020 :             return false;
    5236             : 
    5237             :         // If this happens this is likely the symptom of a bug somewhere.
    5238   190147000 :         if (std::isnan(_padfX[_iDstX]) || std::isnan(_padfY[_iDstX]))
    5239             :         {
                                   // NOTE(review): plain function-local static flag with no
                                   // synchronization visible here; if this function runs on several
                                   // threads the debug message could be emitted more than once --
                                   // confirm this is acceptable.
    5240             :             static bool bNanCoordFound = false;
    5241           0 :             if (!bNanCoordFound)
    5242             :             {
    5243           0 :                 CPLDebug("WARP",
    5244             :                          "GWKCheckAndComputeSrcOffsets(): "
    5245             :                          "NaN coordinate found on point %d.",
    5246             :                          _iDstX);
    5247           0 :                 bNanCoordFound = true;
    5248             :             }
    5249           0 :             return false;
    5250             :         }
    5251             : 
    5252             :         /* --------------------------------------------------------------------
    5253             :          */
    5254             :         /*      Figure out what pixel we want in our source raster, and skip */
    5255             :         /*      further processing if it is well off the source image. */
    5256             :         /* --------------------------------------------------------------------
    5257             :          */
    5258             :         /* We test against the value before casting to avoid the */
    5259             :         /* problem of asymmetric truncation effects around zero.  That is */
    5260             :         /* -0.5 will be 0 when cast to an int. */
    5261   190147000 :         if (_padfX[_iDstX] < _poWK->nSrcXOff)
    5262             :         {
    5263             :             // If the source coordinate is slightly outside of the source raster
    5264             :             // retry to transform it alone, so that the exact coordinate
    5265             :             // transformer is used.
    5266    16858100 :             if (iTry == 0 && _padfX[_iDstX] > _poWK->nSrcXOff - 1)
    5267     2889470 :                 continue;
    5268    13968600 :             return false;
    5269             :         }
    5270             : 
    5271   173289000 :         if (_padfY[_iDstX] < _poWK->nSrcYOff)
    5272             :         {
    5273             :             // If the source coordinate is slightly outside of the source raster
    5274             :             // retry to transform it alone, so that the exact coordinate
    5275             :             // transformer is used.
    5276     7890610 :             if (iTry == 0 && _padfY[_iDstX] > _poWK->nSrcYOff - 1)
    5277      635435 :                 continue;
    5278     7255180 :             return false;
    5279             :         }
    5280             : 
    5281             :         // Check for potential overflow when casting from float to int, (if
    5282             :         // operating outside natural projection area, padfX/Y can be a very huge
    5283             :         // positive number before doing the actual conversion), as such cast is
    5284             :         // undefined behavior that can trigger exception with some compilers
    5285             :         // (see #6753)
    5286   165399000 :         if (_padfX[_iDstX] + 1e-10 > _nSrcXSize + _poWK->nSrcXOff)
    5287             :         {
    5288             :             // If the source coordinate is slightly outside of the source raster
    5289             :             // retry to transform it alone, so that the exact coordinate
    5290             :             // transformer is used.
    5291    13193200 :             if (iTry == 0 && _padfX[_iDstX] < _nSrcXSize + _poWK->nSrcXOff + 1)
    5292     2712400 :                 continue;
    5293    10480800 :             return false;
    5294             :         }
    5295   152205000 :         if (_padfY[_iDstX] + 1e-10 > _nSrcYSize + _poWK->nSrcYOff)
    5296             :         {
    5297             :             // If the source coordinate is slightly outside of the source raster
    5298             :             // retry to transform it alone, so that the exact coordinate
    5299             :             // transformer is used.
    5300     5680180 :             if (iTry == 0 && _padfY[_iDstX] < _nSrcYSize + _poWK->nSrcYOff + 1)
    5301      365873 :                 continue;
    5302     5314300 :             return false;
    5303             :         }
    5304             : 
    5305   146525000 :         break;
    5306             :     }
    5307             : 
                           // Truncate to integer pixel indices relative to the window origin. The
                           // same 1e-10 nudge as in the upper-bound tests above is applied before
                           // the cast.
    5308   146525000 :     int iSrcX = static_cast<int>(_padfX[_iDstX] + 1.0e-10) - _poWK->nSrcXOff;
    5309   146525000 :     int iSrcY = static_cast<int>(_padfY[_iDstX] + 1.0e-10) - _poWK->nSrcYOff;
                           // A coordinate exactly on the far edge maps to the last pixel.
    5310   146525000 :     if (iSrcX == _nSrcXSize)
    5311           0 :         iSrcX--;
    5312   146525000 :     if (iSrcY == _nSrcYSize)
    5313           0 :         iSrcY--;
    5314             : 
    5315             :     // Those checks should normally be OK given the previous ones.
    5316   146525000 :     CPLAssert(iSrcX >= 0);
    5317   146525000 :     CPLAssert(iSrcY >= 0);
    5318   146525000 :     CPLAssert(iSrcX < _nSrcXSize);
    5319   146525000 :     CPLAssert(iSrcY < _nSrcYSize);
    5320             : 
    5321   146525000 :     iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * _nSrcXSize;
    5322             : 
    5323   146525000 :     return true;
    5324             : }
    5325             : 
    5326             : /************************************************************************/
    5327             : /*                 GWKOneSourceCornerFailsToReproject()                 */
    5328             : /************************************************************************/
    5329             : 
                       // Runs each of the four corners of the source window,
                       // (nSrcXOff + iX * nSrcXSize, nSrcYOff + iY * nSrcYSize) for iX, iY in
                       // {0, 1}, through poWK->pfnTransformer with its second argument FALSE.
                       // Returns true as soon as one corner is reported as not transformed, and
                       // false if all four succeed.
    5330         938 : static bool GWKOneSourceCornerFailsToReproject(GWKJobStruct *psJob)
    5331             : {
    5332         938 :     GDALWarpKernel *poWK = psJob->poWK;
    5333        2802 :     for (int iY = 0; iY <= 1; ++iY)
    5334             :     {
    5335        5599 :         for (int iX = 0; iX <= 1; ++iX)
    5336             :         {
    5337        3735 :             double dfXTmp = poWK->nSrcXOff + iX * poWK->nSrcXSize;
    5338        3735 :             double dfYTmp = poWK->nSrcYOff + iY * poWK->nSrcYSize;
    5339        3735 :             double dfZTmp = 0;
                                   // Start from FALSE so only an explicit success written by the
                                   // transformer counts.
    5340        3735 :             int nSuccess = FALSE;
    5341        3735 :             poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp,
    5342             :                                  &dfYTmp, &dfZTmp, &nSuccess);
    5343        3735 :             if (!nSuccess)
    5344           7 :                 return true;
    5345             :         }
    5346             :     }
    5347         931 :     return false;
    5348             : }
    5349             : 
    5350             : /************************************************************************/
    5351             : /*                      GWKAdjustSrcOffsetOnEdge()                      */
    5352             : /************************************************************************/
    5353             : /* Relocate iSrcOffset to an adjacent valid source pixel when the current
                     :  * pixel sits next to a position that the transformer rejects.
                     :  *
                     :  * iSrcOffset is decoded as column = iSrcOffset % nSrcXSize and
                     :  * row = iSrcOffset / nSrcXSize. Up to three points are sent through
                     :  * poWK->pfnTransformer (second argument FALSE): (column, row), then
                     :  * (column, row + 1), then (column + 1, row). Probing stops at the first
                     :  * failure. Only when one of the probes fails are the neighbours examined,
                     :  * in the order right, below, left, above; the first one that lies inside
                     :  * the source window and for which CPLMaskGet(poWK->panUnifiedSrcValid, ...)
                     :  * is non-zero is selected.
                     :  *
                     :  * Returns true and updates iSrcOffset in place when a neighbour was
                     :  * selected. Returns false and leaves iSrcOffset untouched otherwise,
                     :  * including when all three probes succeed.
                     :  *
                     :  * poWK->panUnifiedSrcValid is handed to CPLMaskGet without a null check
                     :  * here; the call site visible in GWKGeneralCaseThread() tests it for
                     :  * nullptr beforehand.
                     :  */
    5354        9714 : static bool GWKAdjustSrcOffsetOnEdge(GWKJobStruct *psJob,
    5355             :                                      GPtrDiff_t &iSrcOffset)
    5356             : {
    5357        9714 :     GDALWarpKernel *poWK = psJob->poWK;
    5358        9714 :     const int nSrcXSize = poWK->nSrcXSize;
    5359        9714 :     const int nSrcYSize = poWK->nSrcYSize;
    5360             : 
    5361             :     // Check if the computed source position slightly altered
    5362             :     // fails to reproject. If so, then we are at the edge of
    5363             :     // the validity area, and it is worth checking neighbour
    5364             :     // source pixels for validity.
    5365        9714 :     int nSuccess = FALSE;
    5366             :     {  // Probe 1: the pixel itself, (column, row).
    5367        9714 :         double dfXTmp =
    5368        9714 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5369        9714 :         double dfYTmp =
    5370        9714 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5371        9714 :         double dfZTmp = 0;
    5372        9714 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5373             :                              &dfZTmp, &nSuccess);
    5374             :     }
    5375        9714 :     if (nSuccess)
    5376             :     {  // Probe 2: one row further, (column, row + 1).
    5377        6996 :         double dfXTmp =
    5378        6996 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5379        6996 :         double dfYTmp =
    5380        6996 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
    5381        6996 :         double dfZTmp = 0;
    5382        6996 :         nSuccess = FALSE;
    5383        6996 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5384             :                              &dfZTmp, &nSuccess);
    5385             :     }
    5386        9714 :     if (nSuccess)
    5387             :     {  // Probe 3: one column further, (column + 1, row).
    5388        5624 :         double dfXTmp =
    5389        5624 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
    5390        5624 :         double dfYTmp =
    5391        5624 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5392        5624 :         double dfZTmp = 0;
    5393        5624 :         nSuccess = FALSE;
    5394        5624 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5395             :                              &dfZTmp, &nSuccess);
    5396             :     }
    5397             : // From here on, !nSuccess means that one of the probes above failed.
    5398       14166 :     if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&  // right neighbour, same row
    5399        4452 :         CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + 1))
    5400             :     {
    5401        1860 :         iSrcOffset++;
    5402        1860 :         return true;
    5403             :     }
    5404       10290 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&  // neighbour in the next row
    5405        2436 :              CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + nSrcXSize))
    5406             :     {
    5407        1334 :         iSrcOffset += nSrcXSize;
    5408        1334 :         return true;
    5409             :     }
    5410        7838 :     else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&  // left neighbour, same row
    5411        1318 :              CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - 1))
    5412             :     {
    5413         956 :         iSrcOffset--;
    5414         956 :         return true;
    5415             :     }
    5416        5924 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&  // neighbour in the previous row
    5417         360 :              CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - nSrcXSize))
    5418             :     {
    5419         340 :         iSrcOffset -= nSrcXSize;
    5420         340 :         return true;
    5421             :     }
    5422             : 
    5423        5224 :     return false;  // every probe succeeded, or no usable neighbour was found
    5424             : }
    5425             : 
    5426             : /************************************************************************/
    5427             : /*             GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity()              */
    5428             : /************************************************************************/
    5429             : /* Density-based counterpart of GWKAdjustSrcOffsetOnEdge(): relocate
                     :  * iSrcOffset to an adjacent source pixel whose unified density is high
                     :  * enough, when the current pixel sits next to a position that the
                     :  * transformer rejects.
                     :  *
                     :  * iSrcOffset is decoded as column = iSrcOffset % nSrcXSize and
                     :  * row = iSrcOffset / nSrcXSize. Up to three points are sent through
                     :  * poWK->pfnTransformer (second argument FALSE): (column, row), then
                     :  * (column, row + 1), then (column + 1, row). Probing stops at the first
                     :  * failure. Only when one of the probes fails are the neighbours examined,
                     :  * in the order right, below, left, above; the first one that lies inside
                     :  * the source window and whose poWK->pafUnifiedSrcDensity entry is
                     :  * >= SRC_DENSITY_THRESHOLD_FLOAT is selected.
                     :  *
                     :  * Returns true and updates iSrcOffset in place when a neighbour was
                     :  * selected. Returns false and leaves iSrcOffset untouched otherwise,
                     :  * including when all three probes succeed.
                     :  *
                     :  * poWK->pafUnifiedSrcDensity is indexed without a null check here; the
                     :  * call site visible in GWKGeneralCaseThread() tests it for nullptr
                     :  * beforehand.
                     :  */
    5430           0 : static bool GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(GWKJobStruct *psJob,
    5431             :                                                       GPtrDiff_t &iSrcOffset)
    5432             : {
    5433           0 :     GDALWarpKernel *poWK = psJob->poWK;
    5434           0 :     const int nSrcXSize = poWK->nSrcXSize;
    5435           0 :     const int nSrcYSize = poWK->nSrcYSize;
    5436             : 
    5437             :     // Check if the computed source position slightly altered
    5438             :     // fails to reproject. If so, then we are at the edge of
    5439             :     // the validity area, and it is worth checking neighbour
    5440             :     // source pixels for validity.
    5441           0 :     int nSuccess = FALSE;
    5442             :     {  // Probe 1: the pixel itself, (column, row).
    5443           0 :         double dfXTmp =
    5444           0 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5445           0 :         double dfYTmp =
    5446           0 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5447           0 :         double dfZTmp = 0;
    5448           0 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5449             :                              &dfZTmp, &nSuccess);
    5450             :     }
    5451           0 :     if (nSuccess)
    5452             :     {  // Probe 2: one row further, (column, row + 1).
    5453           0 :         double dfXTmp =
    5454           0 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5455           0 :         double dfYTmp =
    5456           0 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
    5457           0 :         double dfZTmp = 0;
    5458           0 :         nSuccess = FALSE;
    5459           0 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5460             :                              &dfZTmp, &nSuccess);
    5461             :     }
    5462           0 :     if (nSuccess)
    5463             :     {  // Probe 3: one column further, (column + 1, row).
    5464           0 :         double dfXTmp =
    5465           0 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
    5466           0 :         double dfYTmp =
    5467           0 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5468           0 :         double dfZTmp = 0;
    5469           0 :         nSuccess = FALSE;
    5470           0 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5471             :                              &dfZTmp, &nSuccess);
    5472             :     }
    5473             : // From here on, !nSuccess means that one of the probes above failed.
    5474           0 :     if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&  // right neighbour, same row
    5475           0 :         poWK->pafUnifiedSrcDensity[iSrcOffset + 1] >=
    5476             :             SRC_DENSITY_THRESHOLD_FLOAT)
    5477             :     {
    5478           0 :         iSrcOffset++;
    5479           0 :         return true;
    5480             :     }
    5481           0 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&  // neighbour in the next row
    5482           0 :              poWK->pafUnifiedSrcDensity[iSrcOffset + nSrcXSize] >=
    5483             :                  SRC_DENSITY_THRESHOLD_FLOAT)
    5484             :     {
    5485           0 :         iSrcOffset += nSrcXSize;
    5486           0 :         return true;
    5487             :     }
    5488           0 :     else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&  // left neighbour, same row
    5489           0 :              poWK->pafUnifiedSrcDensity[iSrcOffset - 1] >=
    5490             :                  SRC_DENSITY_THRESHOLD_FLOAT)
    5491             :     {
    5492           0 :         iSrcOffset--;
    5493           0 :         return true;
    5494             :     }
    5495           0 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&  // neighbour in the previous row
    5496           0 :              poWK->pafUnifiedSrcDensity[iSrcOffset - nSrcXSize] >=
    5497             :                  SRC_DENSITY_THRESHOLD_FLOAT)
    5498             :     {
    5499           0 :         iSrcOffset -= nSrcXSize;
    5500           0 :         return true;
    5501             :     }
    5502             : 
    5503           0 :     return false;  // every probe succeeded, or no usable neighbour was found
    5504             : }
    5505             : 
    5506             : /************************************************************************/
    5507             : /*                           GWKGeneralCase()                           */
    5508             : /*                                                                      */
    5509             : /*      This is the most general case.  It attempts to handle all       */
    5510             : /*      possible features with relatively little concern for            */
    5511             : /*      efficiency.                                                     */
    5512             : /************************************************************************/
    5513             : /* Worker routine for the general case: warps destination rows
                     :  * [psJob->iYMin, psJob->iYMax) of the chunk described by psJob->poWK.
                     :  *
                     :  * pData is the GWKJobStruct of the job, passed as void *.
                     :  *
                     :  * For every destination row:
                     :  *   1. the pixel-centre coordinates of the row are transformed with
                     :  *      poWK->pfnTransformer (second argument TRUE), optionally followed by
                     :  *      GWKRoundSourceCoordinates() when the SRC_COORD_PRECISION warp option
                     :  *      is > 0;
                     :  *   2. for every pixel, GWKCheckAndComputeSrcOffsets() yields the source
                     :  *      offset; the pixel is skipped when it returns false, or when the
                     :  *      unified source density / validity masks reject it and no neighbour
                     :  *      can be substituted;
                     :  *   3. for every band, a value is sampled (direct read, 4-sample bilinear
                     :  *      or cubic, or the generic pfnGWKResample of the work structure) and
                     :  *      written with GWKSetPixelValue();
                     :  *   4. destination density and validity are updated when at least one band
                     :  *      produced a value.
                     :  * After each row psJob->pfnProgress, if set, is called; a non-zero return
                     :  * value stops the loop.
                     :  *
                     :  * All scratch buffers and the resampling work structure allocated here are
                     :  * released before returning. Nothing is returned.
                     :  */
    5514         239 : static void GWKGeneralCaseThread(void *pData)
    5515             : {
    5516         239 :     GWKJobStruct *psJob = reinterpret_cast<GWKJobStruct *>(pData);  // NOTE(review): GWKRealCaseThread() uses static_cast for the same conversion
    5517         239 :     GDALWarpKernel *poWK = psJob->poWK;
    5518         239 :     const int iYMin = psJob->iYMin;
    5519         239 :     const int iYMax = psJob->iYMax;
    5520             :     const double dfMultFactorVerticalShiftPipeline =  // only read below when poWK->bApplyVerticalShift is set; 0.0 otherwise
    5521         239 :         poWK->bApplyVerticalShift
    5522         239 :             ? CPLAtof(CSLFetchNameValueDef(
    5523           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    5524             :                   "1.0"))
    5525         239 :             : 0.0;
    5526             :     const bool bAvoidNoDataSingleBand =  // true for single-band kernels, or when UNIFIED_SRC_NODATA does not test as true
    5527         239 :         poWK->nBands == 1 ||
    5528           0 :         !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
    5529         239 :                                           "UNIFIED_SRC_NODATA", "FALSE"));
    5530             : 
    5531         239 :     int nDstXSize = poWK->nDstXSize;
    5532         239 :     int nSrcXSize = poWK->nSrcXSize;
    5533         239 :     int nSrcYSize = poWK->nSrcYSize;
    5534             : 
    5535             :     /* -------------------------------------------------------------------- */
    5536             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    5537             :     /*      scanlines worth of positions.                                   */
    5538             :     /* -------------------------------------------------------------------- */
    5539             :     // For x, 2 *, because we cache the precomputed values at the end.
    5540             :     double *padfX =
    5541         239 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    5542             :     double *padfY =
    5543         239 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5544             :     double *padfZ =
    5545         239 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5546         239 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));  // one success flag per pixel of the row
    5547             : 
    5548         239 :     const bool bUse4SamplesFormula = CanUse4SamplesFormula(poWK);
    5549             : 
    5550         239 :     GWKResampleWrkStruct *psWrkStruct = nullptr;  // stays null for nearest neighbour
    5551         239 :     if (poWK->eResample != GRA_NearestNeighbour)
    5552             :     {
    5553         220 :         psWrkStruct = GWKResampleCreateWrkStruct(poWK);
    5554             :     }
    5555         239 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    5556         239 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    5557         239 :     const double dfErrorThreshold = CPLAtof(
    5558         239 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    5559             : 
    5560             :     const bool bOneSourceCornerFailsToReproject =  // enables the neighbour substitution attempted for rejected source pixels
    5561         239 :         GWKOneSourceCornerFailsToReproject(psJob);
    5562             : 
    5563             :     // Precompute values.
    5564        6469 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5565        6230 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;  // pixel-centre X, kept in the second half of padfX
    5566             : 
    5567             :     /* ==================================================================== */
    5568             :     /*      Loop over output lines.                                         */
    5569             :     /* ==================================================================== */
    5570        6469 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    5571             :     {
    5572             :         /* --------------------------------------------------------------------
    5573             :          */
    5574             :         /*      Setup points to transform to source image space. */
    5575             :         /* --------------------------------------------------------------------
    5576             :          */
    5577        6230 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);  // restore the cached X values; the first half is handed to the transformer below and later read as source X
    5578        6230 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    5579      242390 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5580      236160 :             padfY[iDstX] = dfY;
    5581        6230 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    5582             : 
    5583             :         /* --------------------------------------------------------------------
    5584             :          */
    5585             :         /*      Transform the points from destination pixel/line coordinates */
    5586             :         /*      to source pixel/line coordinates. */
    5587             :         /* --------------------------------------------------------------------
    5588             :          */
    5589        6230 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    5590             :                              padfY, padfZ, pabSuccess);
    5591        6230 :         if (dfSrcCoordPrecision > 0.0)
    5592             :         {
    5593           0 :             GWKRoundSourceCoordinates(
    5594             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    5595             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    5596           0 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    5597             :         }
    5598             : 
    5599             :         /* ====================================================================
    5600             :          */
    5601             :         /*      Loop over pixels in output scanline. */
    5602             :         /* ====================================================================
    5603             :          */
    5604      242390 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5605             :         {
    5606      236160 :             GPtrDiff_t iSrcOffset = 0;
    5607      236160 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    5608             :                                               padfX, padfY, nSrcXSize,
    5609             :                                               nSrcYSize, iSrcOffset))
    5610           0 :                 continue;  // no usable source position for this pixel
    5611             : 
    5612             :             /* --------------------------------------------------------------------
    5613             :              */
    5614             :             /*      Do not try to apply transparent/invalid source pixels to the
    5615             :              */
    5616             :             /*      destination.  This currently ignores the multi-pixel input
    5617             :              */
    5618             :             /*      of bilinear and cubic resamples. */
    5619             :             /* --------------------------------------------------------------------
    5620             :              */
    5621      236160 :             double dfDensity = 1.0;  // used as-is when there is no unified source density array
    5622             : 
    5623      236160 :             if (poWK->pafUnifiedSrcDensity != nullptr)
    5624             :             {
    5625        1200 :                 dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    5626        1200 :                 if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
    5627             :                 {
    5628           0 :                     if (!bOneSourceCornerFailsToReproject)
    5629             :                     {
    5630           0 :                         continue;
    5631             :                     }
    5632           0 :                     else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
    5633             :                                  psJob, iSrcOffset))
    5634             :                     {
    5635           0 :                         dfDensity =  // re-read at the substituted neighbour offset
    5636           0 :                             double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    5637             :                     }
    5638             :                     else
    5639             :                     {
    5640           0 :                         continue;
    5641             :                     }
    5642             :                 }
    5643             :             }
    5644             : 
    5645      236160 :             if (poWK->panUnifiedSrcValid != nullptr &&
    5646           0 :                 !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    5647             :             {
    5648           0 :                 if (!bOneSourceCornerFailsToReproject)
    5649             :                 {
    5650           0 :                     continue;
    5651             :                 }
    5652           0 :                 else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))  // on success iSrcOffset now designates a valid neighbour
    5653             :                 {
    5654           0 :                     continue;
    5655             :                 }
    5656             :             }
    5657             : 
    5658             :             /* ====================================================================
    5659             :              */
    5660             :             /*      Loop processing each band. */
    5661             :             /* ====================================================================
    5662             :              */
    5663      236160 :             bool bHasFoundDensity = false;  // set once any band yields a usable value
    5664             : 
    5665      236160 :             const GPtrDiff_t iDstOffset =
    5666      236160 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    5667      472320 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    5668             :             {
    5669      236160 :                 double dfBandDensity = 0.0;
    5670      236160 :                 double dfValueReal = 0.0;
    5671      236160 :                 double dfValueImag = 0.0;
    5672             : 
    5673             :                 /* --------------------------------------------------------------------
    5674             :                  */
    5675             :                 /*      Collect the source value. */
    5676             :                 /* --------------------------------------------------------------------
    5677             :                  */
    5678             :                 // NOTE(review): only the first branch below reads iSrcOffset; the resampling branches use padfX/padfY, so a neighbour substituted above does not change what they sample.
    5679      236160 :                 if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
    5680             :                     nSrcYSize == 1)
    5681             :                 {
    5682             :                     // FALSE is returned if dfBandDensity == 0, which is
    5683             :                     // checked below.
    5684         568 :                     CPL_IGNORE_RET_VAL(GWKGetPixelValue(
    5685             :                         poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal,
    5686             :                         &dfValueImag));
    5687             :                 }
    5688      235592 :                 else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
    5689             :                 {
    5690         248 :                     GWKBilinearResample4Sample(
    5691         248 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5692         248 :                         padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5693             :                         &dfValueReal, &dfValueImag);
    5694             :                 }
    5695      235344 :                 else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
    5696             :                 {
    5697         248 :                     GWKCubicResample4Sample(
    5698         248 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5699         248 :                         padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5700             :                         &dfValueReal, &dfValueImag);
    5701             :                 }
    5702             :                 else
    5703             : #ifdef DEBUG
    5704             :                     // Only useful for clang static analyzer.
    5705      235096 :                     if (psWrkStruct != nullptr)
    5706             : #endif
    5707             :                     {
    5708      235096 :                         psWrkStruct->pfnGWKResample(
    5709      235096 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5710      235096 :                             padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5711             :                             &dfValueReal, &dfValueImag, psWrkStruct);
    5712             :                     }
    5713             : 
    5714             :                 // If we didn't find any valid inputs skip to next band.
    5715      236160 :                 if (dfBandDensity < BAND_DENSITY_THRESHOLD)
    5716           0 :                     continue;
    5717             : 
    5718      236160 :                 if (poWK->bApplyVerticalShift)
    5719             :                 {
    5720           0 :                     if (!std::isfinite(padfZ[iDstX]))
    5721           0 :                         continue;  // non-finite Z: the band is left unwritten for this pixel
    5722             :                     // Subtract padfZ[] since the coordinate transformation is
    5723             :                     // from target to source
    5724           0 :                     dfValueReal =
    5725           0 :                         dfValueReal * poWK->dfMultFactorVerticalShift -
    5726           0 :                         padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
    5727             :                 }
    5728             : 
    5729      236160 :                 bHasFoundDensity = true;
    5730             : 
    5731             :                 /* --------------------------------------------------------------------
    5732             :                  */
    5733             :                 /*      We have a computed value from the source.  Now apply it
    5734             :                  * to      */
    5735             :                 /*      the destination pixel. */
    5736             :                 /* --------------------------------------------------------------------
    5737             :                  */
    5738      236160 :                 GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    5739             :                                  dfValueReal, dfValueImag,
    5740             :                                  bAvoidNoDataSingleBand);
    5741             :             }
    5742             : 
    5743      236160 :             if (!bHasFoundDensity)
    5744           0 :                 continue;  // no band was written: leave destination density and validity untouched
    5745             : 
    5746      236160 :             if (!bAvoidNoDataSingleBand)
    5747             :             {
    5748           0 :                 GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    5749             :             }
    5750             : 
    5751             :             /* --------------------------------------------------------------------
    5752             :              */
    5753             :             /*      Update destination density/validity masks. */
    5754             :             /* --------------------------------------------------------------------
    5755             :              */
    5756      236160 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    5757             : 
    5758      236160 :             if (poWK->panDstValid != nullptr)
    5759             :             {
    5760           0 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    5761             :             }
    5762             :         } /* Next iDstX */
    5763             : 
    5764             :         /* --------------------------------------------------------------------
    5765             :          */
    5766             :         /*      Report progress to the user, and optionally cancel out. */
    5767             :         /* --------------------------------------------------------------------
    5768             :          */
    5769        6230 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    5770           0 :             break;  // non-zero return from the progress callback stops the row loop
    5771             :     }
    5772             : 
    5773             :     /* -------------------------------------------------------------------- */
    5774             :     /*      Cleanup and return.                                             */
    5775             :     /* -------------------------------------------------------------------- */
    5776         239 :     CPLFree(padfX);
    5777         239 :     CPLFree(padfY);
    5778         239 :     CPLFree(padfZ);
    5779         239 :     CPLFree(pabSuccess);
    5780         239 :     if (psWrkStruct)
    5781         220 :         GWKResampleDeleteWrkStruct(psWrkStruct);
    5782         239 : }
    5782             : /* Entry point of the general-case warp: passes GWKGeneralCaseThread() to
                     :  * GWKRun() together with the label "GWKGeneralCase" and returns the CPLErr
                     :  * that GWKRun() produces. How GWKRun() distributes the work is not
                     :  * visible from here.
                     :  */
    5783         239 : static CPLErr GWKGeneralCase(GDALWarpKernel *poWK)
    5784             : {
    5785         239 :     return GWKRun(poWK, "GWKGeneralCase", GWKGeneralCaseThread);
    5786             : }
    5787             : 
    5788             : /************************************************************************/
    5789             : /*                            GWKRealCase()                             */
    5790             : /*                                                                      */
    5791             : /*      General case for non-complex data types.                        */
    5792             : /************************************************************************/
    5793             : 
    5794         223 : static void GWKRealCaseThread(void *pData)
    5795             : 
    5796             : {
    5797         223 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    5798         223 :     GDALWarpKernel *poWK = psJob->poWK;
    5799         223 :     const int iYMin = psJob->iYMin;
    5800         223 :     const int iYMax = psJob->iYMax;
    5801             : 
    5802         223 :     const int nDstXSize = poWK->nDstXSize;
    5803         223 :     const int nSrcXSize = poWK->nSrcXSize;
    5804         223 :     const int nSrcYSize = poWK->nSrcYSize;
    5805             :     const double dfMultFactorVerticalShiftPipeline =
    5806         223 :         poWK->bApplyVerticalShift
    5807         223 :             ? CPLAtof(CSLFetchNameValueDef(
    5808           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    5809             :                   "1.0"))
    5810         223 :             : 0.0;
    5811             :     const bool bAvoidNoDataSingleBand =
    5812         305 :         poWK->nBands == 1 ||
    5813          82 :         !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
    5814         223 :                                           "UNIFIED_SRC_NODATA", "FALSE"));
    5815             : 
    5816             :     /* -------------------------------------------------------------------- */
    5817             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    5818             :     /*      scanlines worth of positions.                                   */
    5819             :     /* -------------------------------------------------------------------- */
    5820             : 
    5821             :     // For x, 2 *, because we cache the precomputed values at the end.
    5822             :     double *padfX =
    5823         223 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    5824             :     double *padfY =
    5825         223 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5826             :     double *padfZ =
    5827         223 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5828         223 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    5829             : 
    5830         223 :     const bool bUse4SamplesFormula = CanUse4SamplesFormula(poWK);
    5831             : 
    5832         223 :     GWKResampleWrkStruct *psWrkStruct = nullptr;
    5833         223 :     if (poWK->eResample != GRA_NearestNeighbour)
    5834             :     {
    5835         181 :         psWrkStruct = GWKResampleCreateWrkStruct(poWK);
    5836             :     }
    5837         223 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    5838         223 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    5839         223 :     const double dfErrorThreshold = CPLAtof(
    5840         223 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    5841             : 
    5842         638 :     const bool bSrcMaskIsDensity = poWK->panUnifiedSrcValid == nullptr &&
    5843         415 :                                    poWK->papanBandSrcValid == nullptr &&
    5844         192 :                                    poWK->pafUnifiedSrcDensity != nullptr;
    5845             : 
    5846             :     const bool bOneSourceCornerFailsToReproject =
    5847         223 :         GWKOneSourceCornerFailsToReproject(psJob);
    5848             : 
    5849             :     // Precompute values.
    5850       24657 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5851       24434 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    5852             : 
    5853             :     /* ==================================================================== */
    5854             :     /*      Loop over output lines.                                         */
    5855             :     /* ==================================================================== */
    5856       25909 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    5857             :     {
    5858             :         /* --------------------------------------------------------------------
    5859             :          */
    5860             :         /*      Setup points to transform to source image space. */
    5861             :         /* --------------------------------------------------------------------
    5862             :          */
    5863       25686 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    5864       25686 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    5865    44594200 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5866    44568500 :             padfY[iDstX] = dfY;
    5867       25686 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    5868             : 
    5869             :         /* --------------------------------------------------------------------
    5870             :          */
    5871             :         /*      Transform the points from destination pixel/line coordinates */
    5872             :         /*      to source pixel/line coordinates. */
    5873             :         /* --------------------------------------------------------------------
    5874             :          */
    5875       25686 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    5876             :                              padfY, padfZ, pabSuccess);
    5877       25686 :         if (dfSrcCoordPrecision > 0.0)
    5878             :         {
    5879           0 :             GWKRoundSourceCoordinates(
    5880             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    5881             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    5882           0 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    5883             :         }
    5884             : 
    5885             :         /* ====================================================================
    5886             :          */
    5887             :         /*      Loop over pixels in output scanline. */
    5888             :         /* ====================================================================
    5889             :          */
    5890    44594200 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5891             :         {
    5892    44568500 :             GPtrDiff_t iSrcOffset = 0;
    5893    44568500 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    5894             :                                               padfX, padfY, nSrcXSize,
    5895             :                                               nSrcYSize, iSrcOffset))
    5896    43823900 :                 continue;
    5897             : 
    5898             :             /* --------------------------------------------------------------------
    5899             :              */
    5900             :             /*      Do not try to apply transparent/invalid source pixels to the
    5901             :              */
    5902             :             /*      destination.  This currently ignores the multi-pixel input
    5903             :              */
    5904             :             /*      of bilinear and cubic resamples. */
    5905             :             /* --------------------------------------------------------------------
    5906             :              */
    5907    31812400 :             double dfDensity = 1.0;
    5908             : 
    5909    31812400 :             if (poWK->pafUnifiedSrcDensity != nullptr)
    5910             :             {
    5911     1669560 :                 dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    5912     1669560 :                 if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
    5913             :                 {
    5914     1538480 :                     if (!bOneSourceCornerFailsToReproject)
    5915             :                     {
    5916     1538480 :                         continue;
    5917             :                     }
    5918           0 :                     else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
    5919             :                                  psJob, iSrcOffset))
    5920             :                     {
    5921           0 :                         dfDensity =
    5922           0 :                             double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    5923             :                     }
    5924             :                     else
    5925             :                     {
    5926           0 :                         continue;
    5927             :                     }
    5928             :                 }
    5929             :             }
    5930             : 
    5931    59903100 :             if (poWK->panUnifiedSrcValid != nullptr &&
    5932    29629200 :                 !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    5933             :             {
    5934    29531600 :                 if (!bOneSourceCornerFailsToReproject)
    5935             :                 {
    5936    29529300 :                     continue;
    5937             :                 }
    5938        2229 :                 else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
    5939             :                 {
    5940           0 :                     continue;
    5941             :                 }
    5942             :             }
    5943             : 
    5944             :             /* ====================================================================
    5945             :              */
    5946             :             /*      Loop processing each band. */
    5947             :             /* ====================================================================
    5948             :              */
    5949      744578 :             bool bHasFoundDensity = false;
    5950             : 
    5951      744578 :             const GPtrDiff_t iDstOffset =
    5952      744578 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    5953     2092550 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    5954             :             {
    5955     1347980 :                 double dfBandDensity = 0.0;
    5956     1347980 :                 double dfValueReal = 0.0;
    5957             : 
    5958             :                 /* --------------------------------------------------------------------
    5959             :                  */
    5960             :                 /*      Collect the source value. */
    5961             :                 /* --------------------------------------------------------------------
    5962             :                  */
    5963     1347980 :                 if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
    5964             :                     nSrcYSize == 1)
    5965             :                 {
    5966             :                     // FALSE is returned if dfBandDensity == 0, which is
    5967             :                     // checked below.
    5968       15516 :                     CPL_IGNORE_RET_VAL(GWKGetPixelValueReal(
    5969             :                         poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal));
    5970             :                 }
    5971     1332460 :                 else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
    5972             :                 {
    5973        2046 :                     double dfValueImagIgnored = 0.0;
    5974        2046 :                     GWKBilinearResample4Sample(
    5975        2046 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5976        2046 :                         padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5977        2046 :                         &dfValueReal, &dfValueImagIgnored);
    5978             :                 }
    5979     1330410 :                 else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
    5980             :                 {
    5981      691552 :                     if (bSrcMaskIsDensity)
    5982             :                     {
    5983      389755 :                         if (poWK->eWorkingDataType == GDT_UInt8)
    5984             :                         {
    5985      389755 :                             GWKCubicResampleSrcMaskIsDensity4SampleRealT<GByte>(
    5986      389755 :                                 poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5987      389755 :                                 padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5988             :                                 &dfValueReal);
    5989             :                         }
    5990           0 :                         else if (poWK->eWorkingDataType == GDT_UInt16)
    5991             :                         {
    5992             :                             GWKCubicResampleSrcMaskIsDensity4SampleRealT<
    5993           0 :                                 GUInt16>(poWK, iBand,
    5994           0 :                                          padfX[iDstX] - poWK->nSrcXOff,
    5995           0 :                                          padfY[iDstX] - poWK->nSrcYOff,
    5996             :                                          &dfBandDensity, &dfValueReal);
    5997             :                         }
    5998             :                         else
    5999             :                         {
    6000           0 :                             GWKCubicResampleSrcMaskIsDensity4SampleReal(
    6001           0 :                                 poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6002           0 :                                 padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    6003             :                                 &dfValueReal);
    6004             :                         }
    6005             :                     }
    6006             :                     else
    6007             :                     {
    6008      301797 :                         double dfValueImagIgnored = 0.0;
    6009      301797 :                         GWKCubicResample4Sample(
    6010      301797 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6011      301797 :                             padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    6012             :                             &dfValueReal, &dfValueImagIgnored);
    6013      691552 :                     }
    6014             :                 }
    6015             :                 else
    6016             : #ifdef DEBUG
    6017             :                     // Only useful for clang static analyzer.
    6018      638861 :                     if (psWrkStruct != nullptr)
    6019             : #endif
    6020             :                     {
    6021      638861 :                         double dfValueImagIgnored = 0.0;
    6022      638861 :                         psWrkStruct->pfnGWKResample(
    6023      638861 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6024      638861 :                             padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    6025             :                             &dfValueReal, &dfValueImagIgnored, psWrkStruct);
    6026             :                     }
    6027             : 
    6028             :                 // If we didn't find any valid inputs skip to next band.
    6029     1347980 :                 if (dfBandDensity < BAND_DENSITY_THRESHOLD)
    6030           0 :                     continue;
    6031             : 
    6032     1347980 :                 if (poWK->bApplyVerticalShift)
    6033             :                 {
    6034           0 :                     if (!std::isfinite(padfZ[iDstX]))
    6035           0 :                         continue;
    6036             :                     // Subtract padfZ[] since the coordinate transformation is
    6037             :                     // from target to source
    6038           0 :                     dfValueReal =
    6039           0 :                         dfValueReal * poWK->dfMultFactorVerticalShift -
    6040           0 :                         padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
    6041             :                 }
    6042             : 
    6043     1347980 :                 bHasFoundDensity = true;
    6044             : 
    6045             :                 /* --------------------------------------------------------------------
    6046             :                  */
    6047             :                 /*      We have a computed value from the source.  Now apply it
    6048             :                  * to      */
    6049             :                 /*      the destination pixel. */
    6050             :                 /* --------------------------------------------------------------------
    6051             :                  */
    6052     1347980 :                 GWKSetPixelValueReal(poWK, iBand, iDstOffset, dfBandDensity,
    6053             :                                      dfValueReal, bAvoidNoDataSingleBand);
    6054             :             }
    6055             : 
    6056      744578 :             if (!bHasFoundDensity)
    6057           0 :                 continue;
    6058             : 
    6059      744578 :             if (!bAvoidNoDataSingleBand)
    6060             :             {
    6061      100295 :                 GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    6062             :             }
    6063             : 
    6064             :             /* --------------------------------------------------------------------
    6065             :              */
    6066             :             /*      Update destination density/validity masks. */
    6067             :             /* --------------------------------------------------------------------
    6068             :              */
    6069      744578 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    6070             : 
    6071      744578 :             if (poWK->panDstValid != nullptr)
    6072             :             {
    6073      104586 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    6074             :             }
    6075             :         }  // Next iDstX.
    6076             : 
    6077             :         /* --------------------------------------------------------------------
    6078             :          */
    6079             :         /*      Report progress to the user, and optionally cancel out. */
    6080             :         /* --------------------------------------------------------------------
    6081             :          */
    6082       25686 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    6083           0 :             break;
    6084             :     }
    6085             : 
    6086             :     /* -------------------------------------------------------------------- */
    6087             :     /*      Cleanup and return.                                             */
    6088             :     /* -------------------------------------------------------------------- */
    6089         223 :     CPLFree(padfX);
    6090         223 :     CPLFree(padfY);
    6091         223 :     CPLFree(padfZ);
    6092         223 :     CPLFree(pabSuccess);
    6093         223 :     if (psWrkStruct)
    6094         181 :         GWKResampleDeleteWrkStruct(psWrkStruct);
    6095         223 : }
    6096             : 
    6097         223 : static CPLErr GWKRealCase(GDALWarpKernel *poWK)
    6098             : {
    6099         223 :     return GWKRun(poWK, "GWKRealCase", GWKRealCaseThread);
    6100             : }
    6101             : 
    6102             : /************************************************************************/
    6103             : /*                 GWKCubicResampleNoMasks4MultiBandT()                 */
    6104             : /************************************************************************/
    6105             : 
    6106             : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
    6107             : /* and enough SSE registers */
    6108             : #if defined(USE_SSE2)
    6109             : 
    6110   142031000 : static inline float Convolute4x4(const __m128 row0, const __m128 row1,
    6111             :                                  const __m128 row2, const __m128 row3,
    6112             :                                  const __m128 weightsXY0,
    6113             :                                  const __m128 weightsXY1,
    6114             :                                  const __m128 weightsXY2,
    6115             :                                  const __m128 weightsXY3)
    6116             : {
    6117   994218000 :     return XMMHorizontalAdd(_mm_add_ps(
    6118             :         _mm_add_ps(_mm_mul_ps(row0, weightsXY0), _mm_mul_ps(row1, weightsXY1)),
    6119             :         _mm_add_ps(_mm_mul_ps(row2, weightsXY2),
    6120   142031000 :                    _mm_mul_ps(row3, weightsXY3))));
    6121             : }
    6122             : 
    6123             : template <class T>
    6124    48826142 : static void GWKCubicResampleNoMasks4MultiBandT(const GDALWarpKernel *poWK,
    6125             :                                                double dfSrcX, double dfSrcY,
    6126             :                                                const GPtrDiff_t iDstOffset)
    6127             : {
    6128    48826142 :     const double dfSrcXShifted = dfSrcX - 0.5;
    6129    48826142 :     const int iSrcX = static_cast<int>(dfSrcXShifted);
    6130    48826142 :     const double dfSrcYShifted = dfSrcY - 0.5;
    6131    48826142 :     const int iSrcY = static_cast<int>(dfSrcYShifted);
    6132    48826142 :     const GPtrDiff_t iSrcOffset =
    6133    48826142 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    6134             : 
    6135             :     // Get the bilinear interpolation at the image borders.
    6136    48826142 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    6137    47479062 :         iSrcY + 2 >= poWK->nSrcYSize)
    6138             :     {
    6139     5929580 :         for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6140             :         {
    6141             :             T value;
    6142     4447190 :             GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    6143             :                                                &value);
    6144     4447190 :             reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
    6145             :                 value;
    6146     1482400 :         }
    6147             :     }
    6148             :     else
    6149             :     {
    6150    47343762 :         const float fDeltaX = static_cast<float>(dfSrcXShifted) - iSrcX;
    6151    47343762 :         const float fDeltaY = static_cast<float>(dfSrcYShifted) - iSrcY;
    6152             : 
    6153             :         float afCoeffsX[4];
    6154             :         float afCoeffsY[4];
    6155    47343762 :         GWKCubicComputeWeights(fDeltaX, afCoeffsX);
    6156    47343762 :         GWKCubicComputeWeights(fDeltaY, afCoeffsY);
    6157    47343762 :         const auto weightsX = _mm_loadu_ps(afCoeffsX);
    6158             :         const auto weightsXY0 =
    6159    94687424 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[0]), weightsX);
    6160             :         const auto weightsXY1 =
    6161    94687424 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[1]), weightsX);
    6162             :         const auto weightsXY2 =
    6163    94687424 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[2]), weightsX);
    6164             :         const auto weightsXY3 =
    6165    47343762 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[3]), weightsX);
    6166             : 
    6167    47343762 :         const GPtrDiff_t iOffset = iSrcOffset - poWK->nSrcXSize - 1;
    6168             : 
    6169    47343762 :         int iBand = 0;
    6170             :         // Process 2 bands at a time
    6171    94687424 :         for (; iBand + 1 < poWK->nBands; iBand += 2)
    6172             :         {
    6173    47343762 :             const T *CPL_RESTRICT pBand0 =
    6174    47343762 :                 reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
    6175    47343762 :             const auto row0_0 = XMMLoad4Values(pBand0 + iOffset);
    6176             :             const auto row1_0 =
    6177    47343762 :                 XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
    6178             :             const auto row2_0 =
    6179    47343762 :                 XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
    6180             :             const auto row3_0 =
    6181    47343762 :                 XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
    6182             : 
    6183    47343762 :             const T *CPL_RESTRICT pBand1 =
    6184    47343762 :                 reinterpret_cast<const T *>(poWK->papabySrcImage[iBand + 1]);
    6185    47343762 :             const auto row0_1 = XMMLoad4Values(pBand1 + iOffset);
    6186             :             const auto row1_1 =
    6187    47343762 :                 XMMLoad4Values(pBand1 + iOffset + poWK->nSrcXSize);
    6188             :             const auto row2_1 =
    6189    47343762 :                 XMMLoad4Values(pBand1 + iOffset + 2 * poWK->nSrcXSize);
    6190             :             const auto row3_1 =
    6191    47343762 :                 XMMLoad4Values(pBand1 + iOffset + 3 * poWK->nSrcXSize);
    6192             : 
    6193             :             const float fValue_0 =
    6194    47343762 :                 Convolute4x4(row0_0, row1_0, row2_0, row3_0, weightsXY0,
    6195             :                              weightsXY1, weightsXY2, weightsXY3);
    6196             : 
    6197             :             const float fValue_1 =
    6198    47343762 :                 Convolute4x4(row0_1, row1_1, row2_1, row3_1, weightsXY0,
    6199             :                              weightsXY1, weightsXY2, weightsXY3);
    6200             : 
    6201    47343762 :             T *CPL_RESTRICT pDstBand0 =
    6202    47343762 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
    6203    47343762 :             pDstBand0[iDstOffset] = GWKClampValueT<T>(fValue_0);
    6204             : 
    6205    47343762 :             T *CPL_RESTRICT pDstBand1 =
    6206    47343762 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand + 1]);
    6207    47343762 :             pDstBand1[iDstOffset] = GWKClampValueT<T>(fValue_1);
    6208             :         }
    6209    47343762 :         if (iBand < poWK->nBands)
    6210             :         {
    6211    47343762 :             const T *CPL_RESTRICT pBand0 =
    6212    47343762 :                 reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
    6213    47343762 :             const auto row0 = XMMLoad4Values(pBand0 + iOffset);
    6214             :             const auto row1 =
    6215    47343762 :                 XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
    6216             :             const auto row2 =
    6217    47343762 :                 XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
    6218             :             const auto row3 =
    6219    47343762 :                 XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
    6220             : 
    6221             :             const float fValue =
    6222    47343762 :                 Convolute4x4(row0, row1, row2, row3, weightsXY0, weightsXY1,
    6223             :                              weightsXY2, weightsXY3);
    6224             : 
    6225    47343762 :             T *CPL_RESTRICT pDstBand =
    6226    47343762 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
    6227    47343762 :             pDstBand[iDstOffset] = GWKClampValueT<T>(fValue);
    6228             :         }
    6229             :     }
    6230             : 
    6231    48826142 :     if (poWK->pafDstDensity)
    6232    46672101 :         poWK->pafDstDensity[iDstOffset] = 1.0f;
    6233    48826142 : }
    6234             : 
    6235             : #endif  // defined(USE_SSE2)
    6236             : 
    6237             : /************************************************************************/
    6238             : /*          GWKResampleNoMasksOrDstDensityOnlyThreadInternal()          */
    6239             : /************************************************************************/
    6240             : 
    6241             : template <class T, GDALResampleAlg eResample, int bUse4SamplesFormula>
    6242        1984 : static void GWKResampleNoMasksOrDstDensityOnlyThreadInternal(void *pData)
    6243             : 
    6244             : {
    6245        1984 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    6246        1984 :     GDALWarpKernel *poWK = psJob->poWK;
    6247        1984 :     const int iYMin = psJob->iYMin;
    6248        1984 :     const int iYMax = psJob->iYMax;
    6249        1966 :     const double dfMultFactorVerticalShiftPipeline =
    6250        1984 :         poWK->bApplyVerticalShift
    6251          18 :             ? CPLAtof(CSLFetchNameValueDef(
    6252          18 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    6253             :                   "1.0"))
    6254             :             : 0.0;
    6255             : 
    6256        1984 :     const int nDstXSize = poWK->nDstXSize;
    6257        1984 :     const int nSrcXSize = poWK->nSrcXSize;
    6258        1984 :     const int nSrcYSize = poWK->nSrcYSize;
    6259             : 
    6260             :     /* -------------------------------------------------------------------- */
    6261             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    6262             :     /*      scanlines worth of positions.                                   */
    6263             :     /* -------------------------------------------------------------------- */
    6264             : 
    6265             :     // For x, 2 *, because we cache the precomputed values at the end.
    6266             :     double *padfX =
    6267        1984 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    6268             :     double *padfY =
    6269        1984 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6270             :     double *padfZ =
    6271        1984 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6272        1984 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    6273             : 
    6274        1984 :     const int nXRadius = poWK->nXRadius;
    6275             :     double *padfWeightsX =
    6276        1984 :         static_cast<double *>(CPLCalloc(1 + nXRadius * 2, sizeof(double)));
    6277             :     double *padfWeightsY = static_cast<double *>(
    6278        1984 :         CPLCalloc(1 + poWK->nYRadius * 2, sizeof(double)));
    6279        1984 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    6280        1984 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    6281        1984 :     const double dfErrorThreshold = CPLAtof(
    6282        1984 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    6283             : 
    6284             :     // Precompute values.
    6285      493219 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6286      491235 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    6287             : 
    6288             :     /* ==================================================================== */
    6289             :     /*      Loop over output lines.                                         */
    6290             :     /* ==================================================================== */
    6291      313089 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    6292             :     {
    6293             :         /* --------------------------------------------------------------------
    6294             :          */
    6295             :         /*      Setup points to transform to source image space. */
    6296             :         /* --------------------------------------------------------------------
    6297             :          */
    6298      311106 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    6299      311106 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    6300   108893195 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6301   108582073 :             padfY[iDstX] = dfY;
    6302      311106 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    6303             : 
    6304             :         /* --------------------------------------------------------------------
    6305             :          */
    6306             :         /*      Transform the points from destination pixel/line coordinates */
    6307             :         /*      to source pixel/line coordinates. */
    6308             :         /* --------------------------------------------------------------------
    6309             :          */
    6310      311106 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    6311             :                              padfY, padfZ, pabSuccess);
    6312      311106 :         if (dfSrcCoordPrecision > 0.0)
    6313             :         {
    6314        1000 :             GWKRoundSourceCoordinates(
    6315             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    6316             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    6317        1000 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    6318             :         }
    6319             : 
    6320             :         /* ====================================================================
    6321             :          */
    6322             :         /*      Loop over pixels in output scanline. */
    6323             :         /* ====================================================================
    6324             :          */
    6325   108893195 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6326             :         {
    6327   108582073 :             GPtrDiff_t iSrcOffset = 0;
    6328   108582073 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    6329             :                                               padfX, padfY, nSrcXSize,
    6330             :                                               nSrcYSize, iSrcOffset))
    6331    61444108 :                 continue;
    6332             : 
    6333             :             /* ====================================================================
    6334             :              */
    6335             :             /*      Loop processing each band. */
    6336             :             /* ====================================================================
    6337             :              */
    6338    95964087 :             const GPtrDiff_t iDstOffset =
    6339    95964087 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    6340             : 
    6341             : #if defined(USE_SSE2)
    6342             :             if constexpr (bUse4SamplesFormula && eResample == GRA_Cubic &&
    6343             :                           (std::is_same<T, GByte>::value ||
    6344             :                            std::is_same<T, GUInt16>::value))
    6345             :             {
    6346    49891741 :                 if (poWK->nBands > 1 && !poWK->bApplyVerticalShift)
    6347             :                 {
    6348    48826142 :                     GWKCubicResampleNoMasks4MultiBandT<T>(
    6349    48826142 :                         poWK, padfX[iDstX] - poWK->nSrcXOff,
    6350    48826142 :                         padfY[iDstX] - poWK->nSrcYOff, iDstOffset);
    6351             : 
    6352    48826142 :                     continue;
    6353             :                 }
    6354             :             }
    6355             : #endif  // defined(USE_SSE2)
    6356             : 
    6357    47137958 :             [[maybe_unused]] double dfInvWeights = 0;
    6358   127960488 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6359             :             {
    6360    80822324 :                 T value = 0;
    6361             :                 if constexpr (eResample == GRA_NearestNeighbour)
    6362             :                 {
    6363    73097530 :                     value = reinterpret_cast<T *>(
    6364    73097530 :                         poWK->papabySrcImage[iBand])[iSrcOffset];
    6365             :                 }
    6366             :                 else if constexpr (bUse4SamplesFormula)
    6367             :                 {
    6368             :                     if constexpr (eResample == GRA_Bilinear)
    6369     4041681 :                         GWKBilinearResampleNoMasks4SampleT(
    6370     4041681 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6371     4041681 :                             padfY[iDstX] - poWK->nSrcYOff, &value);
    6372             :                     else
    6373     2300964 :                         GWKCubicResampleNoMasks4SampleT(
    6374     2300964 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6375     2300964 :                             padfY[iDstX] - poWK->nSrcYOff, &value);
    6376             :                 }
    6377             :                 else
    6378             :                 {
    6379     1382149 :                     GWKResampleNoMasksT(
    6380     1382149 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6381     1382149 :                         padfY[iDstX] - poWK->nSrcYOff, &value, padfWeightsX,
    6382             :                         padfWeightsY, dfInvWeights);
    6383             :                 }
    6384             : 
    6385    80822324 :                 if (poWK->bApplyVerticalShift)
    6386             :                 {
    6387         818 :                     if (!std::isfinite(padfZ[iDstX]))
    6388           0 :                         continue;
    6389             :                     // Subtract padfZ[] since the coordinate transformation is
    6390             :                     // from target to source
    6391         818 :                     value = GWKClampValueT<T>(
    6392         818 :                         double(value) * poWK->dfMultFactorVerticalShift -
    6393         818 :                         padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
    6394             :                 }
    6395             : 
    6396    80822324 :                 if (poWK->pafDstDensity)
    6397     8224397 :                     poWK->pafDstDensity[iDstOffset] = 1.0f;
    6398             : 
    6399    80822324 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
    6400             :                     value;
    6401             :             }
    6402             :         }
    6403             : 
    6404             :         /* --------------------------------------------------------------------
    6405             :          */
    6406             :         /*      Report progress to the user, and optionally cancel out. */
    6407             :         /* --------------------------------------------------------------------
    6408             :          */
    6409      311106 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    6410           1 :             break;
    6411             :     }
    6412             : 
    6413             :     /* -------------------------------------------------------------------- */
    6414             :     /*      Cleanup and return.                                             */
    6415             :     /* -------------------------------------------------------------------- */
    6416        1984 :     CPLFree(padfX);
    6417        1984 :     CPLFree(padfY);
    6418        1984 :     CPLFree(padfZ);
    6419        1984 :     CPLFree(pabSuccess);
    6420        1984 :     CPLFree(padfWeightsX);
    6421        1984 :     CPLFree(padfWeightsY);
    6422        1984 : }
    6423             : 
    6424             : template <class T, GDALResampleAlg eResample>
    6425         960 : static void GWKResampleNoMasksOrDstDensityOnlyThread(void *pData)
    6426             : {
    6427         960 :     GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
    6428             :         pData);
    6429         960 : }
    6430             : 
    6431             : template <class T, GDALResampleAlg eResample>
    6432        1024 : static void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread(void *pData)
    6433             : 
    6434             : {
    6435        1024 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    6436        1024 :     GDALWarpKernel *poWK = psJob->poWK;
    6437             :     static_assert(eResample == GRA_Bilinear || eResample == GRA_Cubic);
    6438        1024 :     const bool bUse4SamplesFormula = CanUse4SamplesFormula(poWK);
    6439        1024 :     if (bUse4SamplesFormula)
    6440         969 :         GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, TRUE>(
    6441             :             pData);
    6442             :     else
    6443          55 :         GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
    6444             :             pData);
    6445        1024 : }
    6446             : 
    6447         909 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6448             : {
    6449         909 :     return GWKRun(
    6450             :         poWK, "GWKNearestNoMasksOrDstDensityOnlyByte",
    6451         909 :         GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_NearestNeighbour>);
    6452             : }
    6453             : 
    6454         128 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6455             : {
    6456         128 :     return GWKRun(
    6457             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyByte",
    6458             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte,
    6459         128 :                                                            GRA_Bilinear>);
    6460             : }
    6461             : 
    6462         850 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6463             : {
    6464         850 :     return GWKRun(
    6465             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyByte",
    6466         850 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte, GRA_Cubic>);
    6467             : }
    6468             : 
    6469           9 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
    6470             : {
    6471           9 :     return GWKRun(
    6472             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyFloat",
    6473           9 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, GRA_Cubic>);
    6474             : }
    6475             : 
    6476             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    6477             : 
    6478             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
    6479             : {
    6480             :     return GWKRun(
    6481             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyDouble",
    6482             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double, GRA_Cubic>);
    6483             : }
    6484             : #endif
    6485             : 
    6486          12 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6487             : {
    6488          12 :     return GWKRun(
    6489             :         poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyByte",
    6490          12 :         GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_CubicSpline>);
    6491             : }
    6492             : 
    6493             : /************************************************************************/
    6494             : /*                          GWKNearestByte()                            */
    6495             : /*                                                                      */
    6496             : /*      Case for 8bit input data with nearest neighbour resampling      */
    6497             : /*      using valid flags. Should be as fast as possible for this       */
    6498             : /*      particular transformation type.                                 */
    6499             : /************************************************************************/
    6500             : 
    6501         476 : template <class T> static void GWKNearestThread(void *pData)
    6502             : 
    6503             : {
    6504         476 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    6505         476 :     GDALWarpKernel *poWK = psJob->poWK;
    6506         476 :     const int iYMin = psJob->iYMin;
    6507         476 :     const int iYMax = psJob->iYMax;
    6508         476 :     const double dfMultFactorVerticalShiftPipeline =
    6509         476 :         poWK->bApplyVerticalShift
    6510           0 :             ? CPLAtof(CSLFetchNameValueDef(
    6511           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    6512             :                   "1.0"))
    6513             :             : 0.0;
    6514         476 :     const bool bAvoidNoDataSingleBand =
    6515         545 :         poWK->nBands == 1 ||
    6516          69 :         !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
    6517             :                                           "UNIFIED_SRC_NODATA", "FALSE"));
    6518             : 
    6519         476 :     const int nDstXSize = poWK->nDstXSize;
    6520         476 :     const int nSrcXSize = poWK->nSrcXSize;
    6521         476 :     const int nSrcYSize = poWK->nSrcYSize;
    6522             : 
    6523             :     /* -------------------------------------------------------------------- */
    6524             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    6525             :     /*      scanlines worth of positions.                                   */
    6526             :     /* -------------------------------------------------------------------- */
    6527             : 
    6528             :     // For x, 2 *, because we cache the precomputed values at the end.
    6529             :     double *padfX =
    6530         476 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    6531             :     double *padfY =
    6532         476 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6533             :     double *padfZ =
    6534         476 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6535         476 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    6536             : 
    6537         476 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    6538         476 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    6539         476 :     const double dfErrorThreshold = CPLAtof(
    6540         476 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    6541             : 
    6542             :     const bool bOneSourceCornerFailsToReproject =
    6543         476 :         GWKOneSourceCornerFailsToReproject(psJob);
    6544             : 
    6545             :     // Precompute values.
    6546       80555 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6547       80079 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    6548             : 
    6549             :     /* ==================================================================== */
    6550             :     /*      Loop over output lines.                                         */
    6551             :     /* ==================================================================== */
    6552       64711 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    6553             :     {
    6554             : 
    6555             :         /* --------------------------------------------------------------------
    6556             :          */
    6557             :         /*      Setup points to transform to source image space. */
    6558             :         /* --------------------------------------------------------------------
    6559             :          */
    6560       64235 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    6561       64235 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    6562    33836597 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6563    33772441 :             padfY[iDstX] = dfY;
    6564       64235 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    6565             : 
    6566             :         /* --------------------------------------------------------------------
    6567             :          */
    6568             :         /*      Transform the points from destination pixel/line coordinates */
    6569             :         /*      to source pixel/line coordinates. */
    6570             :         /* --------------------------------------------------------------------
    6571             :          */
    6572       64235 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    6573             :                              padfY, padfZ, pabSuccess);
    6574       64235 :         if (dfSrcCoordPrecision > 0.0)
    6575             :         {
    6576           0 :             GWKRoundSourceCoordinates(
    6577             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    6578             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    6579           0 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    6580             :         }
    6581             :         /* ====================================================================
    6582             :          */
    6583             :         /*      Loop over pixels in output scanline. */
    6584             :         /* ====================================================================
    6585             :          */
    6586    33836597 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6587             :         {
    6588    33772441 :             GPtrDiff_t iSrcOffset = 0;
    6589    33772441 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    6590             :                                               padfX, padfY, nSrcXSize,
    6591             :                                               nSrcYSize, iSrcOffset))
    6592    21383643 :                 continue;
    6593             : 
    6594             :             /* --------------------------------------------------------------------
    6595             :              */
    6596             :             /*      Do not try to apply invalid source pixels to the dest. */
    6597             :             /* --------------------------------------------------------------------
    6598             :              */
    6599    25227005 :             if (poWK->panUnifiedSrcValid != nullptr &&
    6600     6714445 :                 !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    6601             :             {
    6602     5120982 :                 if (!bOneSourceCornerFailsToReproject)
    6603             :                 {
    6604     5113496 :                     continue;
    6605             :                 }
    6606        7485 :                 else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
    6607             :                 {
    6608        5224 :                     continue;
    6609             :                 }
    6610             :             }
    6611             : 
    6612             :             /* --------------------------------------------------------------------
    6613             :              */
    6614             :             /*      Do not try to apply transparent source pixels to the
    6615             :              * destination.*/
    6616             :             /* --------------------------------------------------------------------
    6617             :              */
    6618    13393880 :             double dfDensity = 1.0;
    6619             : 
    6620    13393880 :             if (poWK->pafUnifiedSrcDensity != nullptr)
    6621             :             {
    6622     1557335 :                 dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    6623     1557335 :                 if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
    6624     1005075 :                     continue;
    6625             :             }
    6626             : 
    6627             :             /* ====================================================================
    6628             :              */
    6629             :             /*      Loop processing each band. */
    6630             :             /* ====================================================================
    6631             :              */
    6632             : 
    6633    12388798 :             const GPtrDiff_t iDstOffset =
    6634    12388798 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    6635             : 
    6636    27339658 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6637             :             {
    6638    14950960 :                 T value = 0;
    6639    14950960 :                 double dfBandDensity = 0.0;
    6640             : 
    6641             :                 /* --------------------------------------------------------------------
    6642             :                  */
    6643             :                 /*      Collect the source value. */
    6644             :                 /* --------------------------------------------------------------------
    6645             :                  */
    6646    14950960 :                 if (GWKGetPixelT(poWK, iBand, iSrcOffset, &dfBandDensity,
    6647             :                                  &value))
    6648             :                 {
    6649             : 
    6650    14950860 :                     if (poWK->bApplyVerticalShift)
    6651             :                     {
    6652           0 :                         if (!std::isfinite(padfZ[iDstX]))
    6653           0 :                             continue;
    6654             :                         // Subtract padfZ[] since the coordinate transformation
    6655             :                         // is from target to source
    6656           0 :                         value = GWKClampValueT<T>(
    6657           0 :                             double(value) * poWK->dfMultFactorVerticalShift -
    6658           0 :                             padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
    6659             :                     }
    6660             : 
    6661    14950860 :                     GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
    6662             :                                           dfBandDensity, value,
    6663             :                                           bAvoidNoDataSingleBand);
    6664             :                 }
    6665             :             }
    6666             : 
    6667             :             /* --------------------------------------------------------------------
    6668             :              */
    6669             :             /*      Mark this pixel valid/opaque in the output. */
    6670             :             /* --------------------------------------------------------------------
    6671             :              */
    6672             : 
    6673    12388798 :             if (!bAvoidNoDataSingleBand)
    6674             :             {
    6675      424278 :                 GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    6676             :             }
    6677             : 
    6678    12388798 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    6679             : 
    6680    12388798 :             if (poWK->panDstValid != nullptr)
    6681             :             {
    6682    11118345 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    6683             :             }
    6684             :         } /* Next iDstX */
    6685             : 
    6686             :         /* --------------------------------------------------------------------
    6687             :          */
    6688             :         /*      Report progress to the user, and optionally cancel out. */
    6689             :         /* --------------------------------------------------------------------
    6690             :          */
    6691       64235 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    6692           0 :             break;
    6693             :     }
    6694             : 
    6695             :     /* -------------------------------------------------------------------- */
    6696             :     /*      Cleanup and return.                                             */
    6697             :     /* -------------------------------------------------------------------- */
    6698         476 :     CPLFree(padfX);
    6699         476 :     CPLFree(padfY);
    6700         476 :     CPLFree(padfZ);
    6701         476 :     CPLFree(pabSuccess);
    6702         476 : }
    6703             : 
    6704         363 : static CPLErr GWKNearestByte(GDALWarpKernel *poWK)
    6705             : {
    6706         363 :     return GWKRun(poWK, "GWKNearestByte", GWKNearestThread<GByte>);
    6707             : }
    6708             : 
    6709          14 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6710             : {
    6711          14 :     return GWKRun(
    6712             :         poWK, "GWKNearestNoMasksOrDstDensityOnlyShort",
    6713          14 :         GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_NearestNeighbour>);
    6714             : }
    6715             : 
    6716           5 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6717             : {
    6718           5 :     return GWKRun(
    6719             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyShort",
    6720             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16,
    6721           5 :                                                            GRA_Bilinear>);
    6722             : }
    6723             : 
    6724           6 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
    6725             : {
    6726           6 :     return GWKRun(
    6727             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyUShort",
    6728             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16,
    6729           6 :                                                            GRA_Bilinear>);
    6730             : }
    6731             : 
    6732           4 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
    6733             : {
    6734           4 :     return GWKRun(
    6735             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyFloat",
    6736             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float,
    6737           4 :                                                            GRA_Bilinear>);
    6738             : }
    6739             : 
    6740             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    6741             : 
    6742             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
    6743             : {
    6744             :     return GWKRun(
    6745             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyDouble",
    6746             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double,
    6747             :                                                            GRA_Bilinear>);
    6748             : }
    6749             : #endif
    6750             : 
    6751           5 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6752             : {
    6753           5 :     return GWKRun(
    6754             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyShort",
    6755           5 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16, GRA_Cubic>);
    6756             : }
    6757             : 
    6758          14 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
    6759             : {
    6760          14 :     return GWKRun(
    6761             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyUShort",
    6762          14 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16, GRA_Cubic>);
    6763             : }
    6764             : 
    6765           6 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6766             : {
    6767           6 :     return GWKRun(
    6768             :         poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyShort",
    6769           6 :         GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_CubicSpline>);
    6770             : }
    6771             : 
    6772           5 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
    6773             : {
    6774           5 :     return GWKRun(
    6775             :         poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyUShort",
    6776           5 :         GWKResampleNoMasksOrDstDensityOnlyThread<GUInt16, GRA_CubicSpline>);
    6777             : }
    6778             : 
    6779           9 : static CPLErr GWKNearestInt8(GDALWarpKernel *poWK)
    6780             : {
    6781           9 :     return GWKRun(poWK, "GWKNearestInt8", GWKNearestThread<int8_t>);
    6782             : }
    6783             : 
    6784          40 : static CPLErr GWKNearestShort(GDALWarpKernel *poWK)
    6785             : {
    6786          40 :     return GWKRun(poWK, "GWKNearestShort", GWKNearestThread<GInt16>);
    6787             : }
    6788             : 
    6789          10 : static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK)
    6790             : {
    6791          10 :     return GWKRun(poWK, "GWKNearestUnsignedShort", GWKNearestThread<GUInt16>);
    6792             : }
    6793             : 
    6794          11 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
    6795             : {
    6796          11 :     return GWKRun(
    6797             :         poWK, "GWKNearestNoMasksOrDstDensityOnlyFloat",
    6798          11 :         GWKResampleNoMasksOrDstDensityOnlyThread<float, GRA_NearestNeighbour>);
    6799             : }
    6800             : 
    6801          50 : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK)
    6802             : {
    6803          50 :     return GWKRun(poWK, "GWKNearestFloat", GWKNearestThread<float>);
    6804             : }
    6805             : 
    6806             : /************************************************************************/
    6807             : /*                           GWKAverageOrMode()                         */
    6808             : /*                                                                      */
    6809             : /************************************************************************/
    6810             : 
    6811             : #define COMPUTE_WEIGHT_Y(iSrcY)                                                \
    6812             :     ((iSrcY == iSrcYMin)                                                       \
    6813             :          ? ((iSrcYMin + 1 == iSrcYMax) ? 1.0 : 1 - (dfYMin - iSrcYMin))        \
    6814             :      : (iSrcY + 1 == iSrcYMax) ? 1 - (iSrcYMax - dfYMax)                       \
    6815             :                                : 1.0)
    6816             : 
    6817             : #define COMPUTE_WEIGHT(iSrcX, dfWeightY)                                       \
    6818             :     ((iSrcX == iSrcXMin)       ? ((iSrcXMin + 1 == iSrcXMax)                   \
    6819             :                                       ? dfWeightY                              \
    6820             :                                       : dfWeightY * (1 - (dfXMin - iSrcXMin))) \
    6821             :      : (iSrcX + 1 == iSrcXMax) ? dfWeightY * (1 - (iSrcXMax - dfXMax))         \
    6822             :                                : dfWeightY)
    6823             : 
    6824             : static void GWKAverageOrModeThread(void *pData);
    6825             : 
    6826         246 : static CPLErr GWKAverageOrMode(GDALWarpKernel *poWK)
    6827             : {
    6828         246 :     return GWKRun(poWK, "GWKAverageOrMode", GWKAverageOrModeThread);
    6829             : }
    6830             : 
    6831             : /************************************************************************/
    6832             : /*                 GWKAverageOrModeComputeLineCoords()                  */
    6833             : /************************************************************************/
    6834             : 
    6835       28663 : static void GWKAverageOrModeComputeLineCoords(
    6836             :     const GWKJobStruct *psJob, double *padfX, double *padfX2, double *padfY,
    6837             :     double *padfY2, double *padfZ, double *padfZ2, int *pabSuccess,
    6838             :     int *pabSuccess2, int iDstY, double dfSrcCoordPrecision,
    6839             :     double dfErrorThreshold)
    6840             : {
    6841       28663 :     const GDALWarpKernel *poWK = psJob->poWK;
    6842       28663 :     const int nDstXSize = poWK->nDstXSize;
    6843             : 
    6844             :     // Setup points to transform to source image space.
    6845     7360890 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6846             :     {
    6847     7332220 :         padfX[iDstX] = iDstX + poWK->nDstXOff;
    6848     7332220 :         padfY[iDstX] = iDstY + poWK->nDstYOff;
    6849     7332220 :         padfZ[iDstX] = 0.0;
    6850     7332220 :         padfX2[iDstX] = iDstX + 1.0 + poWK->nDstXOff;
    6851     7332220 :         padfY2[iDstX] = iDstY + 1.0 + poWK->nDstYOff;
    6852     7332220 :         padfZ2[iDstX] = 0.0;
    6853             :     }
    6854             : 
    6855             :     /* ----------------------------------------------------------------- */
    6856             :     /*      Transform the points from destination pixel/line coordinates */
    6857             :     /*      to source pixel/line coordinates.                            */
    6858             :     /* ----------------------------------------------------------------- */
    6859       28663 :     poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX, padfY,
    6860             :                          padfZ, pabSuccess);
    6861       28663 :     poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX2,
    6862             :                          padfY2, padfZ2, pabSuccess2);
    6863             : 
    6864       28663 :     if (dfSrcCoordPrecision > 0.0)
    6865             :     {
    6866           0 :         GWKRoundSourceCoordinates(nDstXSize, padfX, padfY, padfZ, pabSuccess,
    6867             :                                   dfSrcCoordPrecision, dfErrorThreshold,
    6868           0 :                                   poWK->pfnTransformer, psJob->pTransformerArg,
    6869           0 :                                   poWK->nDstXOff, iDstY + poWK->nDstYOff);
    6870           0 :         GWKRoundSourceCoordinates(
    6871             :             nDstXSize, padfX2, padfY2, padfZ2, pabSuccess2, dfSrcCoordPrecision,
    6872           0 :             dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    6873           0 :             1.0 + poWK->nDstXOff, iDstY + 1.0 + poWK->nDstYOff);
    6874             :     }
    6875       28663 : }
    6876             : 
    6877             : /************************************************************************/
    6878             : /*                GWKAverageOrModeComputeSourceCoords()                 */
    6879             : /************************************************************************/
    6880             : 
    6881     7332220 : static bool GWKAverageOrModeComputeSourceCoords(
    6882             :     const GWKJobStruct *psJob, double *padfX, double *padfX2, double *padfY,
    6883             :     double *padfY2, int iDstX, int iDstY, int nXMargin, int nYMargin,
    6884             :     // Output:
    6885             :     bool &bWrapOverX, double &dfXMin, double &dfYMin, double &dfXMax,
    6886             :     double &dfYMax, int &iSrcXMin, int &iSrcYMin, int &iSrcXMax, int &iSrcYMax)
    6887             : {
    6888     7332220 :     const GDALWarpKernel *poWK = psJob->poWK;
    6889     7332220 :     const int nSrcXSize = poWK->nSrcXSize;
    6890     7332220 :     const int nSrcYSize = poWK->nSrcYSize;
    6891             : 
    6892             :     // Add some checks so that padfX[iDstX] - poWK->nSrcXOff is in
    6893             :     // reasonable range (https://github.com/OSGeo/gdal/issues/2365)
    6894     7332220 :     if (!(padfX[iDstX] - poWK->nSrcXOff >= -nXMargin &&
    6895     6814810 :           padfX2[iDstX] - poWK->nSrcXOff >= -nXMargin &&
    6896     6814810 :           padfY[iDstX] - poWK->nSrcYOff >= -nYMargin &&
    6897     6532210 :           padfY2[iDstX] - poWK->nSrcYOff >= -nYMargin &&
    6898     6532210 :           padfX[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
    6899     5870420 :           padfX2[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
    6900     5865780 :           padfY[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin &&
    6901     5350790 :           padfY2[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin))
    6902             :     {
    6903     1985190 :         return false;
    6904             :     }
    6905             : 
    6906             :     // Compute corners in source crs.
    6907             : 
    6908             :     // The transformation might not have preserved ordering of
    6909             :     // coordinates so do the necessary swapping (#5433).
    6910             :     // NOTE: this is really an approximative fix. To do something
    6911             :     // more precise we would for example need to compute the
    6912             :     // transformation of coordinates in the
    6913             :     // [iDstX,iDstY]x[iDstX+1,iDstY+1] square back to source
    6914             :     // coordinates, and take the bounding box of the got source
    6915             :     // coordinates.
    6916             : 
    6917     5347040 :     if (padfX[iDstX] > padfX2[iDstX])
    6918      269148 :         std::swap(padfX[iDstX], padfX2[iDstX]);
    6919             : 
    6920             :     // Detect situations where the target pixel is close to the
    6921             :     // antimeridian and when padfX[iDstX] and padfX2[iDstX] are very
    6922             :     // close to the left-most and right-most columns of the source
    6923             :     // raster. The 2 value below was experimentally determined to
    6924             :     // avoid false-positives and false-negatives.
    6925             :     // Addresses https://github.com/OSGeo/gdal/issues/6478
    6926     5347040 :     bWrapOverX = false;
    6927     5347040 :     const int nThresholdWrapOverX = std::min(2, nSrcXSize / 10);
    6928     5347040 :     if (poWK->nSrcXOff == 0 && iDstX + 1 < poWK->nDstXSize &&
    6929     3298690 :         2 * padfX[iDstX] - padfX[iDstX + 1] < nThresholdWrapOverX &&
    6930       55362 :         nSrcXSize - padfX2[iDstX] < nThresholdWrapOverX)
    6931             :     {
    6932             :         // Check there is a discontinuity by checking at mid-pixel.
    6933             :         // NOTE: all this remains fragile. To confidently
    6934             :         // detect antimeridian warping we should probably try to access
    6935             :         // georeferenced coordinates, and not rely only on tests on
    6936             :         // image space coordinates. But accessing georeferenced
    6937             :         // coordinates from here is not trivial, and we would for example
    6938             :         // have to handle both geographic, Mercator, etc.
    6939             :         // Let's hope this heuristics is good enough for now.
    6940        1610 :         double x = iDstX + 0.5 + poWK->nDstXOff;
    6941        1610 :         double y = iDstY + poWK->nDstYOff;
    6942        1610 :         double z = 0;
    6943        1610 :         int bSuccess = FALSE;
    6944        1610 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1, &x, &y, &z,
    6945             :                              &bSuccess);
    6946        1610 :         if (bSuccess && x < padfX[iDstX])
    6947             :         {
    6948        1596 :             bWrapOverX = true;
    6949        1596 :             std::swap(padfX[iDstX], padfX2[iDstX]);
    6950        1596 :             padfX2[iDstX] += nSrcXSize;
    6951             :         }
    6952             :     }
    6953             : 
    6954     5347040 :     dfXMin = padfX[iDstX] - poWK->nSrcXOff;
    6955     5347040 :     dfXMax = padfX2[iDstX] - poWK->nSrcXOff;
    6956     5347040 :     constexpr double EPSILON = 1e-10;
    6957             :     // Check that [dfXMin, dfXMax] intersect with [0,nSrcXSize] with a tolerance
    6958     5347040 :     if (!(dfXMax > -EPSILON && dfXMin < nSrcXSize + EPSILON))
    6959       15528 :         return false;
    6960     5331510 :     iSrcXMin = static_cast<int>(std::max(floor(dfXMin + EPSILON), 0.0));
    6961     5331510 :     iSrcXMax = static_cast<int>(
    6962     5331510 :         std::min(ceil(dfXMax - EPSILON), static_cast<double>(INT_MAX)));
    6963     5331510 :     if (!bWrapOverX)
    6964     5329910 :         iSrcXMax = std::min(iSrcXMax, nSrcXSize);
    6965     5331510 :     if (iSrcXMin == iSrcXMax && iSrcXMax < nSrcXSize)
    6966         472 :         iSrcXMax++;
    6967             : 
    6968     5331510 :     if (padfY[iDstX] > padfY2[iDstX])
    6969      270117 :         std::swap(padfY[iDstX], padfY2[iDstX]);
    6970     5331510 :     dfYMin = padfY[iDstX] - poWK->nSrcYOff;
    6971     5331510 :     dfYMax = padfY2[iDstX] - poWK->nSrcYOff;
    6972             :     // Check that [dfYMin, dfYMax] intersect with [0,nSrcYSize] with a tolerance
    6973     5331510 :     if (!(dfYMax > -EPSILON && dfYMin < nSrcYSize + EPSILON))
    6974       13334 :         return false;
    6975     5318180 :     iSrcYMin = static_cast<int>(std::max(floor(dfYMin + EPSILON), 0.0));
    6976     5318180 :     iSrcYMax = std::min(static_cast<int>(ceil(dfYMax - EPSILON)), nSrcYSize);
    6977     5318180 :     if (iSrcYMin == iSrcYMax && iSrcYMax < nSrcYSize)
    6978           0 :         iSrcYMax++;
    6979             : 
    6980     5318180 :     return true;
    6981             : }
    6982             : 
    6983             : /************************************************************************/
    6984             : /*                          GWKModeRealType()                           */
    6985             : /************************************************************************/
    6986             : 
    6987       17780 : template <class T> static inline bool IsSame(T a, T b)
    6988             : {
    6989       17780 :     return a == b;
    6990             : }
    6991             : 
    6992           0 : template <> bool IsSame<GFloat16>(GFloat16 a, GFloat16 b)
    6993             : {
    6994           0 :     return a == b || (CPLIsNan(a) && CPLIsNan(b));
    6995             : }
    6996             : 
    6997          18 : template <> bool IsSame<float>(float a, float b)
    6998             : {
    6999          18 :     return a == b || (std::isnan(a) && std::isnan(b));
    7000             : }
    7001             : 
    7002          56 : template <> bool IsSame<double>(double a, double b)
    7003             : {
    7004          56 :     return a == b || (std::isnan(a) && std::isnan(b));
    7005             : }
    7006             : 
    7007          19 : template <class T> static void GWKModeRealType(GWKJobStruct *psJob)
    7008             : {
    7009          19 :     const GDALWarpKernel *poWK = psJob->poWK;
    7010          19 :     const int iYMin = psJob->iYMin;
    7011          19 :     const int iYMax = psJob->iYMax;
    7012          19 :     const int nDstXSize = poWK->nDstXSize;
    7013          19 :     const int nSrcXSize = poWK->nSrcXSize;
    7014          19 :     const int nSrcYSize = poWK->nSrcYSize;
    7015          19 :     const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
    7016             : 
    7017          19 :     T *pVals = nullptr;
    7018          19 :     float *pafCounts = nullptr;
    7019             : 
    7020          19 :     if (nSrcXSize > 0 && nSrcYSize > 0)
    7021             :     {
    7022             :         pVals = static_cast<T *>(
    7023          19 :             VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(T)));
    7024             :         pafCounts = static_cast<float *>(
    7025          19 :             VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
    7026          19 :         if (pVals == nullptr || pafCounts == nullptr)
    7027             :         {
    7028           0 :             VSIFree(pVals);
    7029           0 :             VSIFree(pafCounts);
    7030           0 :             return;
    7031             :         }
    7032             :     }
    7033             : 
    7034             :     /* -------------------------------------------------------------------- */
    7035             :     /*      Allocate x,y,z coordinate arrays for transformation ... two     */
    7036             :     /*      scanlines worth of positions.                                   */
    7037             :     /* -------------------------------------------------------------------- */
    7038             : 
    7039             :     double *padfX =
    7040          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7041             :     double *padfY =
    7042          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7043             :     double *padfZ =
    7044          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7045             :     double *padfX2 =
    7046          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7047             :     double *padfY2 =
    7048          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7049             :     double *padfZ2 =
    7050          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7051          19 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    7052          19 :     int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    7053             : 
    7054          19 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    7055          19 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    7056          19 :     const double dfErrorThreshold = CPLAtof(
    7057          19 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    7058          19 :     const bool bAvoidNoDataSingleBand =
    7059          19 :         poWK->nBands == 1 ||
    7060           0 :         !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
    7061             :                                           "UNIFIED_SRC_NODATA", "FALSE"));
    7062             : 
    7063          19 :     const int nXMargin =
    7064          19 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
    7065          19 :     const int nYMargin =
    7066          19 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
    7067             : 
    7068             :     /* ==================================================================== */
    7069             :     /*      Loop over output lines.                                         */
    7070             :     /* ==================================================================== */
    7071         116 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    7072             :     {
    7073          97 :         GWKAverageOrModeComputeLineCoords(
    7074             :             psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
    7075             :             pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
    7076             : 
    7077             :         // Loop over pixels in output scanline.
    7078        3514 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    7079             :         {
    7080        3417 :             GPtrDiff_t iSrcOffset = 0;
    7081        3417 :             double dfDensity = 1.0;
    7082        3417 :             bool bHasFoundDensity = false;
    7083             : 
    7084        3417 :             bool bWrapOverX = false;
    7085        3417 :             double dfXMin = 0;
    7086        3417 :             double dfYMin = 0;
    7087        3417 :             double dfXMax = 0;
    7088        3417 :             double dfYMax = 0;
    7089        3417 :             int iSrcXMin = 0;
    7090        3417 :             int iSrcYMin = 0;
    7091        3417 :             int iSrcXMax = 0;
    7092        3417 :             int iSrcYMax = 0;
    7093        3417 :             if (!GWKAverageOrModeComputeSourceCoords(
    7094             :                     psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
    7095             :                     nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
    7096             :                     iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
    7097             :             {
    7098           0 :                 continue;
    7099             :             }
    7100             : 
    7101        3417 :             const GPtrDiff_t iDstOffset =
    7102        3417 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    7103             : 
    7104             :             // Loop processing each band.
    7105        6834 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7106             :             {
    7107        3417 :                 double dfBandDensity = 0.0;
    7108             : 
    7109        3417 :                 int nBins = 0;
    7110        3417 :                 int iModeIndex = -1;
    7111        3417 :                 T nVal{};
    7112             : 
    7113       10248 :                 for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7114             :                 {
    7115        6831 :                     const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7116        6831 :                     iSrcOffset =
    7117        6831 :                         iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7118       20530 :                     for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7119             :                          iSrcX++, iSrcOffset++)
    7120             :                     {
    7121       13699 :                         if (bWrapOverX)
    7122           0 :                             iSrcOffset =
    7123           0 :                                 (iSrcX % nSrcXSize) +
    7124           0 :                                 static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7125             : 
    7126       13699 :                         if (poWK->panUnifiedSrcValid != nullptr &&
    7127           0 :                             !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    7128           0 :                             continue;
    7129             : 
    7130       13699 :                         if (GWKGetPixelT(poWK, iBand, iSrcOffset,
    7131       27398 :                                          &dfBandDensity, &nVal) &&
    7132       13699 :                             dfBandDensity > BAND_DENSITY_THRESHOLD)
    7133             :                         {
    7134       13699 :                             const double dfWeight =
    7135       13699 :                                 COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7136             : 
    7137             :                             // Check array for existing entry.
    7138       13699 :                             int i = 0;
    7139       29194 :                             for (i = 0; i < nBins; ++i)
    7140             :                             {
    7141       17807 :                                 if (IsSame(pVals[i], nVal))
    7142             :                                 {
    7143             : 
    7144        2312 :                                     pafCounts[i] +=
    7145        2312 :                                         static_cast<float>(dfWeight);
    7146        2312 :                                     bool bValIsMaxCount =
    7147        2312 :                                         (pafCounts[i] > pafCounts[iModeIndex]);
    7148             : 
    7149        2312 :                                     if (!bValIsMaxCount &&
    7150        1498 :                                         pafCounts[i] == pafCounts[iModeIndex])
    7151             :                                     {
    7152        1490 :                                         switch (eTieStrategy)
    7153             :                                         {
    7154        1477 :                                             case GWKTS_First:
    7155        1477 :                                                 break;
    7156           6 :                                             case GWKTS_Min:
    7157           6 :                                                 bValIsMaxCount =
    7158           6 :                                                     nVal < pVals[iModeIndex];
    7159           6 :                                                 break;
    7160           7 :                                             case GWKTS_Max:
    7161           7 :                                                 bValIsMaxCount =
    7162           7 :                                                     nVal > pVals[iModeIndex];
    7163           7 :                                                 break;
    7164             :                                         }
    7165             :                                     }
    7166             : 
    7167        2312 :                                     if (bValIsMaxCount)
    7168             :                                     {
    7169         817 :                                         iModeIndex = i;
    7170             :                                     }
    7171             : 
    7172        2312 :                                     break;
    7173             :                                 }
    7174             :                             }
    7175             : 
    7176             :                             // Add to arr if entry not already there.
    7177       13699 :                             if (i == nBins)
    7178             :                             {
    7179       11387 :                                 pVals[i] = nVal;
    7180       11387 :                                 pafCounts[i] = static_cast<float>(dfWeight);
    7181             : 
    7182       11387 :                                 if (iModeIndex < 0)
    7183        3417 :                                     iModeIndex = i;
    7184             : 
    7185       11387 :                                 ++nBins;
    7186             :                             }
    7187             :                         }
    7188             :                     }
    7189             :                 }
    7190             : 
    7191        3417 :                 if (iModeIndex != -1)
    7192             :                 {
    7193        3417 :                     nVal = pVals[iModeIndex];
    7194        3417 :                     dfBandDensity = 1;
    7195        3417 :                     bHasFoundDensity = true;
    7196             :                 }
    7197             : 
    7198             :                 // We have a computed value from the source.  Now apply it
    7199             :                 // to the destination pixel
    7200        3417 :                 if (bHasFoundDensity)
    7201             :                 {
    7202        3417 :                     GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
    7203             :                                           dfBandDensity, nVal,
    7204             :                                           bAvoidNoDataSingleBand);
    7205             :                 }
    7206             :             }
    7207             : 
    7208        3417 :             if (!bHasFoundDensity)
    7209           0 :                 continue;
    7210             : 
    7211        3417 :             if (!bAvoidNoDataSingleBand)
    7212             :             {
    7213           0 :                 GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    7214             :             }
    7215             : 
    7216             :             /* --------------------------------------------------------------------
    7217             :              */
    7218             :             /*      Update destination density/validity masks. */
    7219             :             /* --------------------------------------------------------------------
    7220             :              */
    7221        3417 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    7222             : 
    7223        3417 :             if (poWK->panDstValid != nullptr)
    7224             :             {
    7225           0 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    7226             :             }
    7227             :         } /* Next iDstX */
    7228             : 
    7229             :         /* --------------------------------------------------------------------
    7230             :          */
    7231             :         /*      Report progress to the user, and optionally cancel out. */
    7232             :         /* --------------------------------------------------------------------
    7233             :          */
    7234          97 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    7235           0 :             break;
    7236             :     }
    7237             : 
    7238             :     /* -------------------------------------------------------------------- */
    7239             :     /*      Cleanup and return.                                             */
    7240             :     /* -------------------------------------------------------------------- */
    7241          19 :     CPLFree(padfX);
    7242          19 :     CPLFree(padfY);
    7243          19 :     CPLFree(padfZ);
    7244          19 :     CPLFree(padfX2);
    7245          19 :     CPLFree(padfY2);
    7246          19 :     CPLFree(padfZ2);
    7247          19 :     CPLFree(pabSuccess);
    7248          19 :     CPLFree(pabSuccess2);
    7249          19 :     VSIFree(pVals);
    7250          19 :     VSIFree(pafCounts);
    7251             : }
    7252             : 
    7253             : /************************************************************************/
    7254             : /*                         GWKModeComplexType()                         */
    7255             : /************************************************************************/
    7256             : 
    7257           8 : static void GWKModeComplexType(GWKJobStruct *psJob)
    7258             : {
    7259           8 :     const GDALWarpKernel *poWK = psJob->poWK;
    7260           8 :     const int iYMin = psJob->iYMin;
    7261           8 :     const int iYMax = psJob->iYMax;
    7262           8 :     const int nDstXSize = poWK->nDstXSize;
    7263           8 :     const int nSrcXSize = poWK->nSrcXSize;
    7264           8 :     const int nSrcYSize = poWK->nSrcYSize;
    7265           8 :     const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
    7266             :     const double dfMultFactorVerticalShiftPipeline =
    7267           8 :         poWK->bApplyVerticalShift
    7268           8 :             ? CPLAtof(CSLFetchNameValueDef(
    7269           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    7270             :                   "1.0"))
    7271           8 :             : 0.0;
    7272             :     const bool bAvoidNoDataSingleBand =
    7273           8 :         poWK->nBands == 1 ||
    7274           0 :         !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
    7275           8 :                                           "UNIFIED_SRC_NODATA", "FALSE"));
    7276             : 
    7277           8 :     double *padfRealVals = nullptr;
    7278           8 :     double *padfImagVals = nullptr;
    7279           8 :     float *pafCounts = nullptr;
    7280             : 
    7281           8 :     if (nSrcXSize > 0 && nSrcYSize > 0)
    7282             :     {
    7283             :         padfRealVals = static_cast<double *>(
    7284           8 :             VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(double)));
    7285             :         padfImagVals = static_cast<double *>(
    7286           8 :             VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(double)));
    7287             :         pafCounts = static_cast<float *>(
    7288           8 :             VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
    7289           8 :         if (padfRealVals == nullptr || padfImagVals == nullptr ||
    7290             :             pafCounts == nullptr)
    7291             :         {
    7292           0 :             VSIFree(padfRealVals);
    7293           0 :             VSIFree(padfImagVals);
    7294           0 :             VSIFree(pafCounts);
    7295           0 :             return;
    7296             :         }
    7297             :     }
    7298             : 
    7299             :     /* -------------------------------------------------------------------- */
    7300             :     /*      Allocate x,y,z coordinate arrays for transformation ... two     */
    7301             :     /*      scanlines worth of positions.                                   */
    7302             :     /* -------------------------------------------------------------------- */
    7303             : 
    7304             :     double *padfX =
    7305           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7306             :     double *padfY =
    7307           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7308             :     double *padfZ =
    7309           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7310             :     double *padfX2 =
    7311           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7312             :     double *padfY2 =
    7313           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7314             :     double *padfZ2 =
    7315           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7316           8 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    7317           8 :     int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    7318             : 
    7319           8 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    7320           8 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    7321           8 :     const double dfErrorThreshold = CPLAtof(
    7322           8 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    7323             : 
    7324             :     const int nXMargin =
    7325           8 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
    7326             :     const int nYMargin =
    7327           8 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
    7328             : 
    7329             :     /* ==================================================================== */
    7330             :     /*      Loop over output lines.                                         */
    7331             :     /* ==================================================================== */
    7332          16 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    7333             :     {
    7334           8 :         GWKAverageOrModeComputeLineCoords(
    7335             :             psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
    7336             :             pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
    7337             : 
    7338             :         // Loop over pixels in output scanline.
    7339          16 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    7340             :         {
    7341           8 :             GPtrDiff_t iSrcOffset = 0;
    7342           8 :             double dfDensity = 1.0;
    7343           8 :             bool bHasFoundDensity = false;
    7344             : 
    7345           8 :             bool bWrapOverX = false;
    7346           8 :             double dfXMin = 0;
    7347           8 :             double dfYMin = 0;
    7348           8 :             double dfXMax = 0;
    7349           8 :             double dfYMax = 0;
    7350           8 :             int iSrcXMin = 0;
    7351           8 :             int iSrcYMin = 0;
    7352           8 :             int iSrcXMax = 0;
    7353           8 :             int iSrcYMax = 0;
    7354           8 :             if (!GWKAverageOrModeComputeSourceCoords(
    7355             :                     psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
    7356             :                     nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
    7357             :                     iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
    7358             :             {
    7359           0 :                 continue;
    7360             :             }
    7361             : 
    7362           8 :             const GPtrDiff_t iDstOffset =
    7363           8 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    7364             : 
    7365             :             // Loop processing each band.
    7366          16 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7367             :             {
    7368           8 :                 double dfBandDensity = 0.0;
    7369             : 
    7370           8 :                 int nBins = 0;
    7371           8 :                 int iModeIndex = -1;
    7372           8 :                 double dfValueReal = 0;
    7373           8 :                 double dfValueImag = 0;
    7374             : 
    7375          16 :                 for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7376             :                 {
    7377           8 :                     const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7378           8 :                     iSrcOffset =
    7379           8 :                         iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7380          38 :                     for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7381             :                          iSrcX++, iSrcOffset++)
    7382             :                     {
    7383          30 :                         if (bWrapOverX)
    7384           0 :                             iSrcOffset =
    7385           0 :                                 (iSrcX % nSrcXSize) +
    7386           0 :                                 static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7387             : 
    7388          30 :                         if (poWK->panUnifiedSrcValid != nullptr &&
    7389           0 :                             !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    7390           0 :                             continue;
    7391             : 
    7392          30 :                         if (GWKGetPixelValue(poWK, iBand, iSrcOffset,
    7393             :                                              &dfBandDensity, &dfValueReal,
    7394          60 :                                              &dfValueImag) &&
    7395          30 :                             dfBandDensity > BAND_DENSITY_THRESHOLD)
    7396             :                         {
    7397          30 :                             const double dfWeight =
    7398          30 :                                 COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7399             : 
    7400             :                             // Check array for existing entry.
    7401          30 :                             int i = 0;
    7402          49 :                             for (i = 0; i < nBins; ++i)
    7403             :                             {
    7404          47 :                                 if (IsSame(padfRealVals[i], dfValueReal) &&
    7405          14 :                                     IsSame(padfImagVals[i], dfValueImag))
    7406             :                                 {
    7407             : 
    7408          14 :                                     pafCounts[i] +=
    7409          14 :                                         static_cast<float>(dfWeight);
    7410          14 :                                     bool bValIsMaxCount =
    7411          14 :                                         (pafCounts[i] > pafCounts[iModeIndex]);
    7412             : 
    7413          14 :                                     if (!bValIsMaxCount &&
    7414           6 :                                         pafCounts[i] == pafCounts[iModeIndex])
    7415             :                                     {
    7416           3 :                                         switch (eTieStrategy)
    7417             :                                         {
    7418           3 :                                             case GWKTS_First:
    7419           3 :                                                 break;
    7420           0 :                                             case GWKTS_Min:
    7421           0 :                                                 bValIsMaxCount =
    7422           0 :                                                     dfValueReal <
    7423           0 :                                                     padfRealVals[iModeIndex];
    7424           0 :                                                 break;
    7425           0 :                                             case GWKTS_Max:
    7426           0 :                                                 bValIsMaxCount =
    7427           0 :                                                     dfValueReal >
    7428           0 :                                                     padfRealVals[iModeIndex];
    7429           0 :                                                 break;
    7430             :                                         }
    7431             :                                     }
    7432             : 
    7433          14 :                                     if (bValIsMaxCount)
    7434             :                                     {
    7435           8 :                                         iModeIndex = i;
    7436             :                                     }
    7437             : 
    7438          14 :                                     break;
    7439             :                                 }
    7440             :                             }
    7441             : 
    7442             :                             // Add to arr if entry not already there.
    7443          30 :                             if (i == nBins)
    7444             :                             {
    7445          16 :                                 padfRealVals[i] = dfValueReal;
    7446          16 :                                 padfImagVals[i] = dfValueImag;
    7447          16 :                                 pafCounts[i] = static_cast<float>(dfWeight);
    7448             : 
    7449          16 :                                 if (iModeIndex < 0)
    7450           8 :                                     iModeIndex = i;
    7451             : 
    7452          16 :                                 ++nBins;
    7453             :                             }
    7454             :                         }
    7455             :                     }
    7456             :                 }
    7457             : 
    7458           8 :                 if (iModeIndex != -1)
    7459             :                 {
    7460           8 :                     dfValueReal = padfRealVals[iModeIndex];
    7461           8 :                     dfValueImag = padfImagVals[iModeIndex];
    7462           8 :                     dfBandDensity = 1;
    7463             : 
    7464           8 :                     if (poWK->bApplyVerticalShift)
    7465             :                     {
    7466           0 :                         if (!std::isfinite(padfZ[iDstX]))
    7467           0 :                             continue;
    7468             :                         // Subtract padfZ[] since the coordinate
    7469             :                         // transformation is from target to source
    7470           0 :                         dfValueReal =
    7471           0 :                             dfValueReal * poWK->dfMultFactorVerticalShift -
    7472           0 :                             padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
    7473             :                     }
    7474             : 
    7475           8 :                     bHasFoundDensity = true;
    7476             :                 }
    7477             : 
    7478             :                 // We have a computed value from the source.  Now apply it
    7479             :                 // to the destination pixel
    7480           8 :                 if (bHasFoundDensity)
    7481             :                 {
    7482           8 :                     GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    7483             :                                      dfValueReal, dfValueImag,
    7484             :                                      bAvoidNoDataSingleBand);
    7485             :                 }
    7486             :             }
    7487             : 
    7488           8 :             if (!bHasFoundDensity)
    7489           0 :                 continue;
    7490             : 
    7491           8 :             if (!bAvoidNoDataSingleBand)
    7492             :             {
    7493           0 :                 GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    7494             :             }
    7495             : 
    7496             :             /* --------------------------------------------------------------------
    7497             :              */
    7498             :             /*      Update destination density/validity masks. */
    7499             :             /* --------------------------------------------------------------------
    7500             :              */
    7501           8 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    7502             : 
    7503           8 :             if (poWK->panDstValid != nullptr)
    7504             :             {
    7505           0 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    7506             :             }
    7507             :         } /* Next iDstX */
    7508             : 
    7509             :         /* --------------------------------------------------------------------
    7510             :          */
    7511             :         /*      Report progress to the user, and optionally cancel out. */
    7512             :         /* --------------------------------------------------------------------
    7513             :          */
    7514           8 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    7515           0 :             break;
    7516             :     }
    7517             : 
    7518             :     /* -------------------------------------------------------------------- */
    7519             :     /*      Cleanup and return.                                             */
    7520             :     /* -------------------------------------------------------------------- */
    7521           8 :     CPLFree(padfX);
    7522           8 :     CPLFree(padfY);
    7523           8 :     CPLFree(padfZ);
    7524           8 :     CPLFree(padfX2);
    7525           8 :     CPLFree(padfY2);
    7526           8 :     CPLFree(padfZ2);
    7527           8 :     CPLFree(pabSuccess);
    7528           8 :     CPLFree(pabSuccess2);
    7529           8 :     VSIFree(padfRealVals);
    7530           8 :     VSIFree(padfImagVals);
    7531           8 :     VSIFree(pafCounts);
    7532             : }
    7533             : 
    7534             : /************************************************************************/
    7535             : /*                       GWKAverageOrModeThread()                       */
    7536             : /************************************************************************/
    7537             : 
    7538             : // Overall logic based on GWKGeneralCaseThread().
    7539         246 : static void GWKAverageOrModeThread(void *pData)
    7540             : {
    7541         246 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    7542         246 :     const GDALWarpKernel *poWK = psJob->poWK;
    7543         246 :     const int iYMin = psJob->iYMin;
    7544         246 :     const int iYMax = psJob->iYMax;
    7545             :     const double dfMultFactorVerticalShiftPipeline =
    7546         246 :         poWK->bApplyVerticalShift
    7547         246 :             ? CPLAtof(CSLFetchNameValueDef(
    7548           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    7549             :                   "1.0"))
    7550         246 :             : 0.0;
    7551             :     const bool bAvoidNoDataSingleBand =
    7552         342 :         poWK->nBands == 1 ||
    7553          96 :         !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
    7554         246 :                                           "UNIFIED_SRC_NODATA", "FALSE"));
    7555             : 
    7556         246 :     const int nDstXSize = poWK->nDstXSize;
    7557         246 :     const int nSrcXSize = poWK->nSrcXSize;
    7558             : 
    7559             :     /* -------------------------------------------------------------------- */
    7560             :     /*      Find out which algorithm to use (small optim.)                  */
    7561             :     /* -------------------------------------------------------------------- */
    7562             : 
    7563             :     // Only used for GRA_Mode
    7564         246 :     float *pafCounts = nullptr;
    7565         246 :     int nBins = 0;
    7566         246 :     int nBinsOffset = 0;
    7567         246 :     const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
    7568             : 
    7569             :     // Only used with Q1, Med and Q3
    7570         246 :     float quant = 0.0f;
    7571             : 
    7572             :     // To control array allocation only when data type is complex
    7573         246 :     const bool bIsComplex = GDALDataTypeIsComplex(poWK->eWorkingDataType) != 0;
    7574             : 
    7575         246 :     if (poWK->eResample == GRA_Mode)
    7576             :     {
    7577          45 :         if (poWK->bApplyVerticalShift)
    7578             :         {
    7579           0 :             return GWKModeComplexType(psJob);
    7580             :         }
    7581             : 
    7582          45 :         switch (poWK->eWorkingDataType)
    7583             :         {
    7584           7 :             case GDT_UInt8:
    7585           7 :                 nBins = 256;
    7586           7 :                 break;
    7587             : 
    7588           1 :             case GDT_Int8:
    7589           1 :                 nBins = 256;
    7590           1 :                 nBinsOffset = nBins / 2;
    7591           1 :                 break;
    7592             : 
    7593           1 :             case GDT_UInt16:
    7594           1 :                 nBins = 65536;
    7595           1 :                 break;
    7596             : 
    7597           9 :             case GDT_Int16:
    7598           9 :                 nBins = 65536;
    7599           9 :                 nBinsOffset = nBins / 2;
    7600           9 :                 break;
    7601             : 
    7602          10 :             case GDT_Int32:
    7603          10 :                 return GWKModeRealType<int32_t>(psJob);
    7604             : 
    7605           1 :             case GDT_UInt32:
    7606           1 :                 return GWKModeRealType<uint32_t>(psJob);
    7607             : 
    7608           1 :             case GDT_Int64:
    7609           1 :                 return GWKModeRealType<int64_t>(psJob);
    7610             : 
    7611           1 :             case GDT_UInt64:
    7612           1 :                 return GWKModeRealType<uint64_t>(psJob);
    7613             : 
    7614           0 :             case GDT_Float16:
    7615           0 :                 return GWKModeRealType<GFloat16>(psJob);
    7616             : 
    7617           4 :             case GDT_Float32:
    7618           4 :                 return GWKModeRealType<float>(psJob);
    7619             : 
    7620           2 :             case GDT_Float64:
    7621           2 :                 return GWKModeRealType<double>(psJob);
    7622             : 
    7623           8 :             case GDT_CInt16:
    7624             :             case GDT_CInt32:
    7625             :             case GDT_CFloat16:
    7626             :             case GDT_CFloat32:
    7627             :             case GDT_CFloat64:
    7628           8 :                 return GWKModeComplexType(psJob);
    7629             : 
    7630           0 :             case GDT_Unknown:
    7631             :             case GDT_TypeCount:
    7632           0 :                 CPLAssert(false);
    7633             :                 return;
    7634             :         }
    7635             : 
    7636          18 :         if (nBins)
    7637             :         {
    7638             :             pafCounts =
    7639          18 :                 static_cast<float *>(VSI_MALLOC_VERBOSE(nBins * sizeof(float)));
    7640          18 :             if (pafCounts == nullptr)
    7641           0 :                 return;
    7642             :         }
    7643             :     }
    7644         201 :     else if (poWK->eResample == GRA_Med)
    7645             :     {
    7646           6 :         quant = 0.5f;
    7647             :     }
    7648         195 :     else if (poWK->eResample == GRA_Q1)
    7649             :     {
    7650          10 :         quant = 0.25f;
    7651             :     }
    7652         185 :     else if (poWK->eResample == GRA_Q3)
    7653             :     {
    7654           5 :         quant = 0.75f;
    7655             :     }
    7656         180 :     else if (poWK->eResample != GRA_Average && poWK->eResample != GRA_RMS &&
    7657          11 :              poWK->eResample != GRA_Min && poWK->eResample != GRA_Max)
    7658             :     {
    7659             :         // Other resample algorithms not permitted here.
    7660           0 :         CPLError(CE_Fatal, CPLE_AppDefined,
    7661             :                  "GDALWarpKernel():GWKAverageOrModeThread() ERROR, "
    7662             :                  "illegal resample");
    7663             :     }
    7664             : 
    7665         219 :     CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread()");
    7666             : 
    7667             :     /* -------------------------------------------------------------------- */
    7668             :     /*      Allocate x,y,z coordinate arrays for transformation ... two     */
    7669             :     /*      scanlines worth of positions.                                   */
    7670             :     /* -------------------------------------------------------------------- */
    7671             : 
    7672             :     double *padfX =
    7673         219 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7674             :     double *padfY =
    7675         219 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7676             :     double *padfZ =
    7677         219 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7678             :     double *padfX2 =
    7679         219 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7680             :     double *padfY2 =
    7681         219 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7682             :     double *padfZ2 =
    7683         219 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7684         219 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    7685         219 :     int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    7686             : 
    7687         219 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    7688         219 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    7689         219 :     const double dfErrorThreshold = CPLAtof(
    7690         219 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    7691             : 
    7692             :     const double dfExcludedValuesThreshold =
    7693         219 :         CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
    7694             :                                      "EXCLUDED_VALUES_PCT_THRESHOLD", "50")) /
    7695         219 :         100.0;
    7696             :     const double dfNodataValuesThreshold =
    7697         219 :         CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
    7698             :                                      "NODATA_VALUES_PCT_THRESHOLD", "100")) /
    7699         219 :         100.0;
    7700             : 
    7701             :     const int nXMargin =
    7702         219 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
    7703             :     const int nYMargin =
    7704         219 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
    7705             : 
    7706             :     /* ==================================================================== */
    7707             :     /*      Loop over output lines.                                         */
    7708             :     /* ==================================================================== */
    7709       28777 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    7710             :     {
    7711       28558 :         GWKAverageOrModeComputeLineCoords(
    7712             :             psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
    7713             :             pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
    7714             : 
    7715             :         /* ====================================================================
    7716             :          */
    7717             :         /*      Loop over pixels in output scanline. */
    7718             :         /* ====================================================================
    7719             :          */
    7720     7357360 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    7721             :         {
    7722     7328800 :             GPtrDiff_t iSrcOffset = 0;
    7723     7328800 :             double dfDensity = 1.0;
    7724     7328800 :             bool bHasFoundDensity = false;
    7725             : 
    7726     7328800 :             bool bWrapOverX = false;
    7727     7328800 :             double dfXMin = 0;
    7728     7328800 :             double dfYMin = 0;
    7729     7328800 :             double dfXMax = 0;
    7730     7328800 :             double dfYMax = 0;
    7731     7328800 :             int iSrcXMin = 0;
    7732     7328800 :             int iSrcYMin = 0;
    7733     7328800 :             int iSrcXMax = 0;
    7734     7328800 :             int iSrcYMax = 0;
    7735     7328800 :             if (!GWKAverageOrModeComputeSourceCoords(
    7736             :                     psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
    7737             :                     nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
    7738             :                     iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
    7739             :             {
    7740     3158560 :                 continue;
    7741             :             }
    7742             : 
    7743     5314750 :             const GPtrDiff_t iDstOffset =
    7744     5314750 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    7745             : 
    7746     5314750 :             bool bDone = false;
    7747             : 
    7748             :             // Special Average mode where we process all bands together,
    7749             :             // to avoid averaging tuples that match an entry of m_aadfExcludedValues
    7750     5314750 :             constexpr double EPSILON = 1e-10;
    7751    14838200 :             if (poWK->eResample == GRA_Average &&
    7752     4208720 :                 (!poWK->m_aadfExcludedValues.empty() ||
    7753      393224 :                  dfNodataValuesThreshold < 1 - EPSILON) &&
    7754     9523480 :                 !poWK->bApplyVerticalShift && !bIsComplex)
    7755             :             {
    7756      393224 :                 double dfTotalWeightInvalid = 0.0;
    7757      393224 :                 double dfTotalWeightExcluded = 0.0;
    7758      393224 :                 double dfTotalWeightRegular = 0.0;
    7759      786448 :                 std::vector<double> adfValueReal(poWK->nBands, 0);
    7760      786448 :                 std::vector<double> adfValueAveraged(poWK->nBands, 0);
    7761             :                 std::vector<int> anCountExcludedValues(
    7762      393224 :                     poWK->m_aadfExcludedValues.size(), 0);
    7763             : 
    7764     1179670 :                 for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7765             :                 {
    7766      786448 :                     const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7767      786448 :                     iSrcOffset =
    7768      786448 :                         iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7769     2359340 :                     for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7770             :                          iSrcX++, iSrcOffset++)
    7771             :                     {
    7772     1572900 :                         if (bWrapOverX)
    7773           0 :                             iSrcOffset =
    7774           0 :                                 (iSrcX % nSrcXSize) +
    7775           0 :                                 static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7776             : 
    7777     1572900 :                         const double dfWeight =
    7778     1572900 :                             COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7779     1572900 :                         if (dfWeight <= 0)
    7780           0 :                             continue;
    7781             : 
    7782     1572910 :                         if (poWK->panUnifiedSrcValid != nullptr &&
    7783          12 :                             !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    7784             :                         {
    7785           3 :                             dfTotalWeightInvalid += dfWeight;
    7786           3 :                             continue;
    7787             :                         }
    7788             : 
    7789     1572890 :                         bool bAllValid = true;
    7790     2359410 :                         for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7791             :                         {
    7792     2097230 :                             double dfBandDensity = 0;
    7793     2097230 :                             double dfValueImagTmp = 0;
    7794     2883740 :                             if (!(GWKGetPixelValue(
    7795             :                                       poWK, iBand, iSrcOffset, &dfBandDensity,
    7796     2097230 :                                       &adfValueReal[iBand], &dfValueImagTmp) &&
    7797      786513 :                                   dfBandDensity > BAND_DENSITY_THRESHOLD))
    7798             :                             {
    7799     1310720 :                                 bAllValid = false;
    7800     1310720 :                                 break;
    7801             :                             }
    7802             :                         }
    7803             : 
    7804     1572890 :                         if (!bAllValid)
    7805             :                         {
    7806     1310720 :                             dfTotalWeightInvalid += dfWeight;
    7807     1310720 :                             continue;
    7808             :                         }
    7809             : 
    7810      262177 :                         bool bExcludedValueFound = false;
    7811      393263 :                         for (size_t i = 0;
    7812      393263 :                              i < poWK->m_aadfExcludedValues.size(); ++i)
    7813             :                         {
    7814      131092 :                             if (poWK->m_aadfExcludedValues[i] == adfValueReal)
    7815             :                             {
    7816           6 :                                 bExcludedValueFound = true;
    7817           6 :                                 ++anCountExcludedValues[i];
    7818           6 :                                 dfTotalWeightExcluded += dfWeight;
    7819           6 :                                 break;
    7820             :                             }
    7821             :                         }
    7822      262177 :                         if (!bExcludedValueFound)
    7823             :                         {
    7824             :                             // Weighted incremental algorithm mean
    7825             :                             // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
    7826      262171 :                             dfTotalWeightRegular += dfWeight;
    7827     1048670 :                             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7828             :                             {
    7829      786495 :                                 adfValueAveraged[iBand] +=
    7830     1572990 :                                     (dfWeight / dfTotalWeightRegular) *
    7831     1572990 :                                     (adfValueReal[iBand] -
    7832      786495 :                                      adfValueAveraged[iBand]);
    7833             :                             }
    7834             :                         }
    7835             :                     }
    7836             :                 }
    7837             : 
    7838      393224 :                 const double dfTotalWeight = dfTotalWeightInvalid +
    7839             :                                              dfTotalWeightExcluded +
    7840             :                                              dfTotalWeightRegular;
    7841      393224 :                 if (dfTotalWeightInvalid > 0 &&
    7842             :                     dfTotalWeightInvalid >=
    7843      327685 :                         dfNodataValuesThreshold * dfTotalWeight)
    7844             :                 {
    7845             :                     // Do nothing. Leave bHasFoundDensity set to false.
    7846             :                 }
    7847       65543 :                 else if (dfTotalWeightExcluded > 0 &&
    7848             :                          dfTotalWeightExcluded >=
    7849           6 :                              dfExcludedValuesThreshold * dfTotalWeight)
    7850             :                 {
    7851             :                     // Find the most represented excluded value tuple
    7852           2 :                     size_t iExcludedValue = 0;
    7853           2 :                     int nExcludedValueCount = 0;
    7854           4 :                     for (size_t i = 0; i < poWK->m_aadfExcludedValues.size();
    7855             :                          ++i)
    7856             :                     {
    7857           2 :                         if (anCountExcludedValues[i] > nExcludedValueCount)
    7858             :                         {
    7859           2 :                             iExcludedValue = i;
    7860           2 :                             nExcludedValueCount = anCountExcludedValues[i];
    7861             :                         }
    7862             :                     }
    7863             : 
    7864           2 :                     bHasFoundDensity = true;
    7865             : 
    7866           8 :                     for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7867             :                     {
    7868           6 :                         GWKSetPixelValue(
    7869             :                             poWK, iBand, iDstOffset, /* dfBandDensity = */ 1.0,
    7870           6 :                             poWK->m_aadfExcludedValues[iExcludedValue][iBand],
    7871             :                             0, bAvoidNoDataSingleBand);
    7872             :                     }
    7873             : 
    7874           2 :                     if (!bAvoidNoDataSingleBand)
    7875             :                     {
    7876           0 :                         GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    7877           2 :                     }
    7878             :                 }
    7879       65541 :                 else if (dfTotalWeightRegular > 0)
    7880             :                 {
    7881       65541 :                     bHasFoundDensity = true;
    7882             : 
    7883      262160 :                     for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7884             :                     {
    7885      196619 :                         GWKSetPixelValue(poWK, iBand, iDstOffset,
    7886             :                                          /* dfBandDensity = */ 1.0,
    7887      196619 :                                          adfValueAveraged[iBand], 0,
    7888             :                                          bAvoidNoDataSingleBand);
    7889             :                     }
    7890             : 
    7891       65541 :                     if (!bAvoidNoDataSingleBand)
    7892             :                     {
    7893           0 :                         GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    7894             :                     }
    7895             :                 }
    7896             : 
    7897             :                 // Skip below loop on bands
    7898      393224 :                 bDone = true;
    7899             :             }
    7900             : 
    7901             :             /* ====================================================================
    7902             :              */
    7903             :             /*      Loop processing each band. */
    7904             :             /* ====================================================================
    7905             :              */
    7906             : 
    7907    17670500 :             for (int iBand = 0; !bDone && iBand < poWK->nBands; iBand++)
    7908             :             {
    7909    12355700 :                 double dfBandDensity = 0.0;
    7910    12355700 :                 double dfValueReal = 0.0;
    7911    12355700 :                 double dfValueImag = 0.0;
    7912    12355700 :                 double dfValueRealTmp = 0.0;
    7913    12355700 :                 double dfValueImagTmp = 0.0;
    7914             : 
    7915             :                 /* --------------------------------------------------------------------
    7916             :                  */
    7917             :                 /*      Collect the source value. */
    7918             :                 /* --------------------------------------------------------------------
    7919             :                  */
    7920             : 
    7921             :                 // Loop over source lines and pixels, per resampling algorithm.
    7922             : 
    7923    12355700 :                 if (poWK->eResample == GRA_Average)
    7924             :                 {
    7925     9833240 :                     double dfTotalWeight = 0.0;
    7926             : 
    7927             :                     // This code adapted from GDALDownsampleChunk32R_AverageT()
    7928             :                     // in gcore/overview.cpp.
    7929    25243600 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7930             :                     {
    7931    15410300 :                         const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7932    15410300 :                         iSrcOffset = iSrcXMin +
    7933    15410300 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7934    44761400 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7935             :                              iSrcX++, iSrcOffset++)
    7936             :                         {
    7937    29351100 :                             if (bWrapOverX)
    7938        2571 :                                 iSrcOffset =
    7939        2571 :                                     (iSrcX % nSrcXSize) +
    7940        2571 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7941             : 
    7942    29351100 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7943           4 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7944             :                                             iSrcOffset))
    7945             :                             {
    7946           1 :                                 continue;
    7947             :                             }
    7948             : 
    7949    29351100 :                             if (GWKGetPixelValue(
    7950             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7951    48239400 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7952    18888400 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7953             :                             {
    7954    18888400 :                                 const double dfWeight =
    7955    18888400 :                                     COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7956    18888400 :                                 if (dfWeight > 0)
    7957             :                                 {
    7958             :                                     // Weighted incremental algorithm mean
    7959             :                                     // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
    7960    18888400 :                                     dfTotalWeight += dfWeight;
    7961    18888400 :                                     dfValueReal +=
    7962    18888400 :                                         (dfWeight / dfTotalWeight) *
    7963    18888400 :                                         (dfValueRealTmp - dfValueReal);
    7964    18888400 :                                     if (bIsComplex)
    7965             :                                     {
    7966         252 :                                         dfValueImag +=
    7967         252 :                                             (dfWeight / dfTotalWeight) *
    7968         252 :                                             (dfValueImagTmp - dfValueImag);
    7969             :                                     }
    7970             :                                 }
    7971             :                             }
    7972             :                         }
    7973             :                     }
    7974             : 
    7975     9833240 :                     if (dfTotalWeight > 0)
    7976             :                     {
    7977     7530420 :                         if (poWK->bApplyVerticalShift)
    7978             :                         {
    7979           0 :                             if (!std::isfinite(padfZ[iDstX]))
    7980           0 :                                 continue;
    7981             :                             // Subtract padfZ[] since the coordinate
    7982             :                             // transformation is from target to source
    7983           0 :                             dfValueReal =
    7984           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7985           0 :                                 padfZ[iDstX] *
    7986             :                                     dfMultFactorVerticalShiftPipeline;
    7987             :                         }
    7988             : 
    7989     7530420 :                         dfBandDensity = 1;
    7990     7530420 :                         bHasFoundDensity = true;
    7991             :                     }
    7992             :                 }  // GRA_Average.
    7993             : 
    7994     2522460 :                 else if (poWK->eResample == GRA_RMS)
    7995             :                 {
    7996      300416 :                     double dfTotalReal = 0.0;
    7997      300416 :                     double dfTotalImag = 0.0;
    7998      300416 :                     double dfTotalWeight = 0.0;
    7999             :                     // This code adapted from GDALDownsampleChunk32R_AverageT()
    8000             :                     // in gcore/overview.cpp.
    8001      630578 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    8002             :                     {
    8003      330162 :                         const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    8004      330162 :                         iSrcOffset = iSrcXMin +
    8005      330162 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8006      772930 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    8007             :                              iSrcX++, iSrcOffset++)
    8008             :                         {
    8009      442768 :                             if (bWrapOverX)
    8010        1371 :                                 iSrcOffset =
    8011        1371 :                                     (iSrcX % nSrcXSize) +
    8012        1371 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8013             : 
    8014      442768 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    8015           0 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    8016             :                                             iSrcOffset))
    8017             :                             {
    8018           0 :                                 continue;
    8019             :                             }
    8020             : 
    8021      442768 :                             if (GWKGetPixelValue(
    8022             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    8023      885536 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    8024      442768 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    8025             :                             {
    8026      442768 :                                 const double dfWeight =
    8027      442768 :                                     COMPUTE_WEIGHT(iSrcX, dfWeightY);
    8028      442768 :                                 dfTotalWeight += dfWeight;
    8029      442768 :                                 dfTotalReal +=
    8030      442768 :                                     dfValueRealTmp * dfValueRealTmp * dfWeight;
    8031      442768 :                                 if (bIsComplex)
    8032          48 :                                     dfTotalImag += dfValueImagTmp *
    8033          48 :                                                    dfValueImagTmp * dfWeight;
    8034             :                             }
    8035             :                         }
    8036             :                     }
    8037             : 
    8038      300416 :                     if (dfTotalWeight > 0)
    8039             :                     {
    8040      300416 :                         dfValueReal = sqrt(dfTotalReal / dfTotalWeight);
    8041             : 
    8042      300416 :                         if (poWK->bApplyVerticalShift)
    8043             :                         {
    8044           0 :                             if (!std::isfinite(padfZ[iDstX]))
    8045           0 :                                 continue;
    8046             :                             // Subtract padfZ[] since the coordinate
    8047             :                             // transformation is from target to source
    8048           0 :                             dfValueReal =
    8049           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    8050           0 :                                 padfZ[iDstX] *
    8051             :                                     dfMultFactorVerticalShiftPipeline;
    8052             :                         }
    8053             : 
    8054      300416 :                         if (bIsComplex)
    8055          12 :                             dfValueImag = sqrt(dfTotalImag / dfTotalWeight);
    8056             : 
    8057      300416 :                         dfBandDensity = 1;
    8058      300416 :                         bHasFoundDensity = true;
    8059             :                     }
    8060             :                 }  // GRA_RMS.
    8061             : 
    8062     2222040 :                 else if (poWK->eResample == GRA_Mode)
    8063             :                 {
    8064      496623 :                     float fMaxCount = 0.0f;
    8065      496623 :                     int nMode = -1;
    8066      496623 :                     bool bHasSourceValues = false;
    8067             : 
    8068      496623 :                     memset(pafCounts, 0, nBins * sizeof(float));
    8069             : 
    8070     1167120 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    8071             :                     {
    8072      670495 :                         const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    8073      670495 :                         iSrcOffset = iSrcXMin +
    8074      670495 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8075     1964680 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    8076             :                              iSrcX++, iSrcOffset++)
    8077             :                         {
    8078     1294190 :                             if (bWrapOverX)
    8079        1371 :                                 iSrcOffset =
    8080        1371 :                                     (iSrcX % nSrcXSize) +
    8081        1371 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8082             : 
    8083     1294190 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    8084           0 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    8085             :                                             iSrcOffset))
    8086           0 :                                 continue;
    8087             : 
    8088     1294190 :                             if (GWKGetPixelValue(
    8089             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    8090     2588370 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    8091     1294190 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    8092             :                             {
    8093     1294190 :                                 bHasSourceValues = true;
    8094     1294190 :                                 const int nVal =
    8095     1294190 :                                     static_cast<int>(dfValueRealTmp);
    8096     1294190 :                                 const int iBin = nVal + nBinsOffset;
    8097     1294190 :                                 const double dfWeight =
    8098     1294190 :                                     COMPUTE_WEIGHT(iSrcX, dfWeightY);
    8099             : 
    8100             :                                 // Sum the density.
    8101     1294190 :                                 pafCounts[iBin] += static_cast<float>(dfWeight);
    8102             :                                 // Is it the most common value so far?
    8103     1294190 :                                 bool bUpdateMode = pafCounts[iBin] > fMaxCount;
    8104     1294190 :                                 if (!bUpdateMode &&
    8105      227545 :                                     pafCounts[iBin] == fMaxCount)
    8106             :                                 {
    8107       15866 :                                     switch (eTieStrategy)
    8108             :                                     {
    8109       15858 :                                         case GWKTS_First:
    8110       15858 :                                             break;
    8111           4 :                                         case GWKTS_Min:
    8112           4 :                                             bUpdateMode = nVal < nMode;
    8113           4 :                                             break;
    8114           4 :                                         case GWKTS_Max:
    8115           4 :                                             bUpdateMode = nVal > nMode;
    8116           4 :                                             break;
    8117             :                                     }
    8118             :                                 }
    8119     1294190 :                                 if (bUpdateMode)
    8120             :                                 {
    8121     1066640 :                                     nMode = nVal;
    8122     1066640 :                                     fMaxCount = pafCounts[iBin];
    8123             :                                 }
    8124             :                             }
    8125             :                         }
    8126             :                     }
    8127             : 
    8128      496623 :                     if (bHasSourceValues)
    8129             :                     {
    8130      496623 :                         dfValueReal = nMode;
    8131      496623 :                         dfBandDensity = 1;
    8132      496623 :                         bHasFoundDensity = true;
    8133             :                     }
    8134             :                 }  // GRA_Mode.
    8135             : 
    8136     1725420 :                 else if (poWK->eResample == GRA_Max)
    8137             :                 {
    8138      335037 :                     bool bFoundValid = false;
    8139      335037 :                     double dfTotalReal = cpl::NumericLimits<double>::lowest();
    8140             :                     // This code adapted from nAlgo 1 method, GRA_Average.
    8141      842572 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    8142             :                     {
    8143      507535 :                         iSrcOffset = iSrcXMin +
    8144      507535 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8145     1638060 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    8146             :                              iSrcX++, iSrcOffset++)
    8147             :                         {
    8148     1130520 :                             if (bWrapOverX)
    8149        1371 :                                 iSrcOffset =
    8150        1371 :                                     (iSrcX % nSrcXSize) +
    8151        1371 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8152             : 
    8153     1133330 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    8154        2809 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    8155             :                                             iSrcOffset))
    8156             :                             {
    8157        2446 :                                 continue;
    8158             :                             }
    8159             : 
    8160             :                             // Returns pixel value if it is not no data.
    8161     1128070 :                             if (GWKGetPixelValue(
    8162             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    8163     2256150 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    8164     1128070 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    8165             :                             {
    8166     1128070 :                                 bFoundValid = true;
    8167     1128070 :                                 if (dfTotalReal < dfValueRealTmp)
    8168             :                                 {
    8169      463372 :                                     dfTotalReal = dfValueRealTmp;
    8170             :                                 }
    8171             :                             }
    8172             :                         }
    8173             :                     }
    8174             : 
    8175      335037 :                     if (bFoundValid)
    8176             :                     {
    8177      335037 :                         dfValueReal = dfTotalReal;
    8178             : 
    8179      335037 :                         if (poWK->bApplyVerticalShift)
    8180             :                         {
    8181           0 :                             if (!std::isfinite(padfZ[iDstX]))
    8182           0 :                                 continue;
    8183             :                             // Subtract padfZ[] since the coordinate
    8184             :                             // transformation is from target to source
    8185           0 :                             dfValueReal =
    8186           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    8187           0 :                                 padfZ[iDstX] *
    8188             :                                     dfMultFactorVerticalShiftPipeline;
    8189             :                         }
    8190             : 
    8191      335037 :                         dfBandDensity = 1;
    8192      335037 :                         bHasFoundDensity = true;
    8193             :                     }
    8194             :                 }
    8195             : 
    8196     1390380 :                 else if (poWK->eResample == GRA_Min)
    8197             :                 {
    8198      335012 :                     bool bFoundValid = false;
    8199      335012 :                     double dfTotalReal = cpl::NumericLimits<double>::max();
    8200             :                     // This code adapted from nAlgo 1 method, GRA_Average.
    8201      842282 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    8202             :                     {
    8203      507270 :                         iSrcOffset = iSrcXMin +
    8204      507270 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8205     1634980 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    8206             :                              iSrcX++, iSrcOffset++)
    8207             :                         {
    8208     1127710 :                             if (bWrapOverX)
    8209        1371 :                                 iSrcOffset =
    8210        1371 :                                     (iSrcX % nSrcXSize) +
    8211        1371 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8212             : 
    8213     1127710 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    8214           0 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    8215             :                                             iSrcOffset))
    8216             :                             {
    8217           0 :                                 continue;
    8218             :                             }
    8219             : 
    8220             :                             // Returns pixel value if it is not no data.
    8221     1127710 :                             if (GWKGetPixelValue(
    8222             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    8223     2255420 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    8224     1127710 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    8225             :                             {
    8226     1127710 :                                 bFoundValid = true;
    8227     1127710 :                                 if (dfTotalReal > dfValueRealTmp)
    8228             :                                 {
    8229      464157 :                                     dfTotalReal = dfValueRealTmp;
    8230             :                                 }
    8231             :                             }
    8232             :                         }
    8233             :                     }
    8234             : 
    8235      335012 :                     if (bFoundValid)
    8236             :                     {
    8237      335012 :                         dfValueReal = dfTotalReal;
    8238             : 
    8239      335012 :                         if (poWK->bApplyVerticalShift)
    8240             :                         {
    8241           0 :                             if (!std::isfinite(padfZ[iDstX]))
    8242           0 :                                 continue;
    8243             :                             // Subtract padfZ[] since the coordinate
    8244             :                             // transformation is from target to source
    8245           0 :                             dfValueReal =
    8246           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    8247           0 :                                 padfZ[iDstX] *
    8248             :                                     dfMultFactorVerticalShiftPipeline;
    8249             :                         }
    8250             : 
    8251      335012 :                         dfBandDensity = 1;
    8252      335012 :                         bHasFoundDensity = true;
    8253             :                     }
    8254             :                 }  // GRA_Min.
    8255             : 
    8256             :                 else
    8257             :                 // poWK->eResample == GRA_Med | GRA_Q1 | GRA_Q3.
    8258             :                 {
    8259     1055370 :                     CPLAssert(quant > 0.0f);
    8260             : 
    8261     1055370 :                     bool bFoundValid = false;
    8262     1055370 :                     std::vector<double> dfRealValuesTmp;
    8263             : 
    8264             :                     // This code adapted from nAlgo 1 method, GRA_Average.
    8265     2677810 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    8266             :                     {
    8267     1622440 :                         iSrcOffset = iSrcXMin +
    8268     1622440 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8269     5205220 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    8270             :                              iSrcX++, iSrcOffset++)
    8271             :                         {
    8272     3582770 :                             if (bWrapOverX)
    8273        4113 :                                 iSrcOffset =
    8274        4113 :                                     (iSrcX % nSrcXSize) +
    8275        4113 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8276             : 
    8277     3779380 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    8278      196608 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    8279             :                                             iSrcOffset))
    8280             :                             {
    8281      195449 :                                 continue;
    8282             :                             }
    8283             : 
    8284             :                             // Returns pixel value if it is not no data.
    8285     3387320 :                             if (GWKGetPixelValue(
    8286             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    8287     6774650 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    8288     3387320 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    8289             :                             {
    8290     3387320 :                                 bFoundValid = true;
    8291     3387320 :                                 dfRealValuesTmp.push_back(dfValueRealTmp);
    8292             :                             }
    8293             :                         }
    8294             :                     }
    8295             : 
    8296     1055370 :                     if (bFoundValid)
    8297             :                     {
    8298     1006150 :                         std::sort(dfRealValuesTmp.begin(),
    8299             :                                   dfRealValuesTmp.end());
    8300             :                         int quantIdx = static_cast<int>(
    8301     1006150 :                             std::ceil(quant * dfRealValuesTmp.size() - 1));
    8302     1006150 :                         dfValueReal = dfRealValuesTmp[quantIdx];
    8303             : 
    8304     1006150 :                         if (poWK->bApplyVerticalShift)
    8305             :                         {
    8306           0 :                             if (!std::isfinite(padfZ[iDstX]))
    8307           0 :                                 continue;
    8308             :                             // Subtract padfZ[] since the coordinate
    8309             :                             // transformation is from target to source
    8310           0 :                             dfValueReal =
    8311           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    8312           0 :                                 padfZ[iDstX] *
    8313             :                                     dfMultFactorVerticalShiftPipeline;
    8314             :                         }
    8315             : 
    8316     1006150 :                         dfBandDensity = 1;
    8317     1006150 :                         bHasFoundDensity = true;
    8318     1006150 :                         dfRealValuesTmp.clear();
    8319             :                     }
    8320             :                 }  // Quantile.
    8321             : 
    8322             :                 /* --------------------------------------------------------------------
    8323             :                  */
    8324             :                 /*      We have a computed value from the source.  Now apply it
    8325             :                  * to      */
    8326             :                 /*      the destination pixel. */
    8327             :                 /* --------------------------------------------------------------------
    8328             :                  */
    8329    12355700 :                 if (bHasFoundDensity)
    8330             :                 {
    8331             :                     // TODO: Should we compute dfBandDensity in fct of
    8332             :                     // nCount/nCount2, or use as a threshold to set the dest
    8333             :                     // value?
    8334             :                     // dfBandDensity = (float) nCount / nCount2;
    8335             :                     // if( (float) nCount / nCount2 > 0.1 )
    8336             :                     // or fix gdalwarp crop_to_cutline to crop partially
    8337             :                     // overlapping pixels.
    8338    10003600 :                     GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    8339             :                                      dfValueReal, dfValueImag,
    8340             :                                      bAvoidNoDataSingleBand);
    8341             :                 }
    8342             :             }
    8343             : 
    8344     5314750 :             if (!bHasFoundDensity)
    8345     1144510 :                 continue;
    8346             : 
    8347     4170240 :             if (!bAvoidNoDataSingleBand)
    8348             :             {
    8349           0 :                 GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    8350             :             }
    8351             : 
    8352             :             /* --------------------------------------------------------------------
    8353             :              */
    8354             :             /*      Update destination density/validity masks. */
    8355             :             /* --------------------------------------------------------------------
    8356             :              */
    8357     4170240 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    8358             : 
    8359     4170240 :             if (poWK->panDstValid != nullptr)
    8360             :             {
    8361        1184 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    8362             :             }
    8363             :         } /* Next iDstX */
    8364             : 
    8365             :         /* --------------------------------------------------------------------
    8366             :          */
    8367             :         /*      Report progress to the user, and optionally cancel out. */
    8368             :         /* --------------------------------------------------------------------
    8369             :          */
    8370       28558 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    8371           0 :             break;
    8372             :     }
    8373             : 
    8374             :     /* -------------------------------------------------------------------- */
    8375             :     /*      Cleanup and return.                                             */
    8376             :     /* -------------------------------------------------------------------- */
    8377         219 :     CPLFree(padfX);
    8378         219 :     CPLFree(padfY);
    8379         219 :     CPLFree(padfZ);
    8380         219 :     CPLFree(padfX2);
    8381         219 :     CPLFree(padfY2);
    8382         219 :     CPLFree(padfZ2);
    8383         219 :     CPLFree(pabSuccess);
    8384         219 :     CPLFree(pabSuccess2);
    8385         219 :     VSIFree(pafCounts);
    8386             : }
    8387             : 
    8388             : /************************************************************************/
    8389             : /*                           getOrientation()                           */
    8390             : /************************************************************************/
    8391             : 
    8392             : // Returns 1 whether (p1,p2,p3) is clockwise oriented,
    8393             : // -1 if it is counter-clockwise oriented,
    8394             : // or 0 if it is colinear.
    8395     2355910 : static int getOrientation(const XYPair &p1, const XYPair &p2, const XYPair &p3)
    8396             : {
    8397     2355910 :     const double p1x = p1.first;
    8398     2355910 :     const double p1y = p1.second;
    8399     2355910 :     const double p2x = p2.first;
    8400     2355910 :     const double p2y = p2.second;
    8401     2355910 :     const double p3x = p3.first;
    8402     2355910 :     const double p3y = p3.second;
    8403     2355910 :     const double val = (p2y - p1y) * (p3x - p2x) - (p2x - p1x) * (p3y - p2y);
    8404     2355910 :     if (std::abs(val) < 1e-20)
    8405        2690 :         return 0;
    8406     2353220 :     else if (val > 0)
    8407           0 :         return 1;
    8408             :     else
    8409     2353220 :         return -1;
    8410             : }
    8411             : 
    8412             : /************************************************************************/
    8413             : /*                              isConvex()                              */
    8414             : /************************************************************************/
    8415             : 
    8416             : // poly must be closed
    8417      785302 : static bool isConvex(const XYPoly &poly)
    8418             : {
    8419      785302 :     const size_t n = poly.size();
    8420      785302 :     size_t i = 0;
    8421      785302 :     int last_orientation = getOrientation(poly[i], poly[i + 1], poly[i + 2]);
    8422      785302 :     ++i;
    8423     2355910 :     for (; i < n - 2; ++i)
    8424             :     {
    8425             :         const int orientation =
    8426     1570600 :             getOrientation(poly[i], poly[i + 1], poly[i + 2]);
    8427     1570600 :         if (orientation != 0)
    8428             :         {
    8429     1567910 :             if (last_orientation == 0)
    8430           0 :                 last_orientation = orientation;
    8431     1567910 :             else if (orientation != last_orientation)
    8432           0 :                 return false;
    8433             :         }
    8434             :     }
    8435      785302 :     return true;
    8436             : }
    8437             : 
    8438             : /************************************************************************/
    8439             : /*                     pointIntersectsConvexPoly()                      */
    8440             : /************************************************************************/
    8441             : 
    8442             : // Returns whether xy intersects poly, that must be closed and convex.
    8443     6049100 : static bool pointIntersectsConvexPoly(const XYPair &xy, const XYPoly &poly)
    8444             : {
    8445     6049100 :     const size_t n = poly.size();
    8446     6049100 :     double dx1 = xy.first - poly[0].first;
    8447     6049100 :     double dy1 = xy.second - poly[0].second;
    8448     6049100 :     double dx2 = poly[1].first - poly[0].first;
    8449     6049100 :     double dy2 = poly[1].second - poly[0].second;
    8450     6049100 :     double prevCrossProduct = dx1 * dy2 - dx2 * dy1;
    8451             : 
    8452             :     // Check if the point remains on the same side (left/right) of all edges
    8453    14556400 :     for (size_t i = 2; i < n; i++)
    8454             :     {
    8455    12793100 :         dx1 = xy.first - poly[i - 1].first;
    8456    12793100 :         dy1 = xy.second - poly[i - 1].second;
    8457             : 
    8458    12793100 :         dx2 = poly[i].first - poly[i - 1].first;
    8459    12793100 :         dy2 = poly[i].second - poly[i - 1].second;
    8460             : 
    8461    12793100 :         double crossProduct = dx1 * dy2 - dx2 * dy1;
    8462    12793100 :         if (std::abs(prevCrossProduct) < 1e-20)
    8463      725558 :             prevCrossProduct = crossProduct;
    8464    12067500 :         else if (prevCrossProduct * crossProduct < 0)
    8465     4285760 :             return false;
    8466             :     }
    8467             : 
    8468     1763340 :     return true;
    8469             : }
    8470             : 
    8471             : /************************************************************************/
    8472             : /*                          getIntersection()                           */
    8473             : /************************************************************************/
    8474             : 
    8475             : /* Returns intersection of [p1,p2] with [p3,p4], if
    8476             :  * it is a single point, and the 2 segments are not colinear.
    8477             :  */
    8478    11811000 : static bool getIntersection(const XYPair &p1, const XYPair &p2,
    8479             :                             const XYPair &p3, const XYPair &p4, XYPair &xy)
    8480             : {
    8481    11811000 :     const double x1 = p1.first;
    8482    11811000 :     const double y1 = p1.second;
    8483    11811000 :     const double x2 = p2.first;
    8484    11811000 :     const double y2 = p2.second;
    8485    11811000 :     const double x3 = p3.first;
    8486    11811000 :     const double y3 = p3.second;
    8487    11811000 :     const double x4 = p4.first;
    8488    11811000 :     const double y4 = p4.second;
    8489    11811000 :     const double t_num = (x1 - x3) * (y3 - y4) - (y1 - y3) * (x3 - x4);
    8490    11811000 :     const double denom = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4);
    8491    11811000 :     if (t_num * denom < 0 || std::abs(t_num) > std::abs(denom) || denom == 0)
    8492     9260780 :         return false;
    8493             : 
    8494     2550260 :     const double u_num = (x1 - x3) * (y1 - y2) - (y1 - y3) * (x1 - x2);
    8495     2550260 :     if (u_num * denom < 0 || std::abs(u_num) > std::abs(denom))
    8496      973924 :         return false;
    8497             : 
    8498     1576340 :     const double t = t_num / denom;
    8499     1576340 :     xy.first = x1 + t * (x2 - x1);
    8500     1576340 :     xy.second = y1 + t * (y2 - y1);
    8501     1576340 :     return true;
    8502             : }
    8503             : 
    8504             : /************************************************************************/
    8505             : /*                     getConvexPolyIntersection()                      */
    8506             : /************************************************************************/
    8507             : 
    8508             : // poly1 and poly2 must be closed and convex.
    8509             : // The returned intersection will not necessary be closed.
    8510      785302 : static void getConvexPolyIntersection(const XYPoly &poly1, const XYPoly &poly2,
    8511             :                                       XYPoly &intersection)
    8512             : {
    8513      785302 :     intersection.clear();
    8514             : 
    8515             :     // Add all points of poly1 inside poly2
    8516     3926510 :     for (size_t i = 0; i < poly1.size() - 1; ++i)
    8517             :     {
    8518     3141210 :         if (pointIntersectsConvexPoly(poly1[i], poly2))
    8519     1187430 :             intersection.push_back(poly1[i]);
    8520             :     }
    8521      785302 :     if (intersection.size() == poly1.size() - 1)
    8522             :     {
    8523             :         // poly1 is inside poly2
    8524      119100 :         return;
    8525             :     }
    8526             : 
    8527             :     // Add all points of poly2 inside poly1
    8528     3634860 :     for (size_t i = 0; i < poly2.size() - 1; ++i)
    8529             :     {
    8530     2907890 :         if (pointIntersectsConvexPoly(poly2[i], poly1))
    8531      575904 :             intersection.push_back(poly2[i]);
    8532             :     }
    8533             : 
    8534             :     // Compute the intersection of all edges of both polygons
    8535      726972 :     XYPair xy;
    8536     3634860 :     for (size_t i1 = 0; i1 < poly1.size() - 1; ++i1)
    8537             :     {
    8538    14539400 :         for (size_t i2 = 0; i2 < poly2.size() - 1; ++i2)
    8539             :         {
    8540    11631600 :             if (getIntersection(poly1[i1], poly1[i1 + 1], poly2[i2],
    8541    11631600 :                                 poly2[i2 + 1], xy))
    8542             :             {
    8543     1576230 :                 intersection.push_back(xy);
    8544             :             }
    8545             :         }
    8546             :     }
    8547             : 
    8548      726972 :     if (intersection.empty())
    8549       60770 :         return;
    8550             : 
    8551             :     // Find lowest-left point in intersection set
    8552      666202 :     double lowest_x = cpl::NumericLimits<double>::max();
    8553      666202 :     double lowest_y = cpl::NumericLimits<double>::max();
    8554     3772450 :     for (const auto &pair : intersection)
    8555             :     {
    8556     3106240 :         const double x = pair.first;
    8557     3106240 :         const double y = pair.second;
    8558     3106240 :         if (y < lowest_y || (y == lowest_y && x < lowest_x))
    8559             :         {
    8560     1096040 :             lowest_x = x;
    8561     1096040 :             lowest_y = y;
    8562             :         }
    8563             :     }
    8564             : 
    8565     5737980 :     const auto sortFunc = [&](const XYPair &p1, const XYPair &p2)
    8566             :     {
    8567     5737980 :         const double p1x_diff = p1.first - lowest_x;
    8568     5737980 :         const double p1y_diff = p1.second - lowest_y;
    8569     5737980 :         const double p2x_diff = p2.first - lowest_x;
    8570     5737980 :         const double p2y_diff = p2.second - lowest_y;
    8571     5737980 :         if (p2y_diff == 0.0 && p1y_diff == 0.0)
    8572             :         {
    8573     2655420 :             if (p1x_diff >= 0)
    8574             :             {
    8575     2655420 :                 if (p2x_diff >= 0)
    8576     2655420 :                     return p1.first < p2.first;
    8577           0 :                 return true;
    8578             :             }
    8579             :             else
    8580             :             {
    8581           0 :                 if (p2x_diff >= 0)
    8582           0 :                     return false;
    8583           0 :                 return p1.first < p2.first;
    8584             :             }
    8585             :         }
    8586             : 
    8587     3082560 :         if (p2x_diff == 0.0 && p1x_diff == 0.0)
    8588     1046960 :             return p1.second < p2.second;
    8589             : 
    8590             :         double tan_p1;
    8591     2035600 :         if (p1x_diff == 0.0)
    8592      464622 :             tan_p1 = p1y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
    8593             :         else
    8594     1570980 :             tan_p1 = p1y_diff / p1x_diff;
    8595             : 
    8596             :         double tan_p2;
    8597     2035600 :         if (p2x_diff == 0.0)
    8598      839515 :             tan_p2 = p2y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
    8599             :         else
    8600     1196080 :             tan_p2 = p2y_diff / p2x_diff;
    8601             : 
    8602     2035600 :         if (tan_p1 >= 0)
    8603             :         {
    8604     1904790 :             if (tan_p2 >= 0)
    8605     1881590 :                 return tan_p1 < tan_p2;
    8606             :             else
    8607       23199 :                 return true;
    8608             :         }
    8609             :         else
    8610             :         {
    8611      130806 :             if (tan_p2 >= 0)
    8612      103900 :                 return false;
    8613             :             else
    8614       26906 :                 return tan_p1 < tan_p2;
    8615             :         }
    8616      666202 :     };
    8617             : 
    8618             :     // Sort points by increasing atan2(y-lowest_y, x-lowest_x) to form a convex
    8619             :     // hull
    8620      666202 :     std::sort(intersection.begin(), intersection.end(), sortFunc);
    8621             : 
    8622             :     // Remove duplicated points
    8623      666202 :     size_t j = 1;
    8624     3106240 :     for (size_t i = 1; i < intersection.size(); ++i)
    8625             :     {
    8626     2440040 :         if (intersection[i] != intersection[i - 1])
    8627             :         {
    8628     1452560 :             if (j < i)
    8629      545275 :                 intersection[j] = intersection[i];
    8630     1452560 :             ++j;
    8631             :         }
    8632             :     }
    8633      666202 :     intersection.resize(j);
    8634             : }
    8635             : 
    8636             : /************************************************************************/
    8637             : /*                          GWKSumPreserving()                          */
    8638             : /************************************************************************/
    8639             : 
    8640             : static void GWKSumPreservingThread(void *pData);
    8641             : 
    8642          19 : static CPLErr GWKSumPreserving(GDALWarpKernel *poWK)
    8643             : {
    8644          19 :     return GWKRun(poWK, "GWKSumPreserving", GWKSumPreservingThread);
    8645             : }
    8646             : 
    8647          19 : static void GWKSumPreservingThread(void *pData)
    8648             : {
    8649          19 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    8650          19 :     GDALWarpKernel *poWK = psJob->poWK;
    8651          19 :     const int iYMin = psJob->iYMin;
    8652          19 :     const int iYMax = psJob->iYMax;
    8653             :     const bool bIsAffineNoRotation =
    8654          19 :         GDALTransformIsAffineNoRotation(poWK->pfnTransformer,
    8655          28 :                                         poWK->pTransformerArg) &&
    8656             :         // for debug/testing purposes
    8657           9 :         CPLTestBool(
    8658          19 :             CPLGetConfigOption("GDAL_WARP_USE_AFFINE_OPTIMIZATION", "YES"));
    8659             :     const bool bAvoidNoDataSingleBand =
    8660          21 :         poWK->nBands == 1 ||
    8661           2 :         !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
    8662          19 :                                           "UNIFIED_SRC_NODATA", "FALSE"));
    8663             : 
    8664          19 :     const int nDstXSize = poWK->nDstXSize;
    8665          19 :     const int nSrcXSize = poWK->nSrcXSize;
    8666          19 :     const int nSrcYSize = poWK->nSrcYSize;
    8667             : 
    8668          38 :     std::vector<double> adfX0(nSrcXSize + 1);
    8669          38 :     std::vector<double> adfY0(nSrcXSize + 1);
    8670          38 :     std::vector<double> adfZ0(nSrcXSize + 1);
    8671          38 :     std::vector<double> adfX1(nSrcXSize + 1);
    8672          38 :     std::vector<double> adfY1(nSrcXSize + 1);
    8673          38 :     std::vector<double> adfZ1(nSrcXSize + 1);
    8674          38 :     std::vector<int> abSuccess0(nSrcXSize + 1);
    8675          38 :     std::vector<int> abSuccess1(nSrcXSize + 1);
    8676             : 
    8677             :     CPLRectObj sGlobalBounds;
    8678          19 :     sGlobalBounds.minx = -2 * poWK->dfXScale;
    8679          19 :     sGlobalBounds.miny = iYMin - 2 * poWK->dfYScale;
    8680          19 :     sGlobalBounds.maxx = nDstXSize + 2 * poWK->dfXScale;
    8681          19 :     sGlobalBounds.maxy = iYMax + 2 * poWK->dfYScale;
    8682          19 :     CPLQuadTree *hQuadTree = CPLQuadTreeCreate(&sGlobalBounds, nullptr);
    8683             : 
    8684             :     struct SourcePixel
    8685             :     {
    8686             :         int iSrcX;
    8687             :         int iSrcY;
    8688             : 
    8689             :         // Coordinates of source pixel in target pixel coordinates
    8690             :         double dfDstX0;
    8691             :         double dfDstY0;
    8692             :         double dfDstX1;
    8693             :         double dfDstY1;
    8694             :         double dfDstX2;
    8695             :         double dfDstY2;
    8696             :         double dfDstX3;
    8697             :         double dfDstY3;
    8698             : 
    8699             :         // Source pixel total area (might be larger than the one described
    8700             :         // by above coordinates, if the pixel was crossing the antimeridian
    8701             :         // and split)
    8702             :         double dfArea;
    8703             :     };
    8704             : 
    8705          38 :     std::vector<SourcePixel> sourcePixels;
    8706             : 
    8707          38 :     XYPoly discontinuityLeft(5);
    8708          38 :     XYPoly discontinuityRight(5);
    8709             : 
    8710             :     /* ==================================================================== */
    8711             :     /*      First pass: transform the 4 corners of each potential           */
    8712             :     /*      contributing source pixel to target pixel coordinates.          */
    8713             :     /* ==================================================================== */
    8714             : 
    8715             :     // Special case for top line
    8716             :     {
    8717          19 :         int iY = 0;
    8718        3364 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8719             :         {
    8720        3345 :             adfX1[iX] = iX + poWK->nSrcXOff;
    8721        3345 :             adfY1[iX] = iY + poWK->nSrcYOff;
    8722        3345 :             adfZ1[iX] = 0;
    8723             :         }
    8724             : 
    8725          19 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
    8726             :                              adfX1.data(), adfY1.data(), adfZ1.data(),
    8727             :                              abSuccess1.data());
    8728             : 
    8729        3364 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8730             :         {
    8731        3345 :             if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
    8732           0 :                 abSuccess1[iX] = FALSE;
    8733             :             else
    8734             :             {
    8735        3345 :                 adfX1[iX] -= poWK->nDstXOff;
    8736        3345 :                 adfY1[iX] -= poWK->nDstYOff;
    8737             :             }
    8738             :         }
    8739             :     }
    8740             : 
    8741        2032 :     const auto getInsideXSign = [poWK, nDstXSize](double dfX)
    8742             :     {
    8743        2032 :         return dfX - poWK->nDstXOff >= -2 * poWK->dfXScale &&
    8744         872 :                        dfX - poWK->nDstXOff <= nDstXSize + 2 * poWK->dfXScale
    8745        2032 :                    ? 1
    8746        1160 :                    : -1;
    8747          19 :     };
    8748             : 
    8749             :     const auto FindDiscontinuity =
    8750          80 :         [poWK, psJob, getInsideXSign](
    8751             :             double dfXLeft, double dfXRight, double dfY,
    8752             :             int XLeftReprojectedInsideSign, double &dfXMidReprojectedLeft,
    8753         800 :             double &dfXMidReprojectedRight, double &dfYMidReprojected)
    8754             :     {
    8755         880 :         for (int i = 0; i < 10 && dfXRight - dfXLeft > 1e-8; ++i)
    8756             :         {
    8757         800 :             double dfXMid = (dfXLeft + dfXRight) / 2;
    8758         800 :             double dfXMidReprojected = dfXMid;
    8759         800 :             dfYMidReprojected = dfY;
    8760         800 :             double dfZ = 0;
    8761         800 :             int nSuccess = 0;
    8762         800 :             poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1,
    8763             :                                  &dfXMidReprojected, &dfYMidReprojected, &dfZ,
    8764             :                                  &nSuccess);
    8765         800 :             if (XLeftReprojectedInsideSign != getInsideXSign(dfXMidReprojected))
    8766             :             {
    8767         456 :                 dfXRight = dfXMid;
    8768         456 :                 dfXMidReprojectedRight = dfXMidReprojected;
    8769             :             }
    8770             :             else
    8771             :             {
    8772         344 :                 dfXLeft = dfXMid;
    8773         344 :                 dfXMidReprojectedLeft = dfXMidReprojected;
    8774             :             }
    8775             :         }
    8776          80 :     };
    8777             : 
    8778        2685 :     for (int iY = 0; iY < nSrcYSize; ++iY)
    8779             :     {
    8780        2666 :         std::swap(adfX0, adfX1);
    8781        2666 :         std::swap(adfY0, adfY1);
    8782        2666 :         std::swap(adfZ0, adfZ1);
    8783        2666 :         std::swap(abSuccess0, abSuccess1);
    8784             : 
    8785     4836120 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8786             :         {
    8787     4833460 :             adfX1[iX] = iX + poWK->nSrcXOff;
    8788     4833460 :             adfY1[iX] = iY + 1 + poWK->nSrcYOff;
    8789     4833460 :             adfZ1[iX] = 0;
    8790             :         }
    8791             : 
    8792        2666 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
    8793             :                              adfX1.data(), adfY1.data(), adfZ1.data(),
    8794             :                              abSuccess1.data());
    8795             : 
    8796     4836120 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8797             :         {
    8798     4833460 :             if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
    8799           0 :                 abSuccess1[iX] = FALSE;
    8800             :             else
    8801             :             {
    8802     4833460 :                 adfX1[iX] -= poWK->nDstXOff;
    8803     4833460 :                 adfY1[iX] -= poWK->nDstYOff;
    8804             :             }
    8805             :         }
    8806             : 
    8807     4833460 :         for (int iX = 0; iX < nSrcXSize; ++iX)
    8808             :         {
    8809     9661580 :             if (abSuccess0[iX] && abSuccess0[iX + 1] && abSuccess1[iX] &&
    8810     4830790 :                 abSuccess1[iX + 1])
    8811             :             {
    8812             :                 /* --------------------------------------------------------------------
    8813             :                  */
    8814             :                 /*      Do not try to apply transparent source pixels to the
    8815             :                  * destination.*/
    8816             :                 /* --------------------------------------------------------------------
    8817             :                  */
    8818     4830790 :                 const auto iSrcOffset =
    8819     4830790 :                     iX + static_cast<GPtrDiff_t>(iY) * nSrcXSize;
    8820     9560570 :                 if (poWK->panUnifiedSrcValid != nullptr &&
    8821     4729780 :                     !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    8822             :                 {
    8823     4738340 :                     continue;
    8824             :                 }
    8825             : 
    8826      103415 :                 if (poWK->pafUnifiedSrcDensity != nullptr)
    8827             :                 {
    8828           0 :                     if (poWK->pafUnifiedSrcDensity[iSrcOffset] <
    8829             :                         SRC_DENSITY_THRESHOLD_FLOAT)
    8830           0 :                         continue;
    8831             :                 }
    8832             : 
    8833             :                 SourcePixel sp;
    8834      103415 :                 sp.dfArea = 0;
    8835      103415 :                 sp.dfDstX0 = adfX0[iX];
    8836      103415 :                 sp.dfDstY0 = adfY0[iX];
    8837      103415 :                 sp.dfDstX1 = adfX0[iX + 1];
    8838      103415 :                 sp.dfDstY1 = adfY0[iX + 1];
    8839      103415 :                 sp.dfDstX2 = adfX1[iX + 1];
    8840      103415 :                 sp.dfDstY2 = adfY1[iX + 1];
    8841      103415 :                 sp.dfDstX3 = adfX1[iX];
    8842      103415 :                 sp.dfDstY3 = adfY1[iX];
    8843             : 
    8844             :                 // Detect pixel that likely cross the anti-meridian and
    8845             :                 // introduce a discontinuity when reprojected.
    8846             : 
    8847      103415 :                 if (std::fabs(adfX0[iX] - adfX0[iX + 1]) > 2 * poWK->dfXScale &&
    8848          80 :                     std::fabs(adfX1[iX] - adfX1[iX + 1]) > 2 * poWK->dfXScale &&
    8849          40 :                     getInsideXSign(adfX0[iX]) !=
    8850          80 :                         getInsideXSign(adfX0[iX + 1]) &&
    8851          80 :                     getInsideXSign(adfX0[iX]) == getInsideXSign(adfX1[iX]) &&
    8852          40 :                     getInsideXSign(adfX0[iX + 1]) ==
    8853      103495 :                         getInsideXSign(adfX1[iX + 1]) &&
    8854          40 :                     (adfY1[iX] - adfY0[iX]) * (adfY1[iX + 1] - adfY0[iX + 1]) >
    8855             :                         0)
    8856             :                 {
    8857             : #ifdef DEBUG_VERBOSE
    8858             :                     CPLDebug(
    8859             :                         "WARP",
    8860             :                         "Discontinuity for iSrcX=%d, iSrcY=%d, dest corners:"
    8861             :                         "X0[iX]=%f X0[iX+1]=%f X1[iX]=%f X1[iX+1]=%f,"
    8862             :                         "Y0[iX]=%f Y0[iX+1]=%f Y1[iX]=%f Y1[iX+1]=%f",
    8863             :                         iX + poWK->nSrcXOff, iY + poWK->nSrcYOff, adfX0[iX],
    8864             :                         adfX0[iX + 1], adfX1[iX], adfX1[iX + 1], adfY0[iX],
    8865             :                         adfY0[iX + 1], adfY1[iX], adfY1[iX + 1]);
    8866             : #endif
    8867          40 :                     double dfXMidReprojectedLeftTop = 0;
    8868          40 :                     double dfXMidReprojectedRightTop = 0;
    8869          40 :                     double dfYMidReprojectedTop = 0;
    8870          40 :                     FindDiscontinuity(
    8871          40 :                         iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
    8872          80 :                         iY + poWK->nSrcYOff, getInsideXSign(adfX0[iX]),
    8873             :                         dfXMidReprojectedLeftTop, dfXMidReprojectedRightTop,
    8874             :                         dfYMidReprojectedTop);
    8875          40 :                     double dfXMidReprojectedLeftBottom = 0;
    8876          40 :                     double dfXMidReprojectedRightBottom = 0;
    8877          40 :                     double dfYMidReprojectedBottom = 0;
    8878          40 :                     FindDiscontinuity(
    8879          40 :                         iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
    8880          80 :                         iY + poWK->nSrcYOff + 1, getInsideXSign(adfX1[iX]),
    8881             :                         dfXMidReprojectedLeftBottom,
    8882             :                         dfXMidReprojectedRightBottom, dfYMidReprojectedBottom);
    8883             : 
    8884          40 :                     discontinuityLeft[0] = XYPair(adfX0[iX], adfY0[iX]);
    8885          40 :                     discontinuityLeft[1] =
    8886          80 :                         XYPair(dfXMidReprojectedLeftTop, dfYMidReprojectedTop);
    8887          40 :                     discontinuityLeft[2] = XYPair(dfXMidReprojectedLeftBottom,
    8888          40 :                                                   dfYMidReprojectedBottom);
    8889          40 :                     discontinuityLeft[3] = XYPair(adfX1[iX], adfY1[iX]);
    8890          40 :                     discontinuityLeft[4] = XYPair(adfX0[iX], adfY0[iX]);
    8891             : 
    8892          40 :                     discontinuityRight[0] =
    8893          80 :                         XYPair(adfX0[iX + 1], adfY0[iX + 1]);
    8894          40 :                     discontinuityRight[1] =
    8895          80 :                         XYPair(dfXMidReprojectedRightTop, dfYMidReprojectedTop);
    8896          40 :                     discontinuityRight[2] = XYPair(dfXMidReprojectedRightBottom,
    8897          40 :                                                    dfYMidReprojectedBottom);
    8898          40 :                     discontinuityRight[3] =
    8899          80 :                         XYPair(adfX1[iX + 1], adfY1[iX + 1]);
    8900          40 :                     discontinuityRight[4] =
    8901          80 :                         XYPair(adfX0[iX + 1], adfY0[iX + 1]);
    8902             : 
    8903          40 :                     sp.dfArea = getArea(discontinuityLeft) +
    8904          40 :                                 getArea(discontinuityRight);
    8905          40 :                     if (getInsideXSign(adfX0[iX]) >= 1)
    8906             :                     {
    8907          20 :                         sp.dfDstX1 = dfXMidReprojectedLeftTop;
    8908          20 :                         sp.dfDstY1 = dfYMidReprojectedTop;
    8909          20 :                         sp.dfDstX2 = dfXMidReprojectedLeftBottom;
    8910          20 :                         sp.dfDstY2 = dfYMidReprojectedBottom;
    8911             :                     }
    8912             :                     else
    8913             :                     {
    8914          20 :                         sp.dfDstX0 = dfXMidReprojectedRightTop;
    8915          20 :                         sp.dfDstY0 = dfYMidReprojectedTop;
    8916          20 :                         sp.dfDstX3 = dfXMidReprojectedRightBottom;
    8917          20 :                         sp.dfDstY3 = dfYMidReprojectedBottom;
    8918             :                     }
    8919             :                 }
    8920             : 
    8921             :                 // Bounding box of source pixel (expressed in target pixel
    8922             :                 // coordinates)
    8923             :                 CPLRectObj sRect;
    8924      103415 :                 sRect.minx = std::min(std::min(sp.dfDstX0, sp.dfDstX1),
    8925      103415 :                                       std::min(sp.dfDstX2, sp.dfDstX3));
    8926      103415 :                 sRect.miny = std::min(std::min(sp.dfDstY0, sp.dfDstY1),
    8927      103415 :                                       std::min(sp.dfDstY2, sp.dfDstY3));
    8928      103415 :                 sRect.maxx = std::max(std::max(sp.dfDstX0, sp.dfDstX1),
    8929      103415 :                                       std::max(sp.dfDstX2, sp.dfDstX3));
    8930      103415 :                 sRect.maxy = std::max(std::max(sp.dfDstY0, sp.dfDstY1),
    8931      103415 :                                       std::max(sp.dfDstY2, sp.dfDstY3));
    8932      103415 :                 if (!(sRect.minx < nDstXSize && sRect.maxx > 0 &&
    8933      101355 :                       sRect.miny < iYMax && sRect.maxy > iYMin))
    8934             :                 {
    8935       10852 :                     continue;
    8936             :                 }
    8937             : 
    8938       92563 :                 sp.iSrcX = iX;
    8939       92563 :                 sp.iSrcY = iY;
    8940             : 
    8941       92563 :                 if (!bIsAffineNoRotation)
    8942             :                 {
    8943             :                     // Check polygon validity (no self-crossing)
    8944       89745 :                     XYPair xy;
    8945       89745 :                     if (getIntersection(XYPair(sp.dfDstX0, sp.dfDstY0),
    8946       89745 :                                         XYPair(sp.dfDstX1, sp.dfDstY1),
    8947       89745 :                                         XYPair(sp.dfDstX2, sp.dfDstY2),
    8948      269235 :                                         XYPair(sp.dfDstX3, sp.dfDstY3), xy) ||
    8949       89745 :                         getIntersection(XYPair(sp.dfDstX1, sp.dfDstY1),
    8950       89745 :                                         XYPair(sp.dfDstX2, sp.dfDstY2),
    8951       89745 :                                         XYPair(sp.dfDstX0, sp.dfDstY0),
    8952      179490 :                                         XYPair(sp.dfDstX3, sp.dfDstY3), xy))
    8953             :                     {
    8954         113 :                         continue;
    8955             :                     }
    8956             :                 }
    8957             : 
    8958       92450 :                 CPLQuadTreeInsertWithBounds(
    8959             :                     hQuadTree,
    8960             :                     reinterpret_cast<void *>(
    8961       92450 :                         static_cast<uintptr_t>(sourcePixels.size())),
    8962             :                     &sRect);
    8963             : 
    8964       92450 :                 sourcePixels.push_back(sp);
    8965             :             }
    8966             :         }
    8967             :     }
    8968             : 
    8969          38 :     std::vector<double> adfRealValue(poWK->nBands);
    8970          38 :     std::vector<double> adfImagValue(poWK->nBands);
    8971          38 :     std::vector<double> adfBandDensity(poWK->nBands);
    8972          38 :     std::vector<double> adfWeight(poWK->nBands);
    8973             : 
    8974             : #ifdef CHECK_SUM_WITH_GEOS
    8975             :     auto hGEOSContext = OGRGeometry::createGEOSContext();
    8976             :     auto seq1 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
    8977             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 0, 0.0, 0.0);
    8978             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 1, 1.0, 0.0);
    8979             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 2, 1.0, 1.0);
    8980             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 3, 0.0, 1.0);
    8981             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 4, 0.0, 0.0);
    8982             :     auto hLR1 = GEOSGeom_createLinearRing_r(hGEOSContext, seq1);
    8983             :     auto hP1 = GEOSGeom_createPolygon_r(hGEOSContext, hLR1, nullptr, 0);
    8984             : 
    8985             :     auto seq2 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
    8986             :     auto hLR2 = GEOSGeom_createLinearRing_r(hGEOSContext, seq2);
    8987             :     auto hP2 = GEOSGeom_createPolygon_r(hGEOSContext, hLR2, nullptr, 0);
    8988             : #endif
    8989             : 
    8990             :     const XYPoly xy1{
    8991          38 :         {0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}};
    8992          38 :     XYPoly xy2(5);
    8993          38 :     XYPoly xy2_triangle(4);
    8994          38 :     XYPoly intersection;
    8995             : 
    8996             :     /* ==================================================================== */
    8997             :     /*      Loop over output lines.                                         */
    8998             :     /* ==================================================================== */
    8999        1951 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    9000             :     {
    9001             :         CPLRectObj sRect;
    9002        1932 :         sRect.miny = iDstY;
    9003        1932 :         sRect.maxy = iDstY + 1;
    9004             : 
    9005             :         /* ====================================================================
    9006             :          */
    9007             :         /*      Loop over pixels in output scanline. */
    9008             :         /* ====================================================================
    9009             :          */
    9010     1403940 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    9011             :         {
    9012     1402010 :             sRect.minx = iDstX;
    9013     1402010 :             sRect.maxx = iDstX + 1;
    9014     1402010 :             int nSourcePixels = 0;
    9015             :             void **pahSourcePixel =
    9016     1402010 :                 CPLQuadTreeSearch(hQuadTree, &sRect, &nSourcePixels);
    9017     1402010 :             if (nSourcePixels == 0)
    9018             :             {
    9019     1183090 :                 CPLFree(pahSourcePixel);
    9020     1183100 :                 continue;
    9021             :             }
    9022             : 
    9023      218919 :             std::fill(adfRealValue.begin(), adfRealValue.end(), 0);
    9024      218919 :             std::fill(adfImagValue.begin(), adfImagValue.end(), 0);
    9025      218919 :             std::fill(adfBandDensity.begin(), adfBandDensity.end(), 0);
    9026      218919 :             std::fill(adfWeight.begin(), adfWeight.end(), 0);
    9027      218919 :             double dfDensity = 0;
    9028             :             // Just above zero to please Coverity Scan
    9029      218919 :             double dfTotalWeight = std::numeric_limits<double>::min();
    9030             : 
    9031             :             /* ====================================================================
    9032             :              */
    9033             :             /*          Iterate over each contributing source pixel to add its
    9034             :              */
    9035             :             /*          value weighted by the ratio of the area of its
    9036             :              * intersection  */
    9037             :             /*          with the target pixel divided by the area of the source
    9038             :              */
    9039             :             /*          pixel. */
    9040             :             /* ====================================================================
    9041             :              */
    9042     1020550 :             for (int i = 0; i < nSourcePixels; ++i)
    9043             :             {
    9044      801628 :                 const int iSourcePixel = static_cast<int>(
    9045      801628 :                     reinterpret_cast<uintptr_t>(pahSourcePixel[i]));
    9046      801628 :                 auto &sp = sourcePixels[iSourcePixel];
    9047             : 
    9048      801628 :                 double dfWeight = 0.0;
    9049      801628 :                 if (bIsAffineNoRotation)
    9050             :                 {
    9051             :                     // Optimization since the source pixel is a rectangle in
    9052             :                     // target pixel coordinates
    9053       16326 :                     double dfSrcMinX = std::min(sp.dfDstX0, sp.dfDstX2);
    9054       16326 :                     double dfSrcMaxX = std::max(sp.dfDstX0, sp.dfDstX2);
    9055       16326 :                     double dfSrcMinY = std::min(sp.dfDstY0, sp.dfDstY2);
    9056       16326 :                     double dfSrcMaxY = std::max(sp.dfDstY0, sp.dfDstY2);
    9057       16326 :                     double dfIntersMinX = std::max<double>(dfSrcMinX, iDstX);
    9058       16326 :                     double dfIntersMaxX = std::min(dfSrcMaxX, iDstX + 1.0);
    9059       16326 :                     double dfIntersMinY = std::max<double>(dfSrcMinY, iDstY);
    9060       16326 :                     double dfIntersMaxY = std::min(dfSrcMaxY, iDstY + 1.0);
    9061       16326 :                     dfWeight =
    9062       16326 :                         ((dfIntersMaxX - dfIntersMinX) *
    9063       16326 :                          (dfIntersMaxY - dfIntersMinY)) /
    9064       16326 :                         ((dfSrcMaxX - dfSrcMinX) * (dfSrcMaxY - dfSrcMinY));
    9065             :                 }
    9066             :                 else
    9067             :                 {
    9068             :                     // Compute the polygon of the source pixel in target pixel
    9069             :                     // coordinates, and shifted to the target pixel (unit square
    9070             :                     // coordinates)
    9071             : 
    9072      785302 :                     xy2[0] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
    9073      785302 :                     xy2[1] = {sp.dfDstX1 - iDstX, sp.dfDstY1 - iDstY};
    9074      785302 :                     xy2[2] = {sp.dfDstX2 - iDstX, sp.dfDstY2 - iDstY};
    9075      785302 :                     xy2[3] = {sp.dfDstX3 - iDstX, sp.dfDstY3 - iDstY};
    9076      785302 :                     xy2[4] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
    9077             : 
    9078      785302 :                     if (isConvex(xy2))
    9079             :                     {
    9080      785302 :                         getConvexPolyIntersection(xy1, xy2, intersection);
    9081      785302 :                         if (intersection.size() >= 3)
    9082             :                         {
    9083      468849 :                             dfWeight = getArea(intersection);
    9084             :                         }
    9085             :                     }
    9086             :                     else
    9087             :                     {
    9088             :                         // Split xy2 into 2 triangles.
    9089           0 :                         xy2_triangle[0] = xy2[0];
    9090           0 :                         xy2_triangle[1] = xy2[1];
    9091           0 :                         xy2_triangle[2] = xy2[2];
    9092           0 :                         xy2_triangle[3] = xy2[0];
    9093           0 :                         getConvexPolyIntersection(xy1, xy2_triangle,
    9094             :                                                   intersection);
    9095           0 :                         if (intersection.size() >= 3)
    9096             :                         {
    9097           0 :                             dfWeight = getArea(intersection);
    9098             :                         }
    9099             : 
    9100           0 :                         xy2_triangle[1] = xy2[2];
    9101           0 :                         xy2_triangle[2] = xy2[3];
    9102           0 :                         getConvexPolyIntersection(xy1, xy2_triangle,
    9103             :                                                   intersection);
    9104           0 :                         if (intersection.size() >= 3)
    9105             :                         {
    9106           0 :                             dfWeight += getArea(intersection);
    9107             :                         }
    9108             :                     }
    9109      785302 :                     if (dfWeight > 0.0)
    9110             :                     {
    9111      468828 :                         if (sp.dfArea == 0)
    9112       89592 :                             sp.dfArea = getArea(xy2);
    9113      468828 :                         dfWeight /= sp.dfArea;
    9114             :                     }
    9115             : 
    9116             : #ifdef CHECK_SUM_WITH_GEOS
    9117             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 0,
    9118             :                                          sp.dfDstX0 - iDstX,
    9119             :                                          sp.dfDstY0 - iDstY);
    9120             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 1,
    9121             :                                          sp.dfDstX1 - iDstX,
    9122             :                                          sp.dfDstY1 - iDstY);
    9123             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 2,
    9124             :                                          sp.dfDstX2 - iDstX,
    9125             :                                          sp.dfDstY2 - iDstY);
    9126             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 3,
    9127             :                                          sp.dfDstX3 - iDstX,
    9128             :                                          sp.dfDstY3 - iDstY);
    9129             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 4,
    9130             :                                          sp.dfDstX0 - iDstX,
    9131             :                                          sp.dfDstY0 - iDstY);
    9132             : 
    9133             :                     double dfWeightGEOS = 0.0;
    9134             :                     auto hIntersection =
    9135             :                         GEOSIntersection_r(hGEOSContext, hP1, hP2);
    9136             :                     if (hIntersection)
    9137             :                     {
    9138             :                         double dfIntersArea = 0.0;
    9139             :                         if (GEOSArea_r(hGEOSContext, hIntersection,
    9140             :                                        &dfIntersArea) &&
    9141             :                             dfIntersArea > 0)
    9142             :                         {
    9143             :                             double dfSourceArea = 0.0;
    9144             :                             if (GEOSArea_r(hGEOSContext, hP2, &dfSourceArea))
    9145             :                             {
    9146             :                                 dfWeightGEOS = dfIntersArea / dfSourceArea;
    9147             :                             }
    9148             :                         }
    9149             :                         GEOSGeom_destroy_r(hGEOSContext, hIntersection);
    9150             :                     }
    9151             :                     if (fabs(dfWeight - dfWeightGEOS) > 1e-5 * dfWeightGEOS)
    9152             :                     {
    9153             :                         /* ok */ printf("dfWeight=%f dfWeightGEOS=%f\n",
    9154             :                                         dfWeight, dfWeightGEOS);
    9155             :                         printf("xy2: ");  // ok
    9156             :                         for (const auto &xy : xy2)
    9157             :                             printf("[%f, %f], ", xy.first, xy.second);  // ok
    9158             :                         printf("\n");                                   // ok
    9159             :                         printf("intersection: ");                       // ok
    9160             :                         for (const auto &xy : intersection)
    9161             :                             printf("[%f, %f], ", xy.first, xy.second);  // ok
    9162             :                         printf("\n");                                   // ok
    9163             :                     }
    9164             : #endif
    9165             :                 }
    9166      801628 :                 if (dfWeight > 0.0)
    9167             :                 {
    9168             : #ifdef DEBUG_VERBOSE
    9169             : #if defined(DST_X) && defined(DST_Y)
    9170             :                     if (iDstX + poWK->nDstXOff == DST_X &&
    9171             :                         iDstY + poWK->nDstYOff == DST_Y)
    9172             :                     {
    9173             :                         CPLDebug("WARP",
    9174             :                                  "iSrcX = %d, iSrcY = %d, weight =%.17g",
    9175             :                                  sp.iSrcX + poWK->nSrcXOff,
    9176             :                                  sp.iSrcY + poWK->nSrcYOff, dfWeight);
    9177             :                     }
    9178             : #endif
    9179             : #endif
    9180             : 
    9181      474104 :                     const GPtrDiff_t iSrcOffset =
    9182      474104 :                         sp.iSrcX +
    9183      474104 :                         static_cast<GPtrDiff_t>(sp.iSrcY) * nSrcXSize;
    9184      474104 :                     dfTotalWeight += dfWeight;
    9185             : 
    9186      474104 :                     if (poWK->pafUnifiedSrcDensity != nullptr)
    9187             :                     {
    9188           0 :                         dfDensity +=
    9189           0 :                             dfWeight *
    9190           0 :                             double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    9191             :                     }
    9192             :                     else
    9193             :                     {
    9194      474104 :                         dfDensity += dfWeight;
    9195             :                     }
    9196             : 
    9197     1818730 :                     for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    9198             :                     {
    9199             :                         // Returns pixel value if it is not no data.
    9200             :                         double dfBandDensity;
    9201             :                         double dfRealValue;
    9202             :                         double dfImagValue;
    9203     2689250 :                         if (!(GWKGetPixelValue(poWK, iBand, iSrcOffset,
    9204             :                                                &dfBandDensity, &dfRealValue,
    9205             :                                                &dfImagValue) &&
    9206     1344620 :                               dfBandDensity > BAND_DENSITY_THRESHOLD))
    9207             :                         {
    9208           0 :                             continue;
    9209             :                         }
    9210             : #ifdef DEBUG_VERBOSE
    9211             : #if defined(DST_X) && defined(DST_Y)
    9212             :                         if (iDstX + poWK->nDstXOff == DST_X &&
    9213             :                             iDstY + poWK->nDstYOff == DST_Y)
    9214             :                         {
    9215             :                             CPLDebug("WARP", "value * weight = %.17g",
    9216             :                                      dfRealValue * dfWeight);
    9217             :                         }
    9218             : #endif
    9219             : #endif
    9220             : 
    9221     1344620 :                         adfRealValue[iBand] += dfRealValue * dfWeight;
    9222     1344620 :                         adfImagValue[iBand] += dfImagValue * dfWeight;
    9223     1344620 :                         adfBandDensity[iBand] += dfBandDensity * dfWeight;
    9224     1344620 :                         adfWeight[iBand] += dfWeight;
    9225             :                     }
    9226             :                 }
    9227             :             }
    9228             : 
    9229      218919 :             CPLFree(pahSourcePixel);
    9230             : 
    9231             :             /* --------------------------------------------------------------------
    9232             :              */
    9233             :             /*          Update destination pixel value. */
    9234             :             /* --------------------------------------------------------------------
    9235             :              */
    9236      218919 :             bool bHasFoundDensity = false;
    9237      218919 :             const GPtrDiff_t iDstOffset =
    9238      218919 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    9239      827838 :             for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    9240             :             {
    9241      608919 :                 if (adfWeight[iBand] > 0)
    9242             :                 {
    9243             :                     const double dfBandDensity =
    9244      608909 :                         adfBandDensity[iBand] / adfWeight[iBand];
    9245      608909 :                     if (dfBandDensity > BAND_DENSITY_THRESHOLD)
    9246             :                     {
    9247      608909 :                         bHasFoundDensity = true;
    9248      608909 :                         GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    9249      608909 :                                          adfRealValue[iBand],
    9250      608909 :                                          adfImagValue[iBand],
    9251             :                                          bAvoidNoDataSingleBand);
    9252             :                     }
    9253             :                 }
    9254             :             }
    9255             : 
    9256      218919 :             if (!bHasFoundDensity)
    9257          10 :                 continue;
    9258             : 
    9259      218909 :             if (!bAvoidNoDataSingleBand)
    9260             :             {
    9261           0 :                 GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    9262             :             }
    9263             : 
    9264             :             /* --------------------------------------------------------------------
    9265             :              */
    9266             :             /*          Update destination density/validity masks. */
    9267             :             /* --------------------------------------------------------------------
    9268             :              */
    9269      218909 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity / dfTotalWeight);
    9270             : 
    9271      218909 :             if (poWK->panDstValid != nullptr)
    9272             :             {
    9273       11752 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    9274             :             }
    9275             :         }
    9276             : 
    9277             :         /* --------------------------------------------------------------------
    9278             :          */
    9279             :         /*      Report progress to the user, and optionally cancel out. */
    9280             :         /* --------------------------------------------------------------------
    9281             :          */
    9282        1932 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    9283           0 :             break;
    9284             :     }
    9285             : 
    9286             : #ifdef CHECK_SUM_WITH_GEOS
    9287             :     GEOSGeom_destroy_r(hGEOSContext, hP1);
    9288             :     GEOSGeom_destroy_r(hGEOSContext, hP2);
    9289             :     OGRGeometry::freeGEOSContext(hGEOSContext);
    9290             : #endif
    9291          19 :     CPLQuadTreeDestroy(hQuadTree);
    9292          19 : }

Generated by: LCOV version 1.14