LCOV - code coverage report
Current view: top level - alg - gdalwarpkernel.cpp (source / functions) Hit Total Coverage
Test: gdal_filtered.info Lines: 3330 3890 85.6 %
Date: 2025-09-10 17:48:50 Functions: 211 243 86.8 %

          Line data    Source code
       1             : /******************************************************************************
       2             :  *
       3             :  * Project:  High Performance Image Reprojector
       4             :  * Purpose:  Implementation of the GDALWarpKernel class.  Implements the actual
       5             :  *           image warping for a "chunk" of input and output imagery already
       6             :  *           loaded into memory.
       7             :  * Author:   Frank Warmerdam, warmerdam@pobox.com
       8             :  *
       9             :  ******************************************************************************
      10             :  * Copyright (c) 2003, Frank Warmerdam <warmerdam@pobox.com>
      11             :  * Copyright (c) 2008-2013, Even Rouault <even dot rouault at spatialys.com>
      12             :  *
      13             :  * SPDX-License-Identifier: MIT
      14             :  ****************************************************************************/
      15             : 
      16             : #include "cpl_port.h"
      17             : #include "gdalwarper.h"
      18             : 
      19             : #include <cfloat>
      20             : #include <cmath>
      21             : #include <cstddef>
      22             : #include <cstdlib>
      23             : #include <cstring>
      24             : 
      25             : #include <algorithm>
      26             : #include <limits>
      27             : #include <mutex>
      28             : #include <new>
      29             : #include <utility>
      30             : #include <vector>
      31             : 
      32             : #include "cpl_atomic_ops.h"
      33             : #include "cpl_conv.h"
      34             : #include "cpl_error.h"
      35             : #include "cpl_float.h"
      36             : #include "cpl_mask.h"
      37             : #include "cpl_multiproc.h"
      38             : #include "cpl_progress.h"
      39             : #include "cpl_string.h"
      40             : #include "cpl_vsi.h"
      41             : #include "cpl_worker_thread_pool.h"
      42             : #include "cpl_quad_tree.h"
      43             : #include "gdal.h"
      44             : #include "gdal_alg.h"
      45             : #include "gdal_alg_priv.h"
      46             : #include "gdal_thread_pool.h"
      47             : #include "gdalresamplingkernels.h"
      48             : 
      49             : // #define CHECK_SUM_WITH_GEOS
      50             : #ifdef CHECK_SUM_WITH_GEOS
      51             : #include "ogr_geometry.h"
      52             : #include "ogr_geos.h"
      53             : #endif
      54             : 
      55             : #ifdef USE_NEON_OPTIMIZATIONS
      56             : #include "include_sse2neon.h"
      57             : #define USE_SSE2
      58             : 
      59             : #include "gdalsse_priv.h"
      60             : 
      61             : // We restrict to 64bit processors because they are guaranteed to have SSE2.
      62             : // Could possibly be used too on 32bit, but we would need to check at runtime.
      63             : #elif defined(__x86_64) || defined(_M_X64)
      64             : #define USE_SSE2
      65             : 
      66             : #include "gdalsse_priv.h"
      67             : 
      68             : #if __SSE4_1__
      69             : #include <smmintrin.h>
      70             : #endif
      71             : 
      72             : #if __SSE3__
      73             : #include <pmmintrin.h>
      74             : #endif
      75             : 
      76             : #endif
      77             : 
      78             : constexpr double BAND_DENSITY_THRESHOLD = 0.0000000001;
      79             : constexpr float SRC_DENSITY_THRESHOLD_FLOAT = 0.000000001f;
      80             : constexpr double SRC_DENSITY_THRESHOLD_DOUBLE = 0.000000001;
      81             : 
      82             : // #define INSTANTIATE_FLOAT64_SSE2_IMPL
      83             : 
      84             : static const int anGWKFilterRadius[] = {
      85             :     0,  // Nearest neighbour
      86             :     1,  // Bilinear
      87             :     2,  // Cubic Convolution (Catmull-Rom)
      88             :     2,  // Cubic B-Spline
      89             :     3,  // Lanczos windowed sinc
      90             :     0,  // Average
      91             :     0,  // Mode
      92             :     0,  // Reserved GRA_Gauss=7
      93             :     0,  // Max
      94             :     0,  // Min
      95             :     0,  // Med
      96             :     0,  // Q1
      97             :     0,  // Q3
      98             :     0,  // Sum
      99             :     0,  // RMS
     100             : };
     101             : 
     102             : static double GWKBilinear(double dfX);
     103             : static double GWKCubic(double dfX);
     104             : static double GWKBSpline(double dfX);
     105             : static double GWKLanczosSinc(double dfX);
     106             : 
     107             : static const FilterFuncType apfGWKFilter[] = {
     108             :     nullptr,         // Nearest neighbour
     109             :     GWKBilinear,     // Bilinear
     110             :     GWKCubic,        // Cubic Convolution (Catmull-Rom)
     111             :     GWKBSpline,      // Cubic B-Spline
     112             :     GWKLanczosSinc,  // Lanczos windowed sinc
     113             :     nullptr,         // Average
     114             :     nullptr,         // Mode
     115             :     nullptr,         // Reserved GRA_Gauss=7
     116             :     nullptr,         // Max
     117             :     nullptr,         // Min
     118             :     nullptr,         // Med
     119             :     nullptr,         // Q1
     120             :     nullptr,         // Q3
     121             :     nullptr,         // Sum
     122             :     nullptr,         // RMS
     123             : };
     124             : 
     125             : // TODO(schwehr): Can we make these functions have a const * const arg?
     126             : static double GWKBilinear4Values(double *padfVals);
     127             : static double GWKCubic4Values(double *padfVals);
     128             : static double GWKBSpline4Values(double *padfVals);
     129             : static double GWKLanczosSinc4Values(double *padfVals);
     130             : 
     131             : static const FilterFunc4ValuesType apfGWKFilter4Values[] = {
     132             :     nullptr,                // Nearest neighbour
     133             :     GWKBilinear4Values,     // Bilinear
     134             :     GWKCubic4Values,        // Cubic Convolution (Catmull-Rom)
     135             :     GWKBSpline4Values,      // Cubic B-Spline
     136             :     GWKLanczosSinc4Values,  // Lanczos windowed sinc
     137             :     nullptr,                // Average
     138             :     nullptr,                // Mode
     139             :     nullptr,                // Reserved GRA_Gauss=7
     140             :     nullptr,                // Max
     141             :     nullptr,                // Min
     142             :     nullptr,                // Med
     143             :     nullptr,                // Q1
     144             :     nullptr,                // Q3
     145             :     nullptr,                // Sum
     146             :     nullptr,                // RMS
     147             : };
     148             : 
     149       13137 : int GWKGetFilterRadius(GDALResampleAlg eResampleAlg)
     150             : {
     151             :     static_assert(CPL_ARRAYSIZE(anGWKFilterRadius) == GRA_LAST_VALUE + 1,
     152             :                   "Bad size of anGWKFilterRadius");
     153       13137 :     return anGWKFilterRadius[eResampleAlg];
     154             : }
     155             : 
     156        5027 : FilterFuncType GWKGetFilterFunc(GDALResampleAlg eResampleAlg)
     157             : {
     158             :     static_assert(CPL_ARRAYSIZE(apfGWKFilter) == GRA_LAST_VALUE + 1,
     159             :                   "Bad size of apfGWKFilter");
     160        5027 :     return apfGWKFilter[eResampleAlg];
     161             : }
     162             : 
     163        5028 : FilterFunc4ValuesType GWKGetFilterFunc4Values(GDALResampleAlg eResampleAlg)
     164             : {
     165             :     static_assert(CPL_ARRAYSIZE(apfGWKFilter4Values) == GRA_LAST_VALUE + 1,
     166             :                   "Bad size of apfGWKFilter4Values");
     167        5028 :     return apfGWKFilter4Values[eResampleAlg];
     168             : }
     169             : 
     170             : static CPLErr GWKGeneralCase(GDALWarpKernel *);
     171             : static CPLErr GWKRealCase(GDALWarpKernel *poWK);
     172             : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     173             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     174             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     175             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
     176             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
     177             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
     178             : #endif
     179             : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     180             : static CPLErr GWKNearestByte(GDALWarpKernel *poWK);
     181             : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     182             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     183             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
     184             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
     185             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
     186             : #endif
     187             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     188             : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     189             : static CPLErr GWKNearestShort(GDALWarpKernel *poWK);
     190             : static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK);
     191             : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
     192             : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK);
     193             : static CPLErr GWKAverageOrMode(GDALWarpKernel *);
     194             : static CPLErr GWKSumPreserving(GDALWarpKernel *);
     195             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
     196             : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
     197             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
     198             : 
     199             : /************************************************************************/
     200             : /*                           GWKJobStruct                               */
     201             : /************************************************************************/
     202             : 
     203             : struct GWKJobStruct
     204             : {
     205             :     std::mutex &mutex;
     206             :     std::condition_variable &cv;
     207             :     int counterSingleThreaded = 0;
     208             :     int &counter;
     209             :     bool &stopFlag;
     210             :     GDALWarpKernel *poWK = nullptr;
     211             :     int iYMin = 0;
     212             :     int iYMax = 0;
     213             :     int (*pfnProgress)(GWKJobStruct *psJob) = nullptr;
     214             :     void *pTransformerArg = nullptr;
     215             :     // used by GWKRun() to assign the proper pTransformerArg
     216             :     void (*pfnFunc)(void *) = nullptr;
     217             : 
     218        2758 :     GWKJobStruct(std::mutex &mutex_, std::condition_variable &cv_,
     219             :                  int &counter_, bool &stopFlag_)
     220        2758 :         : mutex(mutex_), cv(cv_), counter(counter_), stopFlag(stopFlag_)
     221             :     {
     222        2758 :     }
     223             : };
     224             : 
     225             : struct GWKThreadData
     226             : {
     227             :     std::unique_ptr<CPLJobQueue> poJobQueue{};
     228             :     std::unique_ptr<std::vector<GWKJobStruct>> threadJobs{};
     229             :     int nMaxThreads{0};
     230             :     int counter{0};
     231             :     bool stopFlag{false};
     232             :     std::mutex mutex{};
     233             :     std::condition_variable cv{};
     234             :     bool bTransformerArgInputAssignedToThread{false};
     235             :     void *pTransformerArgInput{
     236             :         nullptr};  // owned by calling layer. Not to be destroyed
     237             :     std::map<GIntBig, void *> mapThreadToTransformerArg{};
     238             :     int nTotalThreadCountForThisRun = 0;
     239             :     int nCurThreadCountForThisRun = 0;
     240             : };
     241             : 
     242             : /************************************************************************/
     243             : /*                        GWKProgressThread()                           */
     244             : /************************************************************************/
     245             : 
     246             : // Return TRUE if the computation must be interrupted.
     247          36 : static int GWKProgressThread(GWKJobStruct *psJob)
     248             : {
     249          36 :     bool stop = false;
     250             :     {
     251          36 :         std::lock_guard<std::mutex> lock(psJob->mutex);
     252          36 :         psJob->counter++;
     253          36 :         stop = psJob->stopFlag;
     254             :     }
     255          36 :     psJob->cv.notify_one();
     256             : 
     257          36 :     return stop;
     258             : }
     259             : 
     260             : /************************************************************************/
     261             : /*                      GWKProgressMonoThread()                         */
     262             : /************************************************************************/
     263             : 
     264             : // Return TRUE if the computation must be interrupted.
     265      358950 : static int GWKProgressMonoThread(GWKJobStruct *psJob)
     266             : {
     267      358950 :     GDALWarpKernel *poWK = psJob->poWK;
     268      358952 :     if (!poWK->pfnProgress(poWK->dfProgressBase +
     269      358950 :                                poWK->dfProgressScale *
     270      358950 :                                    (++psJob->counterSingleThreaded /
     271      358950 :                                     static_cast<double>(psJob->iYMax)),
     272             :                            "", poWK->pProgress))
     273             :     {
     274           2 :         CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
     275           1 :         psJob->stopFlag = true;
     276           1 :         return TRUE;
     277             :     }
     278      358950 :     return FALSE;
     279             : }
     280             : 
     281             : /************************************************************************/
     282             : /*                       GWKGenericMonoThread()                         */
     283             : /************************************************************************/
     284             : 
     285        2739 : static CPLErr GWKGenericMonoThread(GDALWarpKernel *poWK,
     286             :                                    void (*pfnFunc)(void *pUserData))
     287             : {
     288        2739 :     GWKThreadData td;
     289             : 
     290             :     // NOTE: the mutex is not used.
     291        2737 :     GWKJobStruct job(td.mutex, td.cv, td.counter, td.stopFlag);
     292        2736 :     job.poWK = poWK;
     293        2736 :     job.iYMin = 0;
     294        2736 :     job.iYMax = poWK->nDstYSize;
     295        2736 :     job.pfnProgress = GWKProgressMonoThread;
     296        2736 :     job.pTransformerArg = poWK->pTransformerArg;
     297        2736 :     job.counterSingleThreaded = td.counter;
     298        2736 :     pfnFunc(&job);
     299        2739 :     td.counter = job.counterSingleThreaded;
     300             : 
     301        5478 :     return td.stopFlag ? CE_Failure : CE_None;
     302             : }
     303             : 
     304             : /************************************************************************/
     305             : /*                          GWKThreadsCreate()                          */
     306             : /************************************************************************/
     307             : 
     308        1622 : void *GWKThreadsCreate(char **papszWarpOptions,
     309             :                        GDALTransformerFunc /* pfnTransformer */,
     310             :                        void *pTransformerArg)
     311             : {
     312             :     const char *pszWarpThreads =
     313        1622 :         CSLFetchNameValue(papszWarpOptions, "NUM_THREADS");
     314        1622 :     if (pszWarpThreads == nullptr)
     315        1605 :         pszWarpThreads = CPLGetConfigOption("GDAL_NUM_THREADS", "1");
     316             : 
     317        1622 :     int nThreads = 0;
     318        1622 :     if (EQUAL(pszWarpThreads, "ALL_CPUS"))
     319           3 :         nThreads = CPLGetNumCPUs();
     320             :     else
     321        1619 :         nThreads = atoi(pszWarpThreads);
     322        1622 :     if (nThreads <= 1)
     323        1600 :         nThreads = 0;
     324        1622 :     if (nThreads > 128)
     325           0 :         nThreads = 128;
     326             : 
     327        1622 :     GWKThreadData *psThreadData = new GWKThreadData();
     328             :     auto poThreadPool =
     329        1622 :         nThreads > 0 ? GDALGetGlobalThreadPool(nThreads) : nullptr;
     330        1622 :     if (nThreads && poThreadPool)
     331             :     {
     332          22 :         psThreadData->nMaxThreads = nThreads;
     333          22 :         psThreadData->threadJobs.reset(new std::vector<GWKJobStruct>(
     334             :             nThreads,
     335          22 :             GWKJobStruct(psThreadData->mutex, psThreadData->cv,
     336          44 :                          psThreadData->counter, psThreadData->stopFlag)));
     337             : 
     338          22 :         psThreadData->poJobQueue = poThreadPool->CreateJobQueue();
     339          22 :         psThreadData->pTransformerArgInput = pTransformerArg;
     340             :     }
     341             : 
     342        1622 :     return psThreadData;
     343             : }
     344             : 
     345             : /************************************************************************/
     346             : /*                             GWKThreadsEnd()                          */
     347             : /************************************************************************/
     348             : 
     349        1622 : void GWKThreadsEnd(void *psThreadDataIn)
     350             : {
     351        1622 :     if (psThreadDataIn == nullptr)
     352           0 :         return;
     353             : 
     354        1622 :     GWKThreadData *psThreadData = static_cast<GWKThreadData *>(psThreadDataIn);
     355        1622 :     if (psThreadData->poJobQueue)
     356             :     {
     357             :         // cppcheck-suppress constVariableReference
     358          32 :         for (auto &pair : psThreadData->mapThreadToTransformerArg)
     359             :         {
     360          10 :             CPLAssert(pair.second != psThreadData->pTransformerArgInput);
     361          10 :             GDALDestroyTransformer(pair.second);
     362             :         }
     363          22 :         psThreadData->poJobQueue.reset();
     364             :     }
     365        1622 :     delete psThreadData;
     366             : }
     367             : 
     368             : /************************************************************************/
     369             : /*                         ThreadFuncAdapter()                          */
     370             : /************************************************************************/
     371             : 
     372          31 : static void ThreadFuncAdapter(void *pData)
     373             : {
     374          31 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
     375          31 :     GWKThreadData *psThreadData =
     376          31 :         static_cast<GWKThreadData *>(psJob->poWK->psThreadData);
     377             : 
     378             :     // Look if we have already a per-thread transformer
     379          31 :     void *pTransformerArg = nullptr;
     380          31 :     const GIntBig nThreadId = CPLGetPID();
     381             : 
     382             :     {
     383          62 :         std::lock_guard<std::mutex> lock(psThreadData->mutex);
     384          31 :         ++psThreadData->nCurThreadCountForThisRun;
     385             : 
     386          31 :         auto oIter = psThreadData->mapThreadToTransformerArg.find(nThreadId);
     387          31 :         if (oIter != psThreadData->mapThreadToTransformerArg.end())
     388             :         {
     389           0 :             pTransformerArg = oIter->second;
     390             :         }
     391          31 :         else if (!psThreadData->bTransformerArgInputAssignedToThread &&
     392          31 :                  psThreadData->nCurThreadCountForThisRun ==
     393          31 :                      psThreadData->nTotalThreadCountForThisRun)
     394             :         {
     395             :             // If we are the last thread to be started, temporarily borrow the
     396             :             // original transformer
     397          21 :             psThreadData->bTransformerArgInputAssignedToThread = true;
     398          21 :             pTransformerArg = psThreadData->pTransformerArgInput;
     399          21 :             psThreadData->mapThreadToTransformerArg[nThreadId] =
     400             :                 pTransformerArg;
     401             :         }
     402             : 
     403          31 :         if (pTransformerArg == nullptr)
     404             :         {
     405          10 :             CPLAssert(psThreadData->pTransformerArgInput != nullptr);
     406          10 :             CPLAssert(!psThreadData->bTransformerArgInputAssignedToThread);
     407             :         }
     408             :     }
     409             : 
     410             :     // If no transformer assigned to current thread, instantiate one
     411          31 :     if (pTransformerArg == nullptr)
     412             :     {
     413             :         // This somehow assumes that GDALCloneTransformer() is thread-safe
     414             :         // which should normally be the case.
     415             :         pTransformerArg =
     416          10 :             GDALCloneTransformer(psThreadData->pTransformerArgInput);
     417             : 
     418             :         // Lock for the stop flag and the transformer map.
     419          10 :         std::lock_guard<std::mutex> lock(psThreadData->mutex);
     420          10 :         if (!pTransformerArg)
     421             :         {
     422           0 :             psJob->stopFlag = true;
     423           0 :             return;
     424             :         }
     425          10 :         psThreadData->mapThreadToTransformerArg[nThreadId] = pTransformerArg;
     426             :     }
     427             : 
     428          31 :     psJob->pTransformerArg = pTransformerArg;
     429          31 :     psJob->pfnFunc(pData);
     430             : 
     431             :     // Give back original transformer, if borrowed.
     432             :     {
     433          62 :         std::lock_guard<std::mutex> lock(psThreadData->mutex);
     434          31 :         if (psThreadData->bTransformerArgInputAssignedToThread &&
     435          22 :             pTransformerArg == psThreadData->pTransformerArgInput)
     436             :         {
     437             :             psThreadData->mapThreadToTransformerArg.erase(
     438          21 :                 psThreadData->mapThreadToTransformerArg.find(nThreadId));
     439          21 :             psThreadData->bTransformerArgInputAssignedToThread = false;
     440             :         }
     441             :     }
     442             : }
     443             : 
     444             : /************************************************************************/
     445             : /*                                GWKRun()                              */
     446             : /************************************************************************/
     447             : 
     448        2757 : static CPLErr GWKRun(GDALWarpKernel *poWK, const char *pszFuncName,
     449             :                      void (*pfnFunc)(void *pUserData))
     450             : 
     451             : {
     452        2757 :     const int nDstYSize = poWK->nDstYSize;
     453             : 
     454        2757 :     CPLDebug("GDAL",
     455             :              "GDALWarpKernel()::%s() "
     456             :              "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d",
     457             :              pszFuncName, poWK->nSrcXOff, poWK->nSrcYOff, poWK->nSrcXSize,
     458             :              poWK->nSrcYSize, poWK->nDstXOff, poWK->nDstYOff, poWK->nDstXSize,
     459             :              poWK->nDstYSize);
     460             : 
     461        2760 :     if (!poWK->pfnProgress(poWK->dfProgressBase, "", poWK->pProgress))
     462             :     {
     463           0 :         CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
     464           0 :         return CE_Failure;
     465             :     }
     466             : 
     467        2759 :     GWKThreadData *psThreadData =
     468             :         static_cast<GWKThreadData *>(poWK->psThreadData);
     469        2759 :     if (psThreadData == nullptr || psThreadData->poJobQueue == nullptr)
     470             :     {
     471        2737 :         return GWKGenericMonoThread(poWK, pfnFunc);
     472             :     }
     473             : 
     474          22 :     int nThreads = std::min(psThreadData->nMaxThreads, nDstYSize / 2);
     475             :     // Config option mostly useful for tests to be able to test multithreading
     476             :     // with small rasters
     477             :     const int nWarpChunkSize =
     478          21 :         atoi(CPLGetConfigOption("WARP_THREAD_CHUNK_SIZE", "65536"));
     479          21 :     if (nWarpChunkSize > 0)
     480             :     {
     481          19 :         GIntBig nChunks =
     482          19 :             static_cast<GIntBig>(nDstYSize) * poWK->nDstXSize / nWarpChunkSize;
     483          19 :         if (nThreads > nChunks)
     484          14 :             nThreads = static_cast<int>(nChunks);
     485             :     }
     486          21 :     if (nThreads <= 0)
     487          17 :         nThreads = 1;
     488             : 
     489          21 :     CPLDebug("WARP", "Using %d threads", nThreads);
     490             : 
     491          21 :     auto &jobs = *psThreadData->threadJobs;
     492          21 :     CPLAssert(static_cast<int>(jobs.size()) >= nThreads);
     493             :     // Fill-in job structures.
     494          52 :     for (int i = 0; i < nThreads; ++i)
     495             :     {
     496          31 :         auto &job = jobs[i];
     497          31 :         job.poWK = poWK;
     498          31 :         job.iYMin =
     499          31 :             static_cast<int>(static_cast<int64_t>(i) * nDstYSize / nThreads);
     500          31 :         job.iYMax = static_cast<int>(static_cast<int64_t>(i + 1) * nDstYSize /
     501          31 :                                      nThreads);
     502          31 :         if (poWK->pfnProgress != GDALDummyProgress)
     503           2 :             job.pfnProgress = GWKProgressThread;
     504          31 :         job.pfnFunc = pfnFunc;
     505             :     }
     506             : 
     507             :     bool bStopFlag;
     508             :     {
     509          21 :         std::unique_lock<std::mutex> lock(psThreadData->mutex);
     510             : 
     511          21 :         psThreadData->nTotalThreadCountForThisRun = nThreads;
     512             :         // coverity[missing_lock]
     513          21 :         psThreadData->nCurThreadCountForThisRun = 0;
     514             : 
     515             :         // Start jobs.
     516          52 :         for (int i = 0; i < nThreads; ++i)
     517             :         {
     518          31 :             auto &job = jobs[i];
     519          31 :             psThreadData->poJobQueue->SubmitJob(ThreadFuncAdapter,
     520             :                                                 static_cast<void *>(&job));
     521             :         }
     522             : 
     523             :         /* --------------------------------------------------------------------
     524             :          */
     525             :         /*      Report progress. */
     526             :         /* --------------------------------------------------------------------
     527             :          */
     528          21 :         if (poWK->pfnProgress != GDALDummyProgress)
     529             :         {
     530          15 :             while (psThreadData->counter < nDstYSize)
     531             :             {
     532          14 :                 psThreadData->cv.wait(lock);
     533          14 :                 if (!poWK->pfnProgress(poWK->dfProgressBase +
     534          14 :                                            poWK->dfProgressScale *
     535          14 :                                                (psThreadData->counter /
     536          14 :                                                 static_cast<double>(nDstYSize)),
     537             :                                        "", poWK->pProgress))
     538             :                 {
     539           1 :                     CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
     540           1 :                     psThreadData->stopFlag = true;
     541           1 :                     break;
     542             :                 }
     543             :             }
     544             :         }
     545             : 
     546          21 :         bStopFlag = psThreadData->stopFlag;
     547             :     }
     548             : 
     549             :     /* -------------------------------------------------------------------- */
     550             :     /*      Wait for all jobs to complete.                                  */
     551             :     /* -------------------------------------------------------------------- */
     552          21 :     psThreadData->poJobQueue->WaitCompletion();
     553             : 
     554          21 :     return bStopFlag ? CE_Failure : CE_None;
     555             : }
     556             : 
     557             : /************************************************************************/
     558             : /* ==================================================================== */
     559             : /*                            GDALWarpKernel                            */
     560             : /* ==================================================================== */
     561             : /************************************************************************/
     562             : 
     563             : /**
     564             :  * \class GDALWarpKernel "gdalwarper.h"
     565             :  *
     566             :  * Low level image warping class.
     567             :  *
     568             :  * This class is responsible for low level image warping for one
     569             :  * "chunk" of imagery.  The class is essentially a structure with all
     570             :  * data members public - primarily so that new special-case functions
     571             :  * can be added without changing the class declaration.
     572             :  *
     573             :  * Applications are normally intended to interact with warping facilities
     574             :  * through the GDALWarpOperation class, though the GDALWarpKernel can in
     575             :  * theory be used directly if great care is taken in setting up the
     576             :  * control data.
     577             :  *
     578             :  * <h3>Design Issues</h3>
     579             :  *
     580             :  * The intention is that PerformWarp() would analyze the setup in terms
     581             :  * of the datatype, resampling type, and validity/density mask usage and
     582             :  * pick one of many specific implementations of the warping algorithm over
     583             :  * a continuum of optimization vs. generality.  At one end there will be a
     584             :  * reference general purpose implementation of the algorithm that supports
     585             :  * any data type (working internally in double precision complex), all three
     586             :  * resampling types, and any or all of the validity/density masks.  At the
     587             :  * other end would be highly optimized algorithms for common cases like
     588             :  * nearest neighbour resampling on GDT_Byte data with no masks.
     589             :  *
     590             :  * The full set of optimized versions have not been decided but we should
     591             :  * expect to have at least:
     592             :  *  - One for each resampling algorithm for 8bit data with no masks.
     593             :  *  - One for each resampling algorithm for float data with no masks.
     594             :  *  - One for each resampling algorithm for float data with any/all masks
     595             :  *    (essentially the generic case for just float data).
     596             :  *  - One for each resampling algorithm for 8bit data with support for
     597             :  *    input validity masks (per band or per pixel).  This handles the common
     598             :  *    case of nodata masking.
     599             :  *  - One for each resampling algorithm for float data with support for
     600             :  *    input validity masks (per band or per pixel).  This handles the common
     601             :  *    case of nodata masking.
     602             :  *
     603             :  * Some of the specializations would operate on all bands in one pass
     604             :  * (especially the ones without masking would do this), while others might
     605             :  * process each band individually to reduce code complexity.
     606             :  *
     607             :  * <h3>Masking Semantics</h3>
     608             :  *
     609             :  * A detailed explanation of the semantics of the validity and density masks,
     610             :  * and their effects on resampling kernels is needed here.
     611             :  */
     612             : 
     613             : /************************************************************************/
     614             : /*                     GDALWarpKernel Data Members                      */
     615             : /************************************************************************/
     616             : 
     617             : /**
     618             :  * \var GDALResampleAlg GDALWarpKernel::eResample;
     619             :  *
     620             :  * Resampling algorithm.
     621             :  *
     622             :  * The resampling algorithm to use.  One of GRA_NearestNeighbour, GRA_Bilinear,
     623             :  * GRA_Cubic, GRA_CubicSpline, GRA_Lanczos, GRA_Average, GRA_RMS,
     624             :  * GRA_Mode or GRA_Sum.
     625             :  *
     626             :  * This field is required. GRA_NearestNeighbour may be used as a default
     627             :  * value.
     628             :  */
     629             : 
     630             : /**
     631             :  * \var GDALDataType GDALWarpKernel::eWorkingDataType;
     632             :  *
     633             :  * Working pixel data type.
     634             :  *
     635             :  * The datatype of pixels in the source image (papabySrcImage) and
     636             :  * destination image (papabyDstImage) buffers.  Note that operations on
     637             :  * some data types (such as GDT_Byte) may be much better optimized than other
     638             :  * less common cases.
     639             :  *
     640             :  * This field is required.  It may not be GDT_Unknown.
     641             :  */
     642             : 
     643             : /**
     644             :  * \var int GDALWarpKernel::nBands;
     645             :  *
     646             :  * Number of bands.
     647             :  *
     648             :  * The number of bands (layers) of imagery being warped.  Determines the
     649             :  * number of entries in the papabySrcImage, papanBandSrcValid,
     650             :  * and papabyDstImage arrays.
     651             :  *
     652             :  * This field is required.
     653             :  */
     654             : 
     655             : /**
     656             :  * \var int GDALWarpKernel::nSrcXSize;
     657             :  *
     658             :  * Source image width in pixels.
     659             :  *
     660             :  * This field is required.
     661             :  */
     662             : 
     663             : /**
     664             :  * \var int GDALWarpKernel::nSrcYSize;
     665             :  *
     666             :  * Source image height in pixels.
     667             :  *
     668             :  * This field is required.
     669             :  */
     670             : 
     671             : /**
     672             :  * \var double GDALWarpKernel::dfSrcXExtraSize;
     673             :  *
     674             :  * Number of pixels included in nSrcXSize that are present on the edges of
     675             :  * the area of interest to take into account the width of the kernel.
     676             :  *
     677             :  * This field is required.
     678             :  */
     679             : 
     680             : /**
     681             :  * \var double GDALWarpKernel::dfSrcYExtraSize;
     682             :  *
     683             :  * Number of pixels included in nSrcYSize that are present on the edges of
     684             :  * the area of interest to take into account the height of the kernel.
     685             :  *
     686             :  * This field is required.
     687             :  */
     688             : 
     689             : /**
     690             :  * \var GByte **GDALWarpKernel::papabySrcImage;
     691             :  *
     692             :  * Array of source image band data.
     693             :  *
     694             :  * This is an array (of size GDALWarpKernel::nBands) of pointers
     695             :  * to image data.  Each individual band of image data is organized as a single
     696             :  * block of image data in left to right, then bottom to top order.  The actual
     697             :  * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
     698             :  *
     699             :  * To access the pixel value for the (x=3, y=4) pixel (zero based) of
     700             :  * the second band with eWorkingDataType set to GDT_Float32 use code like
     701             :  * this:
     702             :  *
     703             :  * \code
     704             :  *   float dfPixelValue;
     705             :  *   int   nBand = 2-1;  // Band indexes are zero based.
     706             :  *   int   nPixel = 3; // Zero based.
     707             :  *   int   nLine = 4;  // Zero based.
     708             :  *
     709             :  *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
     710             :  *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
     711             :  *   assert( nBand >= 0 && nBand < poKern->nBands );
     712             :  *   dfPixelValue = ((float *) poKern->papabySrcImage[nBand])
     713             :  *                                  [nPixel + nLine * poKern->nSrcXSize];
     714             :  * \endcode
     715             :  *
     716             :  * This field is required.
     717             :  */
     718             : 
     719             : /**
     720             :  * \var GUInt32 **GDALWarpKernel::papanBandSrcValid;
     721             :  *
     722             :  * Per band validity mask for source pixels.
     723             :  *
     724             :  * Array of pixel validity mask layers for each source band.   Each of
     725             :  * the mask layers is the same size (in pixels) as the source image with
     726             :  * one bit per pixel.  Note that it is legal (and common) for this to be
     727             :  * NULL indicating that none of the pixels are invalidated, or for some
     728             :  * band validity masks to be NULL in which case all pixels of the band are
     729             :  * valid.  The following code can be used to test the validity of a particular
     730             :  * pixel.
     731             :  *
     732             :  * \code
     733             :  *   int   bIsValid = TRUE;
     734             :  *   int   nBand = 2-1;  // Band indexes are zero based.
     735             :  *   int   nPixel = 3; // Zero based.
     736             :  *   int   nLine = 4;  // Zero based.
     737             :  *
     738             :  *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
     739             :  *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
     740             :  *   assert( nBand >= 0 && nBand < poKern->nBands );
     741             :  *
     742             :  *   if( poKern->papanBandSrcValid != NULL
     743             :  *       && poKern->papanBandSrcValid[nBand] != NULL )
     744             :  *   {
     745             :  *       GUInt32 *panBandMask = poKern->papanBandSrcValid[nBand];
     746             :  *       int    iPixelOffset = nPixel + nLine * poKern->nSrcXSize;
     747             :  *
     748             :  *       bIsValid = CPLMaskGet(panBandMask, iPixelOffset);
     749             :  *   }
     750             :  * \endcode
     751             :  */
     752             : 
     753             : /**
     754             :  * \var GUInt32 *GDALWarpKernel::panUnifiedSrcValid;
     755             :  *
     756             :  * Per pixel validity mask for source pixels.
     757             :  *
     758             :  * A single validity mask layer that applies to the pixels of all source
     759             :  * bands.  It is accessed similarly to papanBandSrcValid, but without the
     760             :  * extra level of band indirection.
     761             :  *
     762             :  * This pointer may be NULL indicating that all pixels are valid.
     763             :  *
     764             :  * Note that if both panUnifiedSrcValid, and papanBandSrcValid are available,
     765             :  * the pixel isn't considered to be valid unless both arrays indicate it is
     766             :  * valid.
     767             :  */
     768             : 
     769             : /**
     770             :  * \var float *GDALWarpKernel::pafUnifiedSrcDensity;
     771             :  *
     772             :  * Per pixel density mask for source pixels.
     773             :  *
     774             :  * A single density mask layer that applies to the pixels of all source
     775             :  * bands.  It contains values between 0.0 and 1.0 indicating the degree to
     776             :  * which this pixel should be allowed to contribute to the output result.
     777             :  *
     778             :  * This pointer may be NULL indicating that all pixels have a density of 1.0.
     779             :  *
     780             :  * The density for a pixel may be accessed like this:
     781             :  *
     782             :  * \code
     783             :  *   float fDensity = 1.0;
     784             :  *   int nPixel = 3;  // Zero based.
     785             :  *   int nLine = 4;   // Zero based.
     786             :  *
     787             :  *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
     788             :  *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
     789             :  *   if( poKern->pafUnifiedSrcDensity != NULL )
     790             :  *     fDensity = poKern->pafUnifiedSrcDensity
     791             :  *                                  [nPixel + nLine * poKern->nSrcXSize];
     792             :  * \endcode
     793             :  */
     794             : 
     795             : /**
     796             :  * \var int GDALWarpKernel::nDstXSize;
     797             :  *
     798             :  * Width of destination image in pixels.
     799             :  *
     800             :  * This field is required.
     801             :  */
     802             : 
     803             : /**
     804             :  * \var int GDALWarpKernel::nDstYSize;
     805             :  *
     806             :  * Height of destination image in pixels.
     807             :  *
     808             :  * This field is required.
     809             :  */
     810             : 
     811             : /**
     812             :  * \var GByte **GDALWarpKernel::papabyDstImage;
     813             :  *
     814             :  * Array of destination image band data.
     815             :  *
     816             :  * This is an array (of size GDALWarpKernel::nBands) of pointers
     817             :  * to image data.  Each individual band of image data is organized as a single
     818             :  * block of image data in left to right, then bottom to top order.  The actual
     819             :  * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
     820             :  *
     821             :  * To access the pixel value for the (x=3, y=4) pixel (zero based) of
     822             :  * the second band with eWorkingDataType set to GDT_Float32 use code like
     823             :  * this:
     824             :  *
     825             :  * \code
     826             :  *   float dfPixelValue;
     827             :  *   int   nBand = 2-1;  // Band indexes are zero based.
     828             :  *   int   nPixel = 3; // Zero based.
     829             :  *   int   nLine = 4;  // Zero based.
     830             :  *
     831             :  *   assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
     832             :  *   assert( nLine >= 0 && nLine < poKern->nDstYSize );
     833             :  *   assert( nBand >= 0 && nBand < poKern->nBands );
     834             :  *   dfPixelValue = ((float *) poKern->papabyDstImage[nBand])
     835             :  *                                  [nPixel + nLine * poKern->nDstXSize];
     836             :  * \endcode
     837             :  *
     838             :  * This field is required.
     839             :  */
     840             : 
     841             : /**
     842             :  * \var GUInt32 *GDALWarpKernel::panDstValid;
     843             :  *
     844             :  * Per pixel validity mask for destination pixels.
     845             :  *
     846             :  * A single validity mask layer that applies to the pixels of all destination
     847             :  * bands.  It is accessed similarly to panUnifiedSrcValid, but based
     848             :  * on the size of the destination image.
     849             :  *
     850             :  * This pointer may be NULL indicating that all pixels are valid.
     851             :  */
     852             : 
     853             : /**
     854             :  * \var float *GDALWarpKernel::pafDstDensity;
     855             :  *
     856             :  * Per pixel density mask for destination pixels.
     857             :  *
     858             :  * A single density mask layer that applies to the pixels of all destination
     859             :  * bands.  It contains values between 0.0 and 1.0.
     860             :  *
     861             :  * This pointer may be NULL indicating that all pixels have a density of 1.0.
     862             :  *
     863             :  * The density for a pixel may be accessed like this:
     864             :  *
     865             :  * \code
     866             :  *   float fDensity = 1.0;
     867             :  *   int   nPixel = 3; // Zero based.
     868             :  *   int   nLine = 4;  // Zero based.
     869             :  *
     870             :  *   assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
     871             :  *   assert( nLine >= 0 && nLine < poKern->nDstYSize );
     872             :  *   if( poKern->pafDstDensity != NULL )
     873             :  *     fDensity = poKern->pafDstDensity[nPixel + nLine * poKern->nDstXSize];
     874             :  * \endcode
     875             :  */
     876             : 
     877             : /**
     878             :  * \var int GDALWarpKernel::nSrcXOff;
     879             :  *
     880             :  * X offset to source pixel coordinates for transformation.
     881             :  *
     882             :  * See pfnTransformer.
     883             :  *
     884             :  * This field is required.
     885             :  */
     886             : 
     887             : /**
     888             :  * \var int GDALWarpKernel::nSrcYOff;
     889             :  *
     890             :  * Y offset to source pixel coordinates for transformation.
     891             :  *
     892             :  * See pfnTransformer.
     893             :  *
     894             :  * This field is required.
     895             :  */
     896             : 
     897             : /**
     898             :  * \var int GDALWarpKernel::nDstXOff;
     899             :  *
     900             :  * X offset to destination pixel coordinates for transformation.
     901             :  *
     902             :  * See pfnTransformer.
     903             :  *
     904             :  * This field is required.
     905             :  */
     906             : 
     907             : /**
     908             :  * \var int GDALWarpKernel::nDstYOff;
     909             :  *
     910             :  * Y offset to destination pixel coordinates for transformation.
     911             :  *
     912             :  * See pfnTransformer.
     913             :  *
     914             :  * This field is required.
     915             :  */
     916             : 
     917             : /**
     918             :  * \var GDALTransformerFunc GDALWarpKernel::pfnTransformer;
     919             :  *
     920             :  * Source/destination location transformer.
     921             :  *
     922             :  * The function to call to transform coordinates between source image
     923             :  * pixel/line coordinates and destination image pixel/line coordinates.
     924             :  * See GDALTransformerFunc() for details of the semantics of this function.
     925             :  *
     926             :  * The GDALWarpKernel algorithm will only ever use this transformer in
     927             :  * "destination to source" mode (bDstToSrc=TRUE), and will always pass
     928             :  * partial or complete scanlines of points in the destination image as
     929             :  * input.  This means, among other things, that it is safe to use the
     930             :  * approximating transform GDALApproxTransform() as the transformation
     931             :  * function.
     932             :  *
     933             :  * Source and destination images may be subsets of a larger overall image.
     934             :  * The transformation algorithms will expect and return pixel/line coordinates
     935             :  * in terms of this larger image, so coordinates need to be offset by
     936             :  * the offsets specified in nSrcXOff, nSrcYOff, nDstXOff, and nDstYOff before
     937             :  * passing to pfnTransformer, and after return from it.
     938             :  *
     939             :  * The GDALWarpKernel::pTransformerArg value will be passed as the callback
     940             :  * data to this function when it is called.
     941             :  *
     942             :  * This field is required.
     943             :  */
     944             : 
     945             : /**
     946             :  * \var void *GDALWarpKernel::pTransformerArg;
     947             :  *
     948             :  * Callback data for pfnTransformer.
     949             :  *
     950             :  * This field may be NULL if not required for the pfnTransformer being used.
     951             :  */
     952             : 
     953             : /**
     954             :  * \var GDALProgressFunc GDALWarpKernel::pfnProgress;
     955             :  *
     956             :  * The function to call to report progress of the algorithm, and to check
     957             :  * for a requested termination of the operation.  It operates according to
     958             :  * GDALProgressFunc() semantics.
     959             :  *
     960             :  * Generally speaking the progress function will be invoked for each
     961             :  * scanline of the destination buffer that has been processed.
     962             :  *
     963             :  * This field may be NULL (internally set to GDALDummyProgress()).
     964             :  */
     965             : 
     966             : /**
     967             :  * \var void *GDALWarpKernel::pProgress;
     968             :  *
     969             :  * Callback data for pfnProgress.
     970             :  *
     971             :  * This field may be NULL if not required for the pfnProgress being used.
     972             :  */
     973             : 
     974             : /************************************************************************/
     975             : /*                           GDALWarpKernel()                           */
     976             : /************************************************************************/
     977             : 
     978        2790 : GDALWarpKernel::GDALWarpKernel()  // Default constructor: gives each listed member an explicit initial value.
     979             :     : papszWarpOptions(nullptr), eResample(GRA_NearestNeighbour),
     980             :       eWorkingDataType(GDT_Unknown), nBands(0), nSrcXSize(0), nSrcYSize(0),  // GDT_Unknown is only a placeholder: the field is documented as required.
     981             :       dfSrcXExtraSize(0.0), dfSrcYExtraSize(0.0), papabySrcImage(nullptr),
     982             :       papanBandSrcValid(nullptr), panUnifiedSrcValid(nullptr),
     983             :       pafUnifiedSrcDensity(nullptr), nDstXSize(0), nDstYSize(0),
     984             :       papabyDstImage(nullptr), panDstValid(nullptr), pafDstDensity(nullptr),
     985             :       dfXScale(1.0), dfYScale(1.0), dfXFilter(0.0), dfYFilter(0.0), nXRadius(0),  // X/Y scales start at 1.0; PerformWarp() recomputes them.
     986             :       nYRadius(0), nFiltInitX(0), nFiltInitY(0), nSrcXOff(0), nSrcYOff(0),
     987             :       nDstXOff(0), nDstYOff(0), pfnTransformer(nullptr),
     988             :       pTransformerArg(nullptr), pfnProgress(GDALDummyProgress),  // Progress callback defaults to GDALDummyProgress, not nullptr.
     989             :       pProgress(nullptr), dfProgressBase(0.0), dfProgressScale(1.0),  // Progress range defaults to base 0.0, scale 1.0.
     990             :       padfDstNoDataReal(nullptr), psThreadData(nullptr),
     991        2790 :       eTieStrategy(GWKTS_First)
     992             : {
     993        2788 : }  // Empty body: all initialization happens in the initializer list above.
     994             : 
     995             : /************************************************************************/
     996             : /*                          ~GDALWarpKernel()                           */
     997             : /************************************************************************/
     998             : 
     999        2790 : GDALWarpKernel::~GDALWarpKernel()  // Destructor.
    1000             : {
    1001        2790 : }  // Empty body: no member is freed here (NOTE(review): buffers presumably caller-owned; confirm).
    1002             : 
    1003             : /************************************************************************/
    1004             : /*                            PerformWarp()                             */
    1005             : /************************************************************************/
    1006             : 
    1007             : /**
    1008             :  * \fn CPLErr GDALWarpKernel::PerformWarp();
    1009             :  *
    1010             :  * This method performs the warp described in the GDALWarpKernel.
    1011             :  *
    1012             :  * @return CE_None on success or CE_Failure if an error occurs.
    1013             :  */
    1014             : 
    1015        2786 : CPLErr GDALWarpKernel::PerformWarp()
    1016             : 
    1017             : {
    1018        2786 :     const CPLErr eErr = Validate();
    1019             : 
    1020        2785 :     if (eErr != CE_None)
    1021           1 :         return eErr;
    1022             : 
    1023             :     // See #2445 and #3079.
    1024        2784 :     if (nSrcXSize <= 0 || nSrcYSize <= 0)
    1025             :     {
    1026          26 :         if (!pfnProgress(dfProgressBase + dfProgressScale, "", pProgress))
    1027             :         {
    1028           0 :             CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
    1029           0 :             return CE_Failure;
    1030             :         }
    1031          25 :         return CE_None;
    1032             :     }
    1033             : 
    1034             :     /* -------------------------------------------------------------------- */
    1035             :     /*      Pre-calculate resampling scales and window sizes for filtering. */
    1036             :     /* -------------------------------------------------------------------- */
    1037             : 
    1038        2758 :     dfXScale = static_cast<double>(nDstXSize) / (nSrcXSize - dfSrcXExtraSize);
    1039        2758 :     dfYScale = static_cast<double>(nDstYSize) / (nSrcYSize - dfSrcYExtraSize);
    1040        2758 :     if (nSrcXSize >= nDstXSize && nSrcXSize <= nDstXSize + dfSrcXExtraSize)
    1041        1365 :         dfXScale = 1.0;
    1042        2758 :     if (nSrcYSize >= nDstYSize && nSrcYSize <= nDstYSize + dfSrcYExtraSize)
    1043        1091 :         dfYScale = 1.0;
    1044        2758 :     if (dfXScale < 1.0)
    1045             :     {
    1046         591 :         double dfXReciprocalScale = 1.0 / dfXScale;
    1047         591 :         const int nXReciprocalScale =
    1048         591 :             static_cast<int>(dfXReciprocalScale + 0.5);
    1049         591 :         if (fabs(dfXReciprocalScale - nXReciprocalScale) < 0.05)
    1050         462 :             dfXScale = 1.0 / nXReciprocalScale;
    1051             :     }
    1052        2758 :     if (dfYScale < 1.0)
    1053             :     {
    1054         535 :         double dfYReciprocalScale = 1.0 / dfYScale;
    1055         535 :         const int nYReciprocalScale =
    1056         535 :             static_cast<int>(dfYReciprocalScale + 0.5);
    1057         535 :         if (fabs(dfYReciprocalScale - nYReciprocalScale) < 0.05)
    1058         378 :             dfYScale = 1.0 / nYReciprocalScale;
    1059             :     }
    1060             : 
    1061             :     // XSCALE and YSCALE undocumented for now. Can help in some cases.
    1062             :     // Best would probably be a per-pixel scale computation.
    1063        2758 :     const char *pszXScale = CSLFetchNameValue(papszWarpOptions, "XSCALE");
    1064        2760 :     if (pszXScale != nullptr && !EQUAL(pszXScale, "FROM_GRID_SAMPLING"))
    1065           1 :         dfXScale = CPLAtof(pszXScale);
    1066        2760 :     const char *pszYScale = CSLFetchNameValue(papszWarpOptions, "YSCALE");
    1067        2760 :     if (pszYScale != nullptr)
    1068           1 :         dfYScale = CPLAtof(pszYScale);
    1069             : 
    1070             :     // If the xscale is significantly lower than the yscale, this is highly
    1071             :     // suspicious of a situation of wrapping a very large virtual file in
    1072             :     // geographic coordinates with left and right parts being close to the
    1073             :     // antimeridian. In that situation, the xscale computed by the above method
    1074             :     // is completely wrong. Prefer doing an average of a few sample points
    1075             :     // instead
    1076        2760 :     if ((dfYScale / dfXScale > 100 ||
    1077           1 :          (pszXScale != nullptr && EQUAL(pszXScale, "FROM_GRID_SAMPLING"))))
    1078             :     {
    1079             :         // Sample points along a grid
    1080           4 :         const int nPointsX = std::min(10, nDstXSize);
    1081           4 :         const int nPointsY = std::min(10, nDstYSize);
    1082           4 :         const int nPoints = 3 * nPointsX * nPointsY;
    1083           8 :         std::vector<double> padfX;
    1084           8 :         std::vector<double> padfY;
    1085           8 :         std::vector<double> padfZ(nPoints);
    1086           8 :         std::vector<int> pabSuccess(nPoints);
    1087          44 :         for (int iY = 0; iY < nPointsY; iY++)
    1088             :         {
    1089         440 :             for (int iX = 0; iX < nPointsX; iX++)
    1090             :             {
    1091         400 :                 const double dfX =
    1092             :                     nPointsX == 1
    1093         400 :                         ? 0.0
    1094         400 :                         : static_cast<double>(iX) * nDstXSize / (nPointsX - 1);
    1095         400 :                 const double dfY =
    1096             :                     nPointsY == 1
    1097         400 :                         ? 0.0
    1098         400 :                         : static_cast<double>(iY) * nDstYSize / (nPointsY - 1);
    1099             : 
    1100             :                 // Reproject each destination sample point and its neighbours
    1101             :                 // at (x+1,y) and (x,y+1), so as to get the local scale.
    1102         400 :                 padfX.push_back(dfX);
    1103         400 :                 padfY.push_back(dfY);
    1104             : 
    1105         400 :                 padfX.push_back((iX == nPointsX - 1) ? dfX - 1 : dfX + 1);
    1106         400 :                 padfY.push_back(dfY);
    1107             : 
    1108         400 :                 padfX.push_back(dfX);
    1109         400 :                 padfY.push_back((iY == nPointsY - 1) ? dfY - 1 : dfY + 1);
    1110             :             }
    1111             :         }
    1112           4 :         pfnTransformer(pTransformerArg, TRUE, nPoints, &padfX[0], &padfY[0],
    1113           4 :                        &padfZ[0], &pabSuccess[0]);
    1114             : 
    1115             :         // Compute the xscale at each sampling point
    1116           8 :         std::vector<double> adfXScales;
    1117         404 :         for (int i = 0; i < nPoints; i += 3)
    1118             :         {
    1119         400 :             if (pabSuccess[i] && pabSuccess[i + 1] && pabSuccess[i + 2])
    1120             :             {
    1121             :                 const double dfPointXScale =
    1122         400 :                     1.0 / std::max(std::abs(padfX[i + 1] - padfX[i]),
    1123         800 :                                    std::abs(padfX[i + 2] - padfX[i]));
    1124         400 :                 adfXScales.push_back(dfPointXScale);
    1125             :             }
    1126             :         }
    1127             : 
    1128             :         // Sort by increasing xscale
    1129           4 :         std::sort(adfXScales.begin(), adfXScales.end());
    1130             : 
    1131           4 :         if (!adfXScales.empty())
    1132             :         {
    1133             :             // Compute the average of scales, but eliminate outliers small
    1134             :             // scales, if some samples are just along the discontinuity.
    1135           4 :             const double dfMaxPointXScale = adfXScales.back();
    1136           4 :             double dfSumPointXScale = 0;
    1137           4 :             int nCountPointScale = 0;
    1138         404 :             for (double dfPointXScale : adfXScales)
    1139             :             {
    1140         400 :                 if (dfPointXScale > dfMaxPointXScale / 10)
    1141             :                 {
    1142         398 :                     dfSumPointXScale += dfPointXScale;
    1143         398 :                     nCountPointScale++;
    1144             :                 }
    1145             :             }
    1146           4 :             if (nCountPointScale > 0)  // should always be true
    1147             :             {
    1148           4 :                 const double dfXScaleFromSampling =
    1149           4 :                     dfSumPointXScale / nCountPointScale;
    1150             : #if DEBUG_VERBOSE
    1151             :                 CPLDebug("WARP", "Correcting dfXScale from %f to %f", dfXScale,
    1152             :                          dfXScaleFromSampling);
    1153             : #endif
    1154           4 :                 dfXScale = dfXScaleFromSampling;
    1155             :             }
    1156             :         }
    1157             :     }
    1158             : 
    1159             : #if DEBUG_VERBOSE
    1160             :     CPLDebug("WARP", "dfXScale = %f, dfYScale = %f", dfXScale, dfYScale);
    1161             : #endif
    1162             : 
    1163        2760 :     const int bUse4SamplesFormula = dfXScale >= 0.95 && dfYScale >= 0.95;
    1164             : 
    1165             :     // Safety check for callers that would use GDALWarpKernel without using
    1166             :     // GDALWarpOperation.
    1167        2697 :     if ((eResample == GRA_CubicSpline || eResample == GRA_Lanczos ||
    1168        2634 :          ((eResample == GRA_Cubic || eResample == GRA_Bilinear) &&
    1169        5520 :           !bUse4SamplesFormula)) &&
    1170         390 :         atoi(CSLFetchNameValueDef(papszWarpOptions, "EXTRA_ELTS", "0")) !=
    1171             :             WARP_EXTRA_ELTS)
    1172             :     {
    1173           0 :         CPLError(CE_Failure, CPLE_AppDefined,
    1174             :                  "Source arrays must have WARP_EXTRA_ELTS extra elements at "
    1175             :                  "their end. "
    1176             :                  "See GDALWarpKernel class definition. If this condition is "
    1177             :                  "fulfilled, define a EXTRA_ELTS=%d warp options",
    1178             :                  WARP_EXTRA_ELTS);
    1179           0 :         return CE_Failure;
    1180             :     }
    1181             : 
    1182        2760 :     dfXFilter = anGWKFilterRadius[eResample];
    1183        2760 :     dfYFilter = anGWKFilterRadius[eResample];
    1184             : 
    1185        2760 :     nXRadius = dfXScale < 1.0 ? static_cast<int>(ceil(dfXFilter / dfXScale))
    1186        2253 :                               : static_cast<int>(dfXFilter);
    1187        2760 :     nYRadius = dfYScale < 1.0 ? static_cast<int>(ceil(dfYFilter / dfYScale))
    1188        2253 :                               : static_cast<int>(dfYFilter);
    1189             : 
    1190             :     // Filter window offset depends on the parity of the kernel radius.
    1191        2760 :     nFiltInitX = ((anGWKFilterRadius[eResample] + 1) % 2) - nXRadius;
    1192        2760 :     nFiltInitY = ((anGWKFilterRadius[eResample] + 1) % 2) - nYRadius;
    1193             : 
    1194        2757 :     bApplyVerticalShift =
    1195        2760 :         CPLFetchBool(papszWarpOptions, "APPLY_VERTICAL_SHIFT", false);
    1196        2759 :     dfMultFactorVerticalShift = CPLAtof(CSLFetchNameValueDef(
    1197        2757 :         papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT", "1.0"));
    1198             : 
    1199             :     /* -------------------------------------------------------------------- */
    1200             :     /*      Set up resampling functions.                                    */
    1201             :     /* -------------------------------------------------------------------- */
    1202        2758 :     if (CPLFetchBool(papszWarpOptions, "USE_GENERAL_CASE", false))
    1203          12 :         return GWKGeneralCase(this);
    1204             : 
    1205        2748 :     const bool bNoMasksOrDstDensityOnly =
    1206        2741 :         papanBandSrcValid == nullptr && panUnifiedSrcValid == nullptr &&
    1207        5489 :         pafUnifiedSrcDensity == nullptr && panDstValid == nullptr;
    1208             : 
    1209        2748 :     if (eWorkingDataType == GDT_Byte && eResample == GRA_NearestNeighbour &&
    1210             :         bNoMasksOrDstDensityOnly)
    1211         936 :         return GWKNearestNoMasksOrDstDensityOnlyByte(this);
    1212             : 
    1213        1812 :     if (eWorkingDataType == GDT_Byte && eResample == GRA_Bilinear &&
    1214             :         bNoMasksOrDstDensityOnly)
    1215         126 :         return GWKBilinearNoMasksOrDstDensityOnlyByte(this);
    1216             : 
    1217        1686 :     if (eWorkingDataType == GDT_Byte && eResample == GRA_Cubic &&
    1218             :         bNoMasksOrDstDensityOnly)
    1219         609 :         return GWKCubicNoMasksOrDstDensityOnlyByte(this);
    1220             : 
    1221        1077 :     if (eWorkingDataType == GDT_Byte && eResample == GRA_CubicSpline &&
    1222             :         bNoMasksOrDstDensityOnly)
    1223          12 :         return GWKCubicSplineNoMasksOrDstDensityOnlyByte(this);
    1224             : 
    1225        1065 :     if (eWorkingDataType == GDT_Byte && eResample == GRA_NearestNeighbour)
    1226         341 :         return GWKNearestByte(this);
    1227             : 
    1228         724 :     if ((eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16) &&
    1229         133 :         eResample == GRA_NearestNeighbour && bNoMasksOrDstDensityOnly)
    1230          14 :         return GWKNearestNoMasksOrDstDensityOnlyShort(this);
    1231             : 
    1232         710 :     if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Cubic &&
    1233             :         bNoMasksOrDstDensityOnly)
    1234           5 :         return GWKCubicNoMasksOrDstDensityOnlyShort(this);
    1235             : 
    1236         705 :     if ((eWorkingDataType == GDT_Int16) && eResample == GRA_CubicSpline &&
    1237             :         bNoMasksOrDstDensityOnly)
    1238           6 :         return GWKCubicSplineNoMasksOrDstDensityOnlyShort(this);
    1239             : 
    1240         699 :     if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Bilinear &&
    1241             :         bNoMasksOrDstDensityOnly)
    1242           5 :         return GWKBilinearNoMasksOrDstDensityOnlyShort(this);
    1243             : 
    1244         694 :     if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Cubic &&
    1245             :         bNoMasksOrDstDensityOnly)
    1246          14 :         return GWKCubicNoMasksOrDstDensityOnlyUShort(this);
    1247             : 
    1248         680 :     if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_CubicSpline &&
    1249             :         bNoMasksOrDstDensityOnly)
    1250           5 :         return GWKCubicSplineNoMasksOrDstDensityOnlyUShort(this);
    1251             : 
    1252         675 :     if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Bilinear &&
    1253             :         bNoMasksOrDstDensityOnly)
    1254           6 :         return GWKBilinearNoMasksOrDstDensityOnlyUShort(this);
    1255             : 
    1256         669 :     if (eWorkingDataType == GDT_Int16 && eResample == GRA_NearestNeighbour)
    1257          26 :         return GWKNearestShort(this);
    1258             : 
    1259         643 :     if (eWorkingDataType == GDT_UInt16 && eResample == GRA_NearestNeighbour)
    1260           6 :         return GWKNearestUnsignedShort(this);
    1261             : 
    1262         637 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour &&
    1263             :         bNoMasksOrDstDensityOnly)
    1264          11 :         return GWKNearestNoMasksOrDstDensityOnlyFloat(this);
    1265             : 
    1266         626 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour)
    1267          44 :         return GWKNearestFloat(this);
    1268             : 
    1269         582 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_Bilinear &&
    1270             :         bNoMasksOrDstDensityOnly)
    1271           4 :         return GWKBilinearNoMasksOrDstDensityOnlyFloat(this);
    1272             : 
    1273         578 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_Cubic &&
    1274             :         bNoMasksOrDstDensityOnly)
    1275           9 :         return GWKCubicNoMasksOrDstDensityOnlyFloat(this);
    1276             : 
    1277             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    1278             :     if (eWorkingDataType == GDT_Float64 && eResample == GRA_Bilinear &&
    1279             :         bNoMasksOrDstDensityOnly)
    1280             :         return GWKBilinearNoMasksOrDstDensityOnlyDouble(this);
    1281             : 
    1282             :     if (eWorkingDataType == GDT_Float64 && eResample == GRA_Cubic &&
    1283             :         bNoMasksOrDstDensityOnly)
    1284             :         return GWKCubicNoMasksOrDstDensityOnlyDouble(this);
    1285             : #endif
    1286             : 
    1287         569 :     if (eResample == GRA_Average)
    1288          77 :         return GWKAverageOrMode(this);
    1289             : 
    1290         492 :     if (eResample == GRA_RMS)
    1291           9 :         return GWKAverageOrMode(this);
    1292             : 
    1293         483 :     if (eResample == GRA_Mode)
    1294          45 :         return GWKAverageOrMode(this);
    1295             : 
    1296         438 :     if (eResample == GRA_Max)
    1297           6 :         return GWKAverageOrMode(this);
    1298             : 
    1299         432 :     if (eResample == GRA_Min)
    1300           5 :         return GWKAverageOrMode(this);
    1301             : 
    1302         427 :     if (eResample == GRA_Med)
    1303           6 :         return GWKAverageOrMode(this);
    1304             : 
    1305         421 :     if (eResample == GRA_Q1)
    1306          10 :         return GWKAverageOrMode(this);
    1307             : 
    1308         411 :     if (eResample == GRA_Q3)
    1309           5 :         return GWKAverageOrMode(this);
    1310             : 
    1311         406 :     if (eResample == GRA_Sum)
    1312          18 :         return GWKSumPreserving(this);
    1313             : 
    1314         388 :     if (!GDALDataTypeIsComplex(eWorkingDataType))
    1315             :     {
    1316         158 :         return GWKRealCase(this);
    1317             :     }
    1318             : 
    1319         227 :     return GWKGeneralCase(this);
    1320             : }
    1321             : 
    1322             : /************************************************************************/
    1323             : /*                              Validate()                              */
    1324             : /************************************************************************/
    1325             : 
    1326             : /**
    1327             :  * \fn CPLErr GDALWarpKernel::Validate()
    1328             :  *
    1329             :  * Check the settings in the GDALWarpKernel, and issue a CPLError()
    1330             :  * (and return CE_Failure) if the configuration is considered to be
    1331             :  * invalid for some reason.
                     :  *
                     :  * The checks visible in this implementation are:
                     :  * - eResample must be a valid index into anGWKFilterRadius;
                     :  * - if the EXCLUDED_VALUES warp option is set, its number of values
                     :  *   must be a multiple of nBands. Each group of nBands values is
                     :  *   appended as one tuple to m_aadfExcludedValues.
    1332             :  *
    1333             :  * This method will also do some standard defaulting such as setting
    1334             :  * pfnProgress to GDALDummyProgress() if it is NULL.
                     :  * NOTE(review): no pfnProgress defaulting is visible in the body
                     :  * below; this sentence may be stale -- confirm.
    1335             :  *
    1336             :  * @return CE_None on success or CE_Failure if an error is detected.
    1337             :  */
    1338             : 
    1339        2784 : CPLErr GDALWarpKernel::Validate()
    1340             : 
    1341             : {
    1342        2784 :     if (static_cast<size_t>(eResample) >=  // eResample must be a valid index into anGWKFilterRadius
    1343             :         (sizeof(anGWKFilterRadius) / sizeof(anGWKFilterRadius[0])))  // element count of the table
    1344             :     {
    1345           0 :         CPLError(CE_Failure, CPLE_AppDefined,
    1346             :                  "Unsupported resampling method %d.",
    1347           0 :                  static_cast<int>(eResample));
    1348           0 :         return CE_Failure;
    1349             :     }
    1350             : 
    1351             :     // Tuples of values (e.g. "<R>,<G>,<B>" or "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>)") that must
    1352             :     // be ignored as contributing source pixels during resampling. Only taken into account by
    1353             :     // Average currently.
    1354             :     const char *pszExcludedValues =
    1355        2784 :         CSLFetchNameValue(papszWarpOptions, "EXCLUDED_VALUES");
    1356        2786 :     if (pszExcludedValues)  // option is optional; nothing to parse when it is absent
    1357             :     {
    1358             :         const CPLStringList aosTokens(
    1359          15 :             CSLTokenizeString2(pszExcludedValues, "(,)", 0));  // "(,)" presumably is the delimiter set: parentheses and commas -- confirm
    1360          14 :         if ((aosTokens.size() % nBands) != 0)  // need a whole number of nBands-sized tuples; NOTE(review): assumes nBands != 0 -- confirm
    1361             :         {
    1362           1 :             CPLError(CE_Failure, CPLE_AppDefined,
    1363             :                      "EXCLUDED_VALUES should contain one or several tuples of "
    1364             :                      "%d values formatted like <R>,<G>,<B> or "
    1365             :                      "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>) if there are multiple "
    1366             :                      "tuples",
    1367             :                      nBands);
    1368           1 :             return CE_Failure;
    1369             :         }
    1370          26 :         std::vector<double> adfTuple;  // values of the tuple currently being collected
    1371          52 :         for (int i = 0; i < aosTokens.size(); ++i)
    1372             :         {
    1373          39 :             adfTuple.push_back(CPLAtof(aosTokens[i]));
    1374          39 :             if (((i + 1) % nBands) == 0)  // tuple complete: store it and start the next one
    1375             :             {
    1376          13 :                 m_aadfExcludedValues.push_back(adfTuple);  // NOTE(review): appended without clearing first; repeated Validate() calls would accumulate -- confirm intended
    1377          13 :                 adfTuple.clear();
    1378             :             }
    1379             :         }
    1380             :     }
    1381             : 
    1382        2784 :     return CE_None;
    1383             : }
    1384             : 
    1385             : /************************************************************************/
    1386             : /*                         GWKOverlayDensity()                          */
    1387             : /*                                                                      */
    1388             : /*      Compute the final density for the destination pixel.  This      */
    1389             : /*      is a function of the overlay density (passed in) and the        */
    1390             : /*      original density: new = 1 - (1 - overlay) * (1 - original).     */
    1391             : /************************************************************************/
    1392             : 
    1393     9804390 : static void GWKOverlayDensity(const GDALWarpKernel *poWK, GPtrDiff_t iDstOffset,  // iDstOffset: index into poWK->pafDstDensity
    1394             :                               double dfDensity)  // dfDensity: overlay density to combine with the stored one
    1395             : {
    1396     9804390 :     if (dfDensity < 0.0001 || poWK->pafDstDensity == nullptr)  // negligible overlay, or no destination density buffer: nothing to update
    1397     7844130 :         return;
    1398             : 
    1399     1960260 :     poWK->pafDstDensity[iDstOffset] =  // updated in place, computed in single precision
    1400     1960260 :         1.0f -
    1401     1960260 :         (1.0f - float(dfDensity)) * (1.0f - poWK->pafDstDensity[iDstOffset]);
    1402             : }
    1403             : 
    1404             : /************************************************************************/
    1405             : /*                          GWKRoundValueT()                            */
    1406             : /************************************************************************/
    1407             : 
    1408             : template <class T, class U, bool is_signed> struct sGWKRoundValueT  // helper selected on the signedness flag; converts a U value to T
    1409             : {
    1410             :     static T eval(U);  // declared only here; defined by the two partial specializations below
    1411             : };
    1412             : 
    1413             : template <class T, class U> struct sGWKRoundValueT<T, U, true> /* signed */
    1414             : {
    1415      791525 :     static T eval(U value)
    1416             :     {
    1417      791525 :         return static_cast<T>(floor(value + U(0.5)));  // floor(x + 0.5): half-way cases go upward, also for negative x
    1418             :     }
    1419             : };
    1420             : 
    1421             : template <class T, class U> struct sGWKRoundValueT<T, U, false> /* unsigned */
    1422             : {
    1423   110266097 :     static T eval(U value)
    1424             :     {
    1425   110266097 :         return static_cast<T>(value + U(0.5));  // no floor() here: add 0.5, then convert to T
    1426             :     }
    1427             : };
    1428             : 
    1429   110641522 : template <class T, class U> static T GWKRoundValueT(U value)  // converts value to T via the sGWKRoundValueT helper
    1430             : {
    1431   110641522 :     return sGWKRoundValueT<T, U, cpl::NumericLimits<T>::is_signed>::eval(value);  // picks the signed or unsigned specialization from T
    1432             : }
    1433             : 
    1434      268974 : template <> float GWKRoundValueT<float, double>(double value)  // double -> float: no rounding step, just a narrowing cast
    1435             : {
    1436      268974 :     return static_cast<float>(value);
    1437             : }
    1438             : 
    1439             : #ifdef notused
    1440             : template <> double GWKRoundValueT<double, double>(double value)  // identity; only compiled when 'notused' is defined
    1441             : {
    1442             :     return value;
    1443             : }
    1444             : #endif
    1445             : 
    1446             : /************************************************************************/
    1447             : /*                            GWKClampValueT()                          */
    1448             : /************************************************************************/
    1449             : 
    1450   105973034 : template <class T, class U> static CPL_INLINE T GWKClampValueT(U value)  // saturates value to the limits of T, otherwise rounds it with GWKRoundValueT
    1451             : {
    1452   105973034 :     if (value < static_cast<U>(cpl::NumericLimits<T>::min()))  // below the lower limit: return the limit itself
    1453      481927 :         return cpl::NumericLimits<T>::min();
    1454   105470426 :     else if (value > static_cast<U>(cpl::NumericLimits<T>::max()))  // above the upper limit: return the limit itself
    1455      682785 :         return cpl::NumericLimits<T>::max();
    1456             :     else
    1457   104795726 :         return GWKRoundValueT<T, U>(value);  // NOTE(review): a NaN fails both comparisons above and ends up here -- confirm NaN cannot reach this point
    1458             : }
    1459             : 
    1460      718914 : template <> float GWKClampValueT<float, double>(double dfValue)  // double -> float: no range check, just a narrowing cast
    1461             : {
    1462      718914 :     return static_cast<float>(dfValue);
    1463             : }
    1464             : 
    1465             : #ifdef notused
    1466             : template <> double GWKClampValueT<double, double>(double dfValue)  // identity; only compiled when 'notused' is defined
    1467             : {
    1468             :     return dfValue;
    1469             : }
    1470             : #endif
    1471             : 
    1472             : /************************************************************************/
    1473             : /*                             AvoidNoData()                            */
    1474             : /************************************************************************/
    1475             : 
    1476             : template <class T>
    1477    14880669 : inline void AvoidNoData(const GDALWarpKernel *poWK, int iBand,  // if the stored pixel equals the band's destination nodata value, move it to an adjacent value
    1478             :                         GPtrDiff_t iDstOffset)  // iDstOffset: pixel index within the destination buffer of band iBand
    1479             : {
    1480    14880669 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    1481    14880669 :     T *pDst = reinterpret_cast<T *>(pabyDst);  // the band buffer is accessed as an array of T
    1482             : 
    1483    14880669 :     if (poWK->padfDstNoDataReal != nullptr &&  // only relevant when destination nodata values are provided
    1484     6831481 :         poWK->padfDstNoDataReal[iBand] == static_cast<double>(pDst[iDstOffset]))  // exact comparison, done in double
    1485             :     {
    1486             :         if constexpr (cpl::NumericLimits<T>::is_integer)
    1487             :         {
    1488        2637 :             if (pDst[iDstOffset] ==
    1489        2637 :                 static_cast<T>(cpl::NumericLimits<T>::lowest()))  // already at the lowest value: cannot decrement
    1490             :             {
    1491        2509 :                 pDst[iDstOffset] =
    1492        2509 :                     static_cast<T>(cpl::NumericLimits<T>::lowest() + 1);  // so go one above instead
    1493             :             }
    1494             :             else
    1495         128 :                 pDst[iDstOffset]--;  // usual case: one below the nodata value
    1496             :         }
    1497             :         else
    1498             :         {
    1499          64 :             if (pDst[iDstOffset] == cpl::NumericLimits<T>::max())  // at max(): step towards zero instead
    1500             :             {
    1501             :                 using std::nextafter;
    1502           0 :                 pDst[iDstOffset] =
    1503           0 :                     nextafter(pDst[iDstOffset], static_cast<T>(0));
    1504             :             }
    1505             :             else
    1506             :             {
    1507             :                 using std::nextafter;
    1508          64 :                 pDst[iDstOffset] =
    1509          64 :                     nextafter(pDst[iDstOffset], cpl::NumericLimits<T>::max());  // adjacent representable value in the direction of max()
    1510             :             }
    1511             :         }
    1512             : 
    1513        2701 :         if (!poWK->bWarnedAboutDstNoDataReplacement)  // emit the warning only while the flag is still unset
    1514             :         {
    1515          25 :             const_cast<GDALWarpKernel *>(poWK)  // poWK is const here, hence the const_cast to record the flag
    1516             :                 ->bWarnedAboutDstNoDataReplacement = true;
    1517          25 :             CPLError(CE_Warning, CPLE_AppDefined,
    1518             :                      "Value %g in the source dataset has been changed to %g "
    1519             :                      "in the destination dataset to avoid being treated as "
    1520             :                      "NoData. To avoid this, select a different NoData value "
    1521             :                      "for the destination dataset.",
    1522          25 :                      poWK->padfDstNoDataReal[iBand],  // first %g: the nodata value
    1523          25 :                      static_cast<double>(pDst[iDstOffset]));  // second %g: the adjusted value now stored
    1524             :         }
    1525             :     }
    1526    14880669 : }
    1527             : 
    1528             : /************************************************************************/
    1529             : /*                         GWKSetPixelValueRealT()                      */
    1530             : /************************************************************************/
    1531             : 
    1532             : template <class T>
    1533     9788354 : static bool GWKSetPixelValueRealT(const GDALWarpKernel *poWK, int iBand,  // stores value at pixel iDstOffset of band iBand, blended with the existing pixel when dfDensity < 0.9999
    1534             :                                   GPtrDiff_t iDstOffset, double dfDensity,  // dfDensity: weight of the new value
    1535             :                                   T value)  // every return statement below returns true
    1536             : {
    1537     9788354 :     T *pDst = reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);  // the band buffer is accessed as an array of T
    1538             : 
    1539             :     /* -------------------------------------------------------------------- */
    1540             :     /*      If the source density is less than 100% we need to fetch the    */
    1541             :     /*      existing destination value, and mix it with the source to       */
    1542             :     /*      get the new "to apply" value.  Also compute composite           */
    1543             :     /*      density.                                                        */
    1544             :     /*                                                                      */
    1545             :     /*      We avoid mixing if density is very near one or risk mixing      */
    1546             :     /*      in very extreme nodata values and causing odd results (#1610)   */
    1547             :     /* -------------------------------------------------------------------- */
    1548     9788354 :     if (dfDensity < 0.9999)
    1549             :     {
    1550      945508 :         if (dfDensity < 0.0001)  // negligible contribution: leave the destination pixel untouched
    1551           0 :             return true;
    1552             : 
    1553      945508 :         double dfDstDensity = 1.0;  // used when neither a density buffer nor a validity mask is available
    1554             : 
    1555      945508 :         if (poWK->pafDstDensity != nullptr)
    1556      944036 :             dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
    1557        1472 :         else if (poWK->panDstValid != nullptr &&
    1558           0 :                  !CPLMaskGet(poWK->panDstValid, iDstOffset))  // mask entry not set: existing pixel gets zero weight
    1559           0 :             dfDstDensity = 0.0;
    1560             : 
    1561             :         // It seems like we also ought to be testing panDstValid[] here! NOTE(review): the else-if just above does consult panDstValid when pafDstDensity is null, so this remark may be stale -- confirm.
    1562             : 
    1563      945508 :         const double dfDstReal = static_cast<double>(pDst[iDstOffset]);  // existing destination value
    1564             : 
    1565             :         // The destination density is really only relative to the portion
    1566             :         // not occluded by the overlay.
    1567      945508 :         const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
    1568             : 
    1569      945508 :         const double dfReal =  // weighted average of the new value and the existing value
    1570      945508 :             (double(value) * dfDensity + dfDstReal * dfDstInfluence) /
    1571      945508 :             (dfDensity + dfDstInfluence);  // dfDensity >= 0.0001 on this path, so the divisor is non-zero
    1572             : 
    1573             :         /* --------------------------------------------------------------------
    1574             :          */
    1575             :         /*      Actually apply the destination value. */
    1576             :         /*                                                                      */
    1577             :         /*      Avoid using the destination nodata value for integer datatypes
    1578             :          */
    1579             :         /*      if by chance it is equal to the computed pixel value. */
    1580             :         /* --------------------------------------------------------------------
    1581             :          */
    1582      945508 :         pDst[iDstOffset] = GWKClampValueT<T>(dfReal);  // convert the blended double back to T
    1583             :     }
    1584             :     else
    1585             :     {
    1586     8842853 :         pDst[iDstOffset] = value;  // density of (almost) one: store the new value as is, no blending
    1587             :     }
    1588             : 
    1589     9788354 :     AvoidNoData<T>(poWK, iBand, iDstOffset);  // called for both branches after the pixel has been written
    1590             : 
    1591     9788354 :     return true;
    1592             : }
    1593             : 
    1594             : /************************************************************************/
    1595             : /*                       ClampRoundAndAvoidNoData()                     */
    1596             : /************************************************************************/
    1597             : 
    1598             : template <class T>
    1599     5092335 : inline void ClampRoundAndAvoidNoData(const GDALWarpKernel *poWK, int iBand,  // converts dfReal to T, stores it at pixel iDstOffset of band iBand, then calls AvoidNoData
    1600             :                                      GPtrDiff_t iDstOffset, double dfReal)
    1601             : {
    1602     5092335 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    1603     5092335 :     T *pDst = reinterpret_cast<T *>(pabyDst);  // the band buffer is accessed as an array of T
    1604             : 
    1605             :     if constexpr (cpl::NumericLimits<T>::is_integer)  // integer T: saturate to the type's range, otherwise round
    1606             :     {
    1607             :         using std::floor;
    1608     4609439 :         if (dfReal < static_cast<double>(cpl::NumericLimits<T>::lowest()))
    1609        5308 :             pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::lowest());
    1610     4604139 :         else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
    1611       23628 :             pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::max());
    1612             :         else if constexpr (cpl::NumericLimits<T>::is_signed)
    1613        9834 :             pDst[iDstOffset] = static_cast<T>(floor(dfReal + 0.5));  // signed: floor(x + 0.5), so negative values round correctly
    1614             :         else
    1615     4570675 :             pDst[iDstOffset] = static_cast<T>(dfReal + 0.5);  // unsigned: add 0.5, then convert
    1616             :     }
    1617             :     else
    1618             :     {
    1619      482896 :         pDst[iDstOffset] = static_cast<T>(dfReal);  // non-integer T: plain conversion, no clamping or rounding
    1620             :     }
    1621             : 
    1622     5092335 :     AvoidNoData<T>(poWK, iBand, iDstOffset);  // adjust the stored value if it equals the band's destination nodata value
    1623     5092335 : }
    1624             : 
    1625             : /************************************************************************/
    1626             : /*                          GWKSetPixelValue()                          */
    1627             : /************************************************************************/
    1628             : 
    1629     4012410 : static bool GWKSetPixelValue(const GDALWarpKernel *poWK, int iBand,
    1630             :                              GPtrDiff_t iDstOffset, double dfDensity,
    1631             :                              double dfReal, double dfImag)
    1632             : 
    1633             : {
    1634     4012410 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    1635             : 
    1636             :     /* -------------------------------------------------------------------- */
    1637             :     /*      If the source density is less than 100% we need to fetch the    */
    1638             :     /*      existing destination value, and mix it with the source to       */
    1639             :     /*      get the new "to apply" value.  Also compute composite           */
    1640             :     /*      density.                                                        */
    1641             :     /*                                                                      */
    1642             :     /*      We avoid mixing if density is very near one or risk mixing      */
    1643             :     /*      in very extreme nodata values and causing odd results (#1610)   */
    1644             :     /* -------------------------------------------------------------------- */
    1645     4012410 :     if (dfDensity < 0.9999)
    1646             :     {
    1647         800 :         if (dfDensity < 0.0001)
    1648           0 :             return true;
    1649             : 
    1650         800 :         double dfDstDensity = 1.0;
    1651         800 :         if (poWK->pafDstDensity != nullptr)
    1652         800 :             dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
    1653           0 :         else if (poWK->panDstValid != nullptr &&
    1654           0 :                  !CPLMaskGet(poWK->panDstValid, iDstOffset))
    1655           0 :             dfDstDensity = 0.0;
    1656             : 
    1657         800 :         double dfDstReal = 0.0;
    1658         800 :         double dfDstImag = 0.0;
    1659             :         // It seems like we also ought to be testing panDstValid[] here!
    1660             : 
    1661             :         // TODO(schwehr): Factor out this repreated type of set.
    1662         800 :         switch (poWK->eWorkingDataType)
    1663             :         {
    1664           0 :             case GDT_Byte:
    1665           0 :                 dfDstReal = pabyDst[iDstOffset];
    1666           0 :                 dfDstImag = 0.0;
    1667           0 :                 break;
    1668             : 
    1669           0 :             case GDT_Int8:
    1670           0 :                 dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
    1671           0 :                 dfDstImag = 0.0;
    1672           0 :                 break;
    1673             : 
    1674         400 :             case GDT_Int16:
    1675         400 :                 dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
    1676         400 :                 dfDstImag = 0.0;
    1677         400 :                 break;
    1678             : 
    1679         400 :             case GDT_UInt16:
    1680         400 :                 dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
    1681         400 :                 dfDstImag = 0.0;
    1682         400 :                 break;
    1683             : 
    1684           0 :             case GDT_Int32:
    1685           0 :                 dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
    1686           0 :                 dfDstImag = 0.0;
    1687           0 :                 break;
    1688             : 
    1689           0 :             case GDT_UInt32:
    1690           0 :                 dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
    1691           0 :                 dfDstImag = 0.0;
    1692           0 :                 break;
    1693             : 
    1694           0 :             case GDT_Int64:
    1695           0 :                 dfDstReal = static_cast<double>(
    1696           0 :                     reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
    1697           0 :                 dfDstImag = 0.0;
    1698           0 :                 break;
    1699             : 
    1700           0 :             case GDT_UInt64:
    1701           0 :                 dfDstReal = static_cast<double>(
    1702           0 :                     reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
    1703           0 :                 dfDstImag = 0.0;
    1704           0 :                 break;
    1705             : 
    1706           0 :             case GDT_Float16:
    1707           0 :                 dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
    1708           0 :                 dfDstImag = 0.0;
    1709           0 :                 break;
    1710             : 
    1711           0 :             case GDT_Float32:
    1712           0 :                 dfDstReal =
    1713           0 :                     double(reinterpret_cast<float *>(pabyDst)[iDstOffset]);
    1714           0 :                 dfDstImag = 0.0;
    1715           0 :                 break;
    1716             : 
    1717           0 :             case GDT_Float64:
    1718           0 :                 dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
    1719           0 :                 dfDstImag = 0.0;
    1720           0 :                 break;
    1721             : 
    1722           0 :             case GDT_CInt16:
    1723           0 :                 dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2];
    1724           0 :                 dfDstImag =
    1725           0 :                     reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2 + 1];
    1726           0 :                 break;
    1727             : 
    1728           0 :             case GDT_CInt32:
    1729           0 :                 dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2];
    1730           0 :                 dfDstImag =
    1731           0 :                     reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2 + 1];
    1732           0 :                 break;
    1733             : 
    1734           0 :             case GDT_CFloat16:
    1735             :                 dfDstReal =
    1736           0 :                     reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2];
    1737             :                 dfDstImag =
    1738           0 :                     reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1];
    1739           0 :                 break;
    1740             : 
    1741           0 :             case GDT_CFloat32:
    1742           0 :                 dfDstReal =
    1743           0 :                     double(reinterpret_cast<float *>(pabyDst)[iDstOffset * 2]);
    1744           0 :                 dfDstImag = double(
    1745           0 :                     reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1]);
    1746           0 :                 break;
    1747             : 
    1748           0 :             case GDT_CFloat64:
    1749           0 :                 dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset * 2];
    1750           0 :                 dfDstImag =
    1751           0 :                     reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1];
    1752           0 :                 break;
    1753             : 
    1754           0 :             case GDT_Unknown:
    1755             :             case GDT_TypeCount:
    1756           0 :                 CPLAssert(false);
    1757             :                 return false;
    1758             :         }
    1759             : 
    1760             :         // The destination density is really only relative to the portion
    1761             :         // not occluded by the overlay.
    1762         800 :         const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
    1763             : 
    1764         800 :         dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
    1765         800 :                  (dfDensity + dfDstInfluence);
    1766             : 
    1767         800 :         dfImag = (dfImag * dfDensity + dfDstImag * dfDstInfluence) /
    1768         800 :                  (dfDensity + dfDstInfluence);
    1769             :     }
    1770             : 
    1771             :     /* -------------------------------------------------------------------- */
    1772             :     /*      Actually apply the destination value.                           */
    1773             :     /*                                                                      */
    1774             :     /*      Avoid using the destination nodata value for integer datatypes  */
    1775             :     /*      if by chance it is equal to the computed pixel value.           */
    1776             :     /* -------------------------------------------------------------------- */
    1777             : 
    1778     4012410 :     switch (poWK->eWorkingDataType)
    1779             :     {
    1780     3290010 :         case GDT_Byte:
    1781     3290010 :             ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal);
    1782     3290010 :             break;
    1783             : 
    1784           0 :         case GDT_Int8:
    1785           0 :             ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal);
    1786           0 :             break;
    1787             : 
    1788        7472 :         case GDT_Int16:
    1789        7472 :             ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal);
    1790        7472 :             break;
    1791             : 
    1792         464 :         case GDT_UInt16:
    1793         464 :             ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal);
    1794         464 :             break;
    1795             : 
    1796          63 :         case GDT_UInt32:
    1797          63 :             ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal);
    1798          63 :             break;
    1799             : 
    1800          63 :         case GDT_Int32:
    1801          63 :             ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal);
    1802          63 :             break;
    1803             : 
    1804           0 :         case GDT_UInt64:
    1805           0 :             ClampRoundAndAvoidNoData<std::uint64_t>(poWK, iBand, iDstOffset,
    1806             :                                                     dfReal);
    1807           0 :             break;
    1808             : 
    1809           0 :         case GDT_Int64:
    1810           0 :             ClampRoundAndAvoidNoData<std::int64_t>(poWK, iBand, iDstOffset,
    1811             :                                                    dfReal);
    1812           0 :             break;
    1813             : 
    1814           0 :         case GDT_Float16:
    1815           0 :             ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal);
    1816           0 :             break;
    1817             : 
    1818      478957 :         case GDT_Float32:
    1819      478957 :             ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal);
    1820      478957 :             break;
    1821             : 
    1822         147 :         case GDT_Float64:
    1823         147 :             ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal);
    1824         147 :             break;
    1825             : 
    1826      234079 :         case GDT_CInt16:
    1827             :         {
    1828             :             typedef GInt16 T;
    1829      234079 :             if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
    1830           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1831           0 :                     cpl::NumericLimits<T>::min();
    1832      234079 :             else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
    1833           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1834           0 :                     cpl::NumericLimits<T>::max();
    1835             :             else
    1836      234079 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1837      234079 :                     static_cast<T>(floor(dfReal + 0.5));
    1838      234079 :             if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
    1839           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1840           0 :                     cpl::NumericLimits<T>::min();
    1841      234079 :             else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
    1842           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1843           0 :                     cpl::NumericLimits<T>::max();
    1844             :             else
    1845      234079 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1846      234079 :                     static_cast<T>(floor(dfImag + 0.5));
    1847      234079 :             break;
    1848             :         }
    1849             : 
    1850         379 :         case GDT_CInt32:
    1851             :         {
    1852             :             typedef GInt32 T;
    1853         379 :             if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
    1854           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1855           0 :                     cpl::NumericLimits<T>::min();
    1856         379 :             else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
    1857           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1858           0 :                     cpl::NumericLimits<T>::max();
    1859             :             else
    1860         379 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1861         379 :                     static_cast<T>(floor(dfReal + 0.5));
    1862         379 :             if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
    1863           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1864           0 :                     cpl::NumericLimits<T>::min();
    1865         379 :             else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
    1866           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1867           0 :                     cpl::NumericLimits<T>::max();
    1868             :             else
    1869         379 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1870         379 :                     static_cast<T>(floor(dfImag + 0.5));
    1871         379 :             break;
    1872             :         }
    1873             : 
    1874           0 :         case GDT_CFloat16:
    1875           0 :             reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2] =
    1876           0 :                 static_cast<GFloat16>(dfReal);
    1877           0 :             reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1] =
    1878           0 :                 static_cast<GFloat16>(dfImag);
    1879           0 :             break;
    1880             : 
    1881         394 :         case GDT_CFloat32:
    1882         394 :             reinterpret_cast<float *>(pabyDst)[iDstOffset * 2] =
    1883         394 :                 static_cast<float>(dfReal);
    1884         394 :             reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1] =
    1885         394 :                 static_cast<float>(dfImag);
    1886         394 :             break;
    1887             : 
    1888         380 :         case GDT_CFloat64:
    1889         380 :             reinterpret_cast<double *>(pabyDst)[iDstOffset * 2] = dfReal;
    1890         380 :             reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1] = dfImag;
    1891         380 :             break;
    1892             : 
    1893           0 :         case GDT_Unknown:
    1894             :         case GDT_TypeCount:
    1895           0 :             return false;
    1896             :     }
    1897             : 
    1898     4012410 :     return true;
    1899             : }
    1900             : 
    1901             : /************************************************************************/
    1902             : /*                       GWKSetPixelValueReal()                         */
    1903             : /************************************************************************/
    1904             : 
    1905     1315160 : static bool GWKSetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
    1906             :                                  GPtrDiff_t iDstOffset, double dfDensity,
    1907             :                                  double dfReal)
    1908             : 
    1909             : {
    1910     1315160 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    1911             : 
    1912             :     /* -------------------------------------------------------------------- */
    1913             :     /*      If the source density is less than 100% we need to fetch the    */
    1914             :     /*      existing destination value, and mix it with the source to       */
    1915             :     /*      get the new "to apply" value.  Also compute composite           */
    1916             :     /*      density.                                                        */
    1917             :     /*                                                                      */
    1918             :     /*      We avoid mixing if density is very near one or risk mixing      */
    1919             :     /*      in very extreme nodata values and causing odd results (#1610)   */
    1920             :     /* -------------------------------------------------------------------- */
    1921     1315160 :     if (dfDensity < 0.9999)
    1922             :     {
    1923         600 :         if (dfDensity < 0.0001)
    1924           0 :             return true;
    1925             : 
    1926         600 :         double dfDstReal = 0.0;
    1927         600 :         double dfDstDensity = 1.0;
    1928             : 
    1929         600 :         if (poWK->pafDstDensity != nullptr)
    1930         600 :             dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
    1931           0 :         else if (poWK->panDstValid != nullptr &&
    1932           0 :                  !CPLMaskGet(poWK->panDstValid, iDstOffset))
    1933           0 :             dfDstDensity = 0.0;
    1934             : 
    1935             :         // It seems like we also ought to be testing panDstValid[] here!
    1936             : 
    1937         600 :         switch (poWK->eWorkingDataType)
    1938             :         {
    1939           0 :             case GDT_Byte:
    1940           0 :                 dfDstReal = pabyDst[iDstOffset];
    1941           0 :                 break;
    1942             : 
    1943           0 :             case GDT_Int8:
    1944           0 :                 dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
    1945           0 :                 break;
    1946             : 
    1947         300 :             case GDT_Int16:
    1948         300 :                 dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
    1949         300 :                 break;
    1950             : 
    1951         300 :             case GDT_UInt16:
    1952         300 :                 dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
    1953         300 :                 break;
    1954             : 
    1955           0 :             case GDT_Int32:
    1956           0 :                 dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
    1957           0 :                 break;
    1958             : 
    1959           0 :             case GDT_UInt32:
    1960           0 :                 dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
    1961           0 :                 break;
    1962             : 
    1963           0 :             case GDT_Int64:
    1964           0 :                 dfDstReal = static_cast<double>(
    1965           0 :                     reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
    1966           0 :                 break;
    1967             : 
    1968           0 :             case GDT_UInt64:
    1969           0 :                 dfDstReal = static_cast<double>(
    1970           0 :                     reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
    1971           0 :                 break;
    1972             : 
    1973           0 :             case GDT_Float16:
    1974           0 :                 dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
    1975           0 :                 break;
    1976             : 
    1977           0 :             case GDT_Float32:
    1978           0 :                 dfDstReal =
    1979           0 :                     double(reinterpret_cast<float *>(pabyDst)[iDstOffset]);
    1980           0 :                 break;
    1981             : 
    1982           0 :             case GDT_Float64:
    1983           0 :                 dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
    1984           0 :                 break;
    1985             : 
    1986           0 :             case GDT_CInt16:
    1987             :             case GDT_CInt32:
    1988             :             case GDT_CFloat16:
    1989             :             case GDT_CFloat32:
    1990             :             case GDT_CFloat64:
    1991             :             case GDT_Unknown:
    1992             :             case GDT_TypeCount:
    1993           0 :                 CPLAssert(false);
    1994             :                 return false;
    1995             :         }
    1996             : 
    1997             :         // The destination density is really only relative to the portion
    1998             :         // not occluded by the overlay.
    1999         600 :         const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
    2000             : 
    2001         600 :         dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
    2002         600 :                  (dfDensity + dfDstInfluence);
    2003             :     }
    2004             : 
    2005             :     /* -------------------------------------------------------------------- */
    2006             :     /*      Actually apply the destination value.                           */
    2007             :     /*                                                                      */
    2008             :     /*      Avoid using the destination nodata value for integer datatypes  */
    2009             :     /*      if by chance it is equal to the computed pixel value.           */
    2010             :     /* -------------------------------------------------------------------- */
    2011             : 
    2012     1315160 :     switch (poWK->eWorkingDataType)
    2013             :     {
    2014     1308310 :         case GDT_Byte:
    2015     1308310 :             ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal);
    2016     1308310 :             break;
    2017             : 
    2018           0 :         case GDT_Int8:
    2019           0 :             ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal);
    2020           0 :             break;
    2021             : 
    2022        1117 :         case GDT_Int16:
    2023        1117 :             ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal);
    2024        1117 :             break;
    2025             : 
    2026         379 :         case GDT_UInt16:
    2027         379 :             ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal);
    2028         379 :             break;
    2029             : 
    2030         347 :         case GDT_UInt32:
    2031         347 :             ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal);
    2032         347 :             break;
    2033             : 
    2034        1150 :         case GDT_Int32:
    2035        1150 :             ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal);
    2036        1150 :             break;
    2037             : 
    2038          32 :         case GDT_UInt64:
    2039          32 :             ClampRoundAndAvoidNoData<std::uint64_t>(poWK, iBand, iDstOffset,
    2040             :                                                     dfReal);
    2041          32 :             break;
    2042             : 
    2043          32 :         case GDT_Int64:
    2044          32 :             ClampRoundAndAvoidNoData<std::int64_t>(poWK, iBand, iDstOffset,
    2045             :                                                    dfReal);
    2046          32 :             break;
    2047             : 
    2048           0 :         case GDT_Float16:
    2049           0 :             ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal);
    2050           0 :             break;
    2051             : 
    2052        3442 :         case GDT_Float32:
    2053        3442 :             ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal);
    2054        3442 :             break;
    2055             : 
    2056         350 :         case GDT_Float64:
    2057         350 :             ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal);
    2058         350 :             break;
    2059             : 
    2060           0 :         case GDT_CInt16:
    2061             :         case GDT_CInt32:
    2062             :         case GDT_CFloat16:
    2063             :         case GDT_CFloat32:
    2064             :         case GDT_CFloat64:
    2065           0 :             return false;
    2066             : 
    2067           0 :         case GDT_Unknown:
    2068             :         case GDT_TypeCount:
    2069           0 :             CPLAssert(false);
    2070             :             return false;
    2071             :     }
    2072             : 
    2073     1315160 :     return true;
    2074             : }
    2075             : 
    2076             : /************************************************************************/
    2077             : /*                          GWKGetPixelValue()                          */
    2078             : /************************************************************************/
    2079             : 
    2080             : /* It is assumed that panUnifiedSrcValid has been checked before */
    2081             : 
    2082    30506400 : static bool GWKGetPixelValue(const GDALWarpKernel *poWK, int iBand,
    2083             :                              GPtrDiff_t iSrcOffset, double *pdfDensity,
    2084             :                              double *pdfReal, double *pdfImag)
    2085             : 
    2086             : {
    2087    30506400 :     GByte *pabySrc = poWK->papabySrcImage[iBand];
    2088             : 
    2089    61012700 :     if (poWK->papanBandSrcValid != nullptr &&
    2090    30506400 :         poWK->papanBandSrcValid[iBand] != nullptr &&
    2091           0 :         !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
    2092             :     {
    2093           0 :         *pdfDensity = 0.0;
    2094           0 :         return false;
    2095             :     }
    2096             : 
    2097    30506400 :     *pdfReal = 0.0;
    2098    30506400 :     *pdfImag = 0.0;
    2099             : 
    2100             :     // TODO(schwehr): Fix casting.
    2101    30506400 :     switch (poWK->eWorkingDataType)
    2102             :     {
    2103    29429400 :         case GDT_Byte:
    2104    29429400 :             *pdfReal = pabySrc[iSrcOffset];
    2105    29429400 :             *pdfImag = 0.0;
    2106    29429400 :             break;
    2107             : 
    2108           0 :         case GDT_Int8:
    2109           0 :             *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
    2110           0 :             *pdfImag = 0.0;
    2111           0 :             break;
    2112             : 
    2113       28232 :         case GDT_Int16:
    2114       28232 :             *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
    2115       28232 :             *pdfImag = 0.0;
    2116       28232 :             break;
    2117             : 
    2118         166 :         case GDT_UInt16:
    2119         166 :             *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
    2120         166 :             *pdfImag = 0.0;
    2121         166 :             break;
    2122             : 
    2123          63 :         case GDT_Int32:
    2124          63 :             *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
    2125          63 :             *pdfImag = 0.0;
    2126          63 :             break;
    2127             : 
    2128          63 :         case GDT_UInt32:
    2129          63 :             *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
    2130          63 :             *pdfImag = 0.0;
    2131          63 :             break;
    2132             : 
    2133           0 :         case GDT_Int64:
    2134           0 :             *pdfReal = static_cast<double>(
    2135           0 :                 reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
    2136           0 :             *pdfImag = 0.0;
    2137           0 :             break;
    2138             : 
    2139           0 :         case GDT_UInt64:
    2140           0 :             *pdfReal = static_cast<double>(
    2141           0 :                 reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
    2142           0 :             *pdfImag = 0.0;
    2143           0 :             break;
    2144             : 
    2145           0 :         case GDT_Float16:
    2146           0 :             *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
    2147           0 :             *pdfImag = 0.0;
    2148           0 :             break;
    2149             : 
    2150     1047220 :         case GDT_Float32:
    2151     1047220 :             *pdfReal = double(reinterpret_cast<float *>(pabySrc)[iSrcOffset]);
    2152     1047220 :             *pdfImag = 0.0;
    2153     1047220 :             break;
    2154             : 
    2155         582 :         case GDT_Float64:
    2156         582 :             *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
    2157         582 :             *pdfImag = 0.0;
    2158         582 :             break;
    2159             : 
    2160         133 :         case GDT_CInt16:
    2161         133 :             *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2];
    2162         133 :             *pdfImag = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2 + 1];
    2163         133 :             break;
    2164             : 
    2165         133 :         case GDT_CInt32:
    2166         133 :             *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2];
    2167         133 :             *pdfImag = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2 + 1];
    2168         133 :             break;
    2169             : 
    2170           0 :         case GDT_CFloat16:
    2171           0 :             *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2];
    2172           0 :             *pdfImag =
    2173           0 :                 reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2 + 1];
    2174           0 :             break;
    2175             : 
    2176         194 :         case GDT_CFloat32:
    2177         194 :             *pdfReal =
    2178         194 :                 double(reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2]);
    2179         194 :             *pdfImag =
    2180         194 :                 double(reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2 + 1]);
    2181         194 :             break;
    2182             : 
    2183         138 :         case GDT_CFloat64:
    2184         138 :             *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2];
    2185         138 :             *pdfImag = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2 + 1];
    2186         138 :             break;
    2187             : 
    2188           0 :         case GDT_Unknown:
    2189             :         case GDT_TypeCount:
    2190           0 :             CPLAssert(false);
    2191             :             *pdfDensity = 0.0;
    2192             :             return false;
    2193             :     }
    2194             : 
    2195    30506400 :     if (poWK->pafUnifiedSrcDensity != nullptr)
    2196     4194800 :         *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    2197             :     else
    2198    26311600 :         *pdfDensity = 1.0;
    2199             : 
    2200    30506400 :     return *pdfDensity != 0.0;
    2201             : }
    2202             : 
    2203             : /************************************************************************/
    2204             : /*                       GWKGetPixelValueReal()                         */
    2205             : /************************************************************************/
    2206             : 
    2207        1092 : static bool GWKGetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
    2208             :                                  GPtrDiff_t iSrcOffset, double *pdfDensity,
    2209             :                                  double *pdfReal)
    2210             : 
    2211             : {
    2212        1092 :     GByte *pabySrc = poWK->papabySrcImage[iBand];
    2213             : 
    2214        2186 :     if (poWK->papanBandSrcValid != nullptr &&
    2215        1094 :         poWK->papanBandSrcValid[iBand] != nullptr &&
    2216           2 :         !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
    2217             :     {
    2218           0 :         *pdfDensity = 0.0;
    2219           0 :         return false;
    2220             :     }
    2221             : 
    2222        1092 :     switch (poWK->eWorkingDataType)
    2223             :     {
    2224           1 :         case GDT_Byte:
    2225           1 :             *pdfReal = pabySrc[iSrcOffset];
    2226           1 :             break;
    2227             : 
    2228           0 :         case GDT_Int8:
    2229           0 :             *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
    2230           0 :             break;
    2231             : 
    2232           1 :         case GDT_Int16:
    2233           1 :             *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
    2234           1 :             break;
    2235             : 
    2236           1 :         case GDT_UInt16:
    2237           1 :             *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
    2238           1 :             break;
    2239             : 
    2240         886 :         case GDT_Int32:
    2241         886 :             *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
    2242         886 :             break;
    2243             : 
    2244          83 :         case GDT_UInt32:
    2245          83 :             *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
    2246          83 :             break;
    2247             : 
    2248          16 :         case GDT_Int64:
    2249          16 :             *pdfReal = static_cast<double>(
    2250          16 :                 reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
    2251          16 :             break;
    2252             : 
    2253          16 :         case GDT_UInt64:
    2254          16 :             *pdfReal = static_cast<double>(
    2255          16 :                 reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
    2256          16 :             break;
    2257             : 
    2258           0 :         case GDT_Float16:
    2259           0 :             *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
    2260           0 :             break;
    2261             : 
    2262           2 :         case GDT_Float32:
    2263           2 :             *pdfReal = double(reinterpret_cast<float *>(pabySrc)[iSrcOffset]);
    2264           2 :             break;
    2265             : 
    2266          86 :         case GDT_Float64:
    2267          86 :             *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
    2268          86 :             break;
    2269             : 
    2270           0 :         case GDT_CInt16:
    2271             :         case GDT_CInt32:
    2272             :         case GDT_CFloat16:
    2273             :         case GDT_CFloat32:
    2274             :         case GDT_CFloat64:
    2275             :         case GDT_Unknown:
    2276             :         case GDT_TypeCount:
    2277           0 :             CPLAssert(false);
    2278             :             return false;
    2279             :     }
    2280             : 
    2281        1092 :     if (poWK->pafUnifiedSrcDensity != nullptr)
    2282           0 :         *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    2283             :     else
    2284        1092 :         *pdfDensity = 1.0;
    2285             : 
    2286        1092 :     return *pdfDensity != 0.0;
    2287             : }
    2288             : 
    2289             : /************************************************************************/
    2290             : /*                          GWKGetPixelRow()                            */
    2291             : /************************************************************************/
    2292             : 
    2293             : /* It is assumed that adfImag[] is set to 0 by caller code for non-complex */
    2294             : /* data-types. */
    2295             : 
    2296     2368030 : static bool GWKGetPixelRow(const GDALWarpKernel *poWK, int iBand,
    2297             :                            GPtrDiff_t iSrcOffset, int nHalfSrcLen,
    2298             :                            double *padfDensity, double adfReal[],
    2299             :                            double *padfImag)
    2300             : {
    2301             :     // We know that nSrcLen is even, so we can *always* unroll loops 2x.
    2302     2368030 :     const int nSrcLen = nHalfSrcLen * 2;
    2303     2368030 :     bool bHasValid = false;
    2304             : 
    2305     2368030 :     if (padfDensity != nullptr)
    2306             :     {
    2307             :         // Init the density.
    2308     3380670 :         for (int i = 0; i < nSrcLen; i += 2)
    2309             :         {
    2310     2210230 :             padfDensity[i] = 1.0;
    2311     2210230 :             padfDensity[i + 1] = 1.0;
    2312             :         }
    2313             : 
    2314     1170440 :         if (poWK->panUnifiedSrcValid != nullptr)
    2315             :         {
    2316     3281460 :             for (int i = 0; i < nSrcLen; i += 2)
    2317             :             {
    2318     2142070 :                 if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i))
    2319     2067740 :                     bHasValid = true;
    2320             :                 else
    2321       74323 :                     padfDensity[i] = 0.0;
    2322             : 
    2323     2142070 :                 if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i + 1))
    2324     2068400 :                     bHasValid = true;
    2325             :                 else
    2326       73668 :                     padfDensity[i + 1] = 0.0;
    2327             :             }
    2328             : 
    2329             :             // Reset or fail as needed.
    2330     1139400 :             if (bHasValid)
    2331     1116590 :                 bHasValid = false;
    2332             :             else
    2333       22806 :                 return false;
    2334             :         }
    2335             : 
    2336     1147640 :         if (poWK->papanBandSrcValid != nullptr &&
    2337           0 :             poWK->papanBandSrcValid[iBand] != nullptr)
    2338             :         {
    2339           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2340             :             {
    2341           0 :                 if (CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset + i))
    2342           0 :                     bHasValid = true;
    2343             :                 else
    2344           0 :                     padfDensity[i] = 0.0;
    2345             : 
    2346           0 :                 if (CPLMaskGet(poWK->papanBandSrcValid[iBand],
    2347           0 :                                iSrcOffset + i + 1))
    2348           0 :                     bHasValid = true;
    2349             :                 else
    2350           0 :                     padfDensity[i + 1] = 0.0;
    2351             :             }
    2352             : 
    2353             :             // Reset or fail as needed.
    2354           0 :             if (bHasValid)
    2355           0 :                 bHasValid = false;
    2356             :             else
    2357           0 :                 return false;
    2358             :         }
    2359             :     }
    2360             : 
    2361             :     // TODO(schwehr): Fix casting.
    2362             :     // Fetch data.
    2363     2345230 :     switch (poWK->eWorkingDataType)
    2364             :     {
    2365     1136510 :         case GDT_Byte:
    2366             :         {
    2367     1136510 :             GByte *pSrc =
    2368     1136510 :                 reinterpret_cast<GByte *>(poWK->papabySrcImage[iBand]);
    2369     1136510 :             pSrc += iSrcOffset;
    2370     3281230 :             for (int i = 0; i < nSrcLen; i += 2)
    2371             :             {
    2372     2144720 :                 adfReal[i] = pSrc[i];
    2373     2144720 :                 adfReal[i + 1] = pSrc[i + 1];
    2374             :             }
    2375     1136510 :             break;
    2376             :         }
    2377             : 
    2378           0 :         case GDT_Int8:
    2379             :         {
    2380           0 :             GInt8 *pSrc =
    2381           0 :                 reinterpret_cast<GInt8 *>(poWK->papabySrcImage[iBand]);
    2382           0 :             pSrc += iSrcOffset;
    2383           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2384             :             {
    2385           0 :                 adfReal[i] = pSrc[i];
    2386           0 :                 adfReal[i + 1] = pSrc[i + 1];
    2387             :             }
    2388           0 :             break;
    2389             :         }
    2390             : 
    2391        5614 :         case GDT_Int16:
    2392             :         {
    2393        5614 :             GInt16 *pSrc =
    2394        5614 :                 reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
    2395        5614 :             pSrc += iSrcOffset;
    2396       21492 :             for (int i = 0; i < nSrcLen; i += 2)
    2397             :             {
    2398       15878 :                 adfReal[i] = pSrc[i];
    2399       15878 :                 adfReal[i + 1] = pSrc[i + 1];
    2400             :             }
    2401        5614 :             break;
    2402             :         }
    2403             : 
    2404        4142 :         case GDT_UInt16:
    2405             :         {
    2406        4142 :             GUInt16 *pSrc =
    2407        4142 :                 reinterpret_cast<GUInt16 *>(poWK->papabySrcImage[iBand]);
    2408        4142 :             pSrc += iSrcOffset;
    2409       18548 :             for (int i = 0; i < nSrcLen; i += 2)
    2410             :             {
    2411       14406 :                 adfReal[i] = pSrc[i];
    2412       14406 :                 adfReal[i + 1] = pSrc[i + 1];
    2413             :             }
    2414        4142 :             break;
    2415             :         }
    2416             : 
    2417         778 :         case GDT_Int32:
    2418             :         {
    2419         778 :             GInt32 *pSrc =
    2420         778 :                 reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
    2421         778 :             pSrc += iSrcOffset;
    2422        2288 :             for (int i = 0; i < nSrcLen; i += 2)
    2423             :             {
    2424        1510 :                 adfReal[i] = pSrc[i];
    2425        1510 :                 adfReal[i + 1] = pSrc[i + 1];
    2426             :             }
    2427         778 :             break;
    2428             :         }
    2429             : 
    2430         778 :         case GDT_UInt32:
    2431             :         {
    2432         778 :             GUInt32 *pSrc =
    2433         778 :                 reinterpret_cast<GUInt32 *>(poWK->papabySrcImage[iBand]);
    2434         778 :             pSrc += iSrcOffset;
    2435        2288 :             for (int i = 0; i < nSrcLen; i += 2)
    2436             :             {
    2437        1510 :                 adfReal[i] = pSrc[i];
    2438        1510 :                 adfReal[i + 1] = pSrc[i + 1];
    2439             :             }
    2440         778 :             break;
    2441             :         }
    2442             : 
    2443          28 :         case GDT_Int64:
    2444             :         {
    2445          28 :             auto pSrc =
    2446          28 :                 reinterpret_cast<std::int64_t *>(poWK->papabySrcImage[iBand]);
    2447          28 :             pSrc += iSrcOffset;
    2448          56 :             for (int i = 0; i < nSrcLen; i += 2)
    2449             :             {
    2450          28 :                 adfReal[i] = static_cast<double>(pSrc[i]);
    2451          28 :                 adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
    2452             :             }
    2453          28 :             break;
    2454             :         }
    2455             : 
    2456          28 :         case GDT_UInt64:
    2457             :         {
    2458          28 :             auto pSrc =
    2459          28 :                 reinterpret_cast<std::uint64_t *>(poWK->papabySrcImage[iBand]);
    2460          28 :             pSrc += iSrcOffset;
    2461          56 :             for (int i = 0; i < nSrcLen; i += 2)
    2462             :             {
    2463          28 :                 adfReal[i] = static_cast<double>(pSrc[i]);
    2464          28 :                 adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
    2465             :             }
    2466          28 :             break;
    2467             :         }
    2468             : 
    2469           0 :         case GDT_Float16:
    2470             :         {
    2471           0 :             GFloat16 *pSrc =
    2472           0 :                 reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
    2473           0 :             pSrc += iSrcOffset;
    2474           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2475             :             {
    2476           0 :                 adfReal[i] = pSrc[i];
    2477           0 :                 adfReal[i + 1] = pSrc[i + 1];
    2478             :             }
    2479           0 :             break;
    2480             :         }
    2481             : 
    2482       25102 :         case GDT_Float32:
    2483             :         {
    2484       25102 :             float *pSrc =
    2485       25102 :                 reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
    2486       25102 :             pSrc += iSrcOffset;
    2487      121403 :             for (int i = 0; i < nSrcLen; i += 2)
    2488             :             {
    2489       96301 :                 adfReal[i] = double(pSrc[i]);
    2490       96301 :                 adfReal[i + 1] = double(pSrc[i + 1]);
    2491             :             }
    2492       25102 :             break;
    2493             :         }
    2494             : 
    2495         778 :         case GDT_Float64:
    2496             :         {
    2497         778 :             double *pSrc =
    2498         778 :                 reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
    2499         778 :             pSrc += iSrcOffset;
    2500        2288 :             for (int i = 0; i < nSrcLen; i += 2)
    2501             :             {
    2502        1510 :                 adfReal[i] = pSrc[i];
    2503        1510 :                 adfReal[i + 1] = pSrc[i + 1];
    2504             :             }
    2505         778 :             break;
    2506             :         }
    2507             : 
    2508     1169220 :         case GDT_CInt16:
    2509             :         {
    2510     1169220 :             GInt16 *pSrc =
    2511     1169220 :                 reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
    2512     1169220 :             pSrc += 2 * iSrcOffset;
    2513     4676020 :             for (int i = 0; i < nSrcLen; i += 2)
    2514             :             {
    2515     3506800 :                 adfReal[i] = pSrc[2 * i];
    2516     3506800 :                 padfImag[i] = pSrc[2 * i + 1];
    2517             : 
    2518     3506800 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2519     3506800 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2520             :             }
    2521     1169220 :             break;
    2522             :         }
    2523             : 
    2524         750 :         case GDT_CInt32:
    2525             :         {
    2526         750 :             GInt32 *pSrc =
    2527         750 :                 reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
    2528         750 :             pSrc += 2 * iSrcOffset;
    2529        2232 :             for (int i = 0; i < nSrcLen; i += 2)
    2530             :             {
    2531        1482 :                 adfReal[i] = pSrc[2 * i];
    2532        1482 :                 padfImag[i] = pSrc[2 * i + 1];
    2533             : 
    2534        1482 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2535        1482 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2536             :             }
    2537         750 :             break;
    2538             :         }
    2539             : 
    2540           0 :         case GDT_CFloat16:
    2541             :         {
    2542           0 :             GFloat16 *pSrc =
    2543           0 :                 reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
    2544           0 :             pSrc += 2 * iSrcOffset;
    2545           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2546             :             {
    2547           0 :                 adfReal[i] = pSrc[2 * i];
    2548           0 :                 padfImag[i] = pSrc[2 * i + 1];
    2549             : 
    2550           0 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2551           0 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2552             :             }
    2553           0 :             break;
    2554             :         }
    2555             : 
    2556         750 :         case GDT_CFloat32:
    2557             :         {
    2558         750 :             float *pSrc =
    2559         750 :                 reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
    2560         750 :             pSrc += 2 * iSrcOffset;
    2561        2232 :             for (int i = 0; i < nSrcLen; i += 2)
    2562             :             {
    2563        1482 :                 adfReal[i] = double(pSrc[2 * i]);
    2564        1482 :                 padfImag[i] = double(pSrc[2 * i + 1]);
    2565             : 
    2566        1482 :                 adfReal[i + 1] = double(pSrc[2 * i + 2]);
    2567        1482 :                 padfImag[i + 1] = double(pSrc[2 * i + 3]);
    2568             :             }
    2569         750 :             break;
    2570             :         }
    2571             : 
    2572         750 :         case GDT_CFloat64:
    2573             :         {
    2574         750 :             double *pSrc =
    2575         750 :                 reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
    2576         750 :             pSrc += 2 * iSrcOffset;
    2577        2232 :             for (int i = 0; i < nSrcLen; i += 2)
    2578             :             {
    2579        1482 :                 adfReal[i] = pSrc[2 * i];
    2580        1482 :                 padfImag[i] = pSrc[2 * i + 1];
    2581             : 
    2582        1482 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2583        1482 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2584             :             }
    2585         750 :             break;
    2586             :         }
    2587             : 
    2588           0 :         case GDT_Unknown:
    2589             :         case GDT_TypeCount:
    2590           0 :             CPLAssert(false);
    2591             :             if (padfDensity)
    2592             :                 memset(padfDensity, 0, nSrcLen * sizeof(double));
    2593             :             return false;
    2594             :     }
    2595             : 
    2596     2345230 :     if (padfDensity == nullptr)
    2597     1197590 :         return true;
    2598             : 
    2599     1147640 :     if (poWK->pafUnifiedSrcDensity == nullptr)
    2600             :     {
    2601     3253380 :         for (int i = 0; i < nSrcLen; i += 2)
    2602             :         {
    2603             :             // Take into account earlier calcs.
    2604     2125710 :             if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2605             :             {
    2606     2085800 :                 padfDensity[i] = 1.0;
    2607     2085800 :                 bHasValid = true;
    2608             :             }
    2609             : 
    2610     2125710 :             if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2611             :             {
    2612     2086460 :                 padfDensity[i + 1] = 1.0;
    2613     2086460 :                 bHasValid = true;
    2614             :             }
    2615             :         }
    2616             :     }
    2617             :     else
    2618             :     {
    2619       70068 :         for (int i = 0; i < nSrcLen; i += 2)
    2620             :         {
    2621       50103 :             if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2622       50103 :                 padfDensity[i] =
    2623       50103 :                     double(poWK->pafUnifiedSrcDensity[iSrcOffset + i]);
    2624       50103 :             if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2625       49252 :                 bHasValid = true;
    2626             : 
    2627       50103 :             if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2628       50103 :                 padfDensity[i + 1] =
    2629       50103 :                     double(poWK->pafUnifiedSrcDensity[iSrcOffset + i + 1]);
    2630       50103 :             if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2631       49170 :                 bHasValid = true;
    2632             :         }
    2633             :     }
    2634             : 
    2635     1147640 :     return bHasValid;
    2636             : }
    2637             : 
    2638             : /************************************************************************/
    2639             : /*                          GWKGetPixelT()                              */
    2640             : /************************************************************************/
    2641             : 
    2642             : template <class T>
    2643     9798646 : static bool GWKGetPixelT(const GDALWarpKernel *poWK, int iBand,
    2644             :                          GPtrDiff_t iSrcOffset, double *pdfDensity, T *pValue)
    2645             : 
    2646             : {
    2647     9798646 :     T *pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
    2648             : 
    2649    22324997 :     if ((poWK->panUnifiedSrcValid != nullptr &&
    2650    19597272 :          !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset)) ||
    2651     9798646 :         (poWK->papanBandSrcValid != nullptr &&
    2652      589836 :          poWK->papanBandSrcValid[iBand] != nullptr &&
    2653      589836 :          !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset)))
    2654             :     {
    2655           9 :         *pdfDensity = 0.0;
    2656           9 :         return false;
    2657             :     }
    2658             : 
    2659     9798636 :     *pValue = pSrc[iSrcOffset];
    2660             : 
    2661     9798636 :     if (poWK->pafUnifiedSrcDensity == nullptr)
    2662     8676283 :         *pdfDensity = 1.0;
    2663             :     else
    2664     1122362 :         *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    2665             : 
    2666     9798636 :     return *pdfDensity != 0.0;
    2667             : }
    2668             : 
    2669             : /************************************************************************/
    2670             : /*                        GWKBilinearResample()                         */
    2671             : /*     Set of bilinear interpolators                                    */
    2672             : /************************************************************************/
    2673             : 
    2674       76488 : static bool GWKBilinearResample4Sample(const GDALWarpKernel *poWK, int iBand,
    2675             :                                        double dfSrcX, double dfSrcY,
    2676             :                                        double *pdfDensity, double *pdfReal,
    2677             :                                        double *pdfImag)
    2678             : 
    2679             : {
    2680             :     // Save as local variables to avoid following pointers.
    2681       76488 :     const int nSrcXSize = poWK->nSrcXSize;
    2682       76488 :     const int nSrcYSize = poWK->nSrcYSize;
    2683             : 
    2684       76488 :     int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    2685       76488 :     int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    2686       76488 :     double dfRatioX = 1.5 - (dfSrcX - iSrcX);
    2687       76488 :     double dfRatioY = 1.5 - (dfSrcY - iSrcY);
    2688       76488 :     bool bShifted = false;
    2689             : 
    2690       76488 :     if (iSrcX == -1)
    2691             :     {
    2692        1534 :         iSrcX = 0;
    2693        1534 :         dfRatioX = 1;
    2694             :     }
    2695       76488 :     if (iSrcY == -1)
    2696             :     {
    2697        7734 :         iSrcY = 0;
    2698        7734 :         dfRatioY = 1;
    2699             :     }
    2700       76488 :     GPtrDiff_t iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    2701             : 
    2702             :     // Shift so we don't overrun the array.
    2703       76488 :     if (static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize == iSrcOffset + 1 ||
    2704       76430 :         static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize ==
    2705       76430 :             iSrcOffset + nSrcXSize + 1)
    2706             :     {
    2707         110 :         bShifted = true;
    2708         110 :         --iSrcOffset;
    2709             :     }
    2710             : 
    2711       76488 :     double adfDensity[2] = {0.0, 0.0};
    2712       76488 :     double adfReal[2] = {0.0, 0.0};
    2713       76488 :     double adfImag[2] = {0.0, 0.0};
    2714       76488 :     double dfAccumulatorReal = 0.0;
    2715       76488 :     double dfAccumulatorImag = 0.0;
    2716       76488 :     double dfAccumulatorDensity = 0.0;
    2717       76488 :     double dfAccumulatorDivisor = 0.0;
    2718             : 
    2719       76488 :     const GPtrDiff_t nSrcPixels =
    2720       76488 :         static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize;
    2721             :     // Get pixel row.
    2722       76488 :     if (iSrcY >= 0 && iSrcY < nSrcYSize && iSrcOffset >= 0 &&
    2723      152976 :         iSrcOffset < nSrcPixels &&
    2724       76488 :         GWKGetPixelRow(poWK, iBand, iSrcOffset, 1, adfDensity, adfReal,
    2725             :                        adfImag))
    2726             :     {
    2727       70544 :         double dfMult1 = dfRatioX * dfRatioY;
    2728       70544 :         double dfMult2 = (1.0 - dfRatioX) * dfRatioY;
    2729             : 
    2730             :         // Shifting corrected.
    2731       70544 :         if (bShifted)
    2732             :         {
    2733         110 :             adfReal[0] = adfReal[1];
    2734         110 :             adfImag[0] = adfImag[1];
    2735         110 :             adfDensity[0] = adfDensity[1];
    2736             :         }
    2737             : 
    2738             :         // Upper Left Pixel.
    2739       70544 :         if (iSrcX >= 0 && iSrcX < nSrcXSize &&
    2740       70544 :             adfDensity[0] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2741             :         {
    2742       65090 :             dfAccumulatorDivisor += dfMult1;
    2743             : 
    2744       65090 :             dfAccumulatorReal += adfReal[0] * dfMult1;
    2745       65090 :             dfAccumulatorImag += adfImag[0] * dfMult1;
    2746       65090 :             dfAccumulatorDensity += adfDensity[0] * dfMult1;
    2747             :         }
    2748             : 
    2749             :         // Upper Right Pixel.
    2750       70544 :         if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
    2751       69889 :             adfDensity[1] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2752             :         {
    2753       64615 :             dfAccumulatorDivisor += dfMult2;
    2754             : 
    2755       64615 :             dfAccumulatorReal += adfReal[1] * dfMult2;
    2756       64615 :             dfAccumulatorImag += adfImag[1] * dfMult2;
    2757       64615 :             dfAccumulatorDensity += adfDensity[1] * dfMult2;
    2758             :         }
    2759             :     }
    2760             : 
    2761             :     // Get pixel row.
    2762       76488 :     if (iSrcY + 1 >= 0 && iSrcY + 1 < nSrcYSize &&
    2763      225392 :         iSrcOffset + nSrcXSize >= 0 && iSrcOffset + nSrcXSize < nSrcPixels &&
    2764       72416 :         GWKGetPixelRow(poWK, iBand, iSrcOffset + nSrcXSize, 1, adfDensity,
    2765             :                        adfReal, adfImag))
    2766             :     {
    2767       66857 :         double dfMult1 = dfRatioX * (1.0 - dfRatioY);
    2768       66857 :         double dfMult2 = (1.0 - dfRatioX) * (1.0 - dfRatioY);
    2769             : 
    2770             :         // Shifting corrected
    2771       66857 :         if (bShifted)
    2772             :         {
    2773          52 :             adfReal[0] = adfReal[1];
    2774          52 :             adfImag[0] = adfImag[1];
    2775          52 :             adfDensity[0] = adfDensity[1];
    2776             :         }
    2777             : 
    2778             :         // Lower Left Pixel
    2779       66857 :         if (iSrcX >= 0 && iSrcX < nSrcXSize &&
    2780       66857 :             adfDensity[0] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2781             :         {
    2782       61578 :             dfAccumulatorDivisor += dfMult1;
    2783             : 
    2784       61578 :             dfAccumulatorReal += adfReal[0] * dfMult1;
    2785       61578 :             dfAccumulatorImag += adfImag[0] * dfMult1;
    2786       61578 :             dfAccumulatorDensity += adfDensity[0] * dfMult1;
    2787             :         }
    2788             : 
    2789             :         // Lower Right Pixel.
    2790       66857 :         if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
    2791       66260 :             adfDensity[1] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2792             :         {
    2793       61283 :             dfAccumulatorDivisor += dfMult2;
    2794             : 
    2795       61283 :             dfAccumulatorReal += adfReal[1] * dfMult2;
    2796       61283 :             dfAccumulatorImag += adfImag[1] * dfMult2;
    2797       61283 :             dfAccumulatorDensity += adfDensity[1] * dfMult2;
    2798             :         }
    2799             :     }
    2800             : 
    2801             :     /* -------------------------------------------------------------------- */
    2802             :     /*      Return result.                                                  */
    2803             :     /* -------------------------------------------------------------------- */
    2804       76488 :     if (dfAccumulatorDivisor == 1.0)
    2805             :     {
    2806       44969 :         *pdfReal = dfAccumulatorReal;
    2807       44969 :         *pdfImag = dfAccumulatorImag;
    2808       44969 :         *pdfDensity = dfAccumulatorDensity;
    2809       44969 :         return false;
    2810             :     }
    2811       31519 :     else if (dfAccumulatorDivisor < 0.00001)
    2812             :     {
    2813           0 :         *pdfReal = 0.0;
    2814           0 :         *pdfImag = 0.0;
    2815           0 :         *pdfDensity = 0.0;
    2816           0 :         return false;
    2817             :     }
    2818             :     else
    2819             :     {
    2820       31519 :         *pdfReal = dfAccumulatorReal / dfAccumulatorDivisor;
    2821       31519 :         *pdfImag = dfAccumulatorImag / dfAccumulatorDivisor;
    2822       31519 :         *pdfDensity = dfAccumulatorDensity / dfAccumulatorDivisor;
    2823       31519 :         return true;
    2824             :     }
    2825             : }
    2826             : 
    2827             : template <class T>
    2828     6544500 : static bool GWKBilinearResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
    2829             :                                                int iBand, double dfSrcX,
    2830             :                                                double dfSrcY, T *pValue)
    2831             : 
    2832             : {
    2833             : 
    2834     6544500 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    2835     6544500 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    2836     6544500 :     GPtrDiff_t iSrcOffset =
    2837     6544500 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    2838     6544500 :     const double dfRatioX = 1.5 - (dfSrcX - iSrcX);
    2839     6544500 :     const double dfRatioY = 1.5 - (dfSrcY - iSrcY);
    2840             : 
    2841     6544500 :     const T *const pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
    2842             : 
    2843     6544500 :     if (iSrcX >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
    2844     4420828 :         iSrcY + 1 < poWK->nSrcYSize)
    2845             :     {
    2846     4377610 :         const double dfAccumulator =
    2847     4377610 :             (double(pSrc[iSrcOffset]) * dfRatioX +
    2848     4377610 :              double(pSrc[iSrcOffset + 1]) * (1.0 - dfRatioX)) *
    2849             :                 dfRatioY +
    2850     4377610 :             (double(pSrc[iSrcOffset + poWK->nSrcXSize]) * dfRatioX +
    2851     4377610 :              double(pSrc[iSrcOffset + 1 + poWK->nSrcXSize]) *
    2852     4377610 :                  (1.0 - dfRatioX)) *
    2853     4377610 :                 (1.0 - dfRatioY);
    2854             : 
    2855     4377610 :         *pValue = GWKRoundValueT<T>(dfAccumulator);
    2856             : 
    2857     4377560 :         return true;
    2858             :     }
    2859             : 
    2860     2166890 :     double dfAccumulatorDivisor = 0.0;
    2861     2166890 :     double dfAccumulator = 0.0;
    2862             : 
    2863             :     // Upper Left Pixel.
    2864     2166890 :     if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY >= 0 &&
    2865      253821 :         iSrcY < poWK->nSrcYSize)
    2866             :     {
    2867      253821 :         const double dfMult = dfRatioX * dfRatioY;
    2868             : 
    2869      253821 :         dfAccumulatorDivisor += dfMult;
    2870             : 
    2871      253821 :         dfAccumulator += double(pSrc[iSrcOffset]) * dfMult;
    2872             :     }
    2873             : 
    2874             :     // Upper Right Pixel.
    2875     2166890 :     if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
    2876     1877980 :         iSrcY < poWK->nSrcYSize)
    2877             :     {
    2878     1858270 :         const double dfMult = (1.0 - dfRatioX) * dfRatioY;
    2879             : 
    2880     1858270 :         dfAccumulatorDivisor += dfMult;
    2881             : 
    2882     1858270 :         dfAccumulator += double(pSrc[iSrcOffset + 1]) * dfMult;
    2883             :     }
    2884             : 
    2885             :     // Lower Right Pixel.
    2886     2166890 :     if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
    2887     2003834 :         iSrcY + 1 < poWK->nSrcYSize)
    2888             :     {
    2889     1927512 :         const double dfMult = (1.0 - dfRatioX) * (1.0 - dfRatioY);
    2890             : 
    2891     1927512 :         dfAccumulatorDivisor += dfMult;
    2892             : 
    2893     1927512 :         dfAccumulator +=
    2894     1927512 :             double(pSrc[iSrcOffset + 1 + poWK->nSrcXSize]) * dfMult;
    2895             :     }
    2896             : 
    2897             :     // Lower Left Pixel.
    2898     2166890 :     if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
    2899      345069 :         iSrcY + 1 < poWK->nSrcYSize)
    2900             :     {
    2901      268052 :         const double dfMult = dfRatioX * (1.0 - dfRatioY);
    2902             : 
    2903      268052 :         dfAccumulatorDivisor += dfMult;
    2904             : 
    2905      268052 :         dfAccumulator += double(pSrc[iSrcOffset + poWK->nSrcXSize]) * dfMult;
    2906             :     }
    2907             : 
    2908             :     /* -------------------------------------------------------------------- */
    2909             :     /*      Return result.                                                  */
    2910             :     /* -------------------------------------------------------------------- */
    2911     2166890 :     double dfValue = 0.0;
    2912             : 
    2913     2166890 :     if (dfAccumulatorDivisor < 0.00001)
    2914             :     {
    2915           0 :         *pValue = 0;
    2916           0 :         return false;
    2917             :     }
    2918     2166890 :     else if (dfAccumulatorDivisor == 1.0)
    2919             :     {
    2920        7320 :         dfValue = dfAccumulator;
    2921             :     }
    2922             :     else
    2923             :     {
    2924     2159568 :         dfValue = dfAccumulator / dfAccumulatorDivisor;
    2925             :     }
    2926             : 
    2927     2166890 :     *pValue = GWKRoundValueT<T>(dfValue);
    2928             : 
    2929     2214420 :     return true;
    2930             : }
    2931             : 
    2932             : /************************************************************************/
    2933             : /*                        GWKCubicResample()                            */
    2934             : /*     Set of bicubic interpolators using cubic convolution.            */
    2935             : /************************************************************************/
    2936             : 
    2937             : // http://verona.fi-p.unam.mx/boris/practicas/CubConvInterp.pdf Formula 18
    2938             : // or http://en.wikipedia.org/wiki/Cubic_Hermite_spline : CINTx(p_1,p0,p1,p2)
    2939             : // http://en.wikipedia.org/wiki/Bicubic_interpolation: matrix notation
    2940             : 
    // One-dimensional cubic convolution of four equally spaced samples
    // f0..f3.  distance1, distance2 and distance3 multiply the first-, second-
    // and third-order difference terms respectively; the result equals f1 when
    // all three are zero.
    template <typename T>
    static inline T CubicConvolution(T distance1, T distance2, T distance3, T f0,
                                     T f1, T f2, T f3)
    {
        const T firstOrder = distance1 * (f2 - f0);
        const T secondOrder = distance2 * (2 * f0 - 5 * f1 + 4 * f2 - f3);
        const T thirdOrder = distance3 * (3 * (f1 - f2) + f3 - f0);

        return f1 + T(0.5) * (firstOrder + secondOrder + thirdOrder);
    }
    2949             : 
    2950             : /************************************************************************/
    2951             : /*                       GWKCubicComputeWeights()                       */
    2952             : /************************************************************************/
    2953             : 
    2954             : // adfCoeffs[2] = 1.0 - (adfCoeffs[0] + adfCoeffs[1] + adfCoeffs[3]);
    2955             : 
    // Fills coeffs[0..3] with the four cubic convolution weights for the
    // fractional position x.  x == 0 yields {0, 1, 0, 0} and x == 1 yields
    // {0, 0, 1, 0}.
    template <typename T>
    static inline void GWKCubicComputeWeights(T x, T coeffs[4])
    {
        // Sub-expressions shared by several weights.
        const T xHalf = T(0.5) * x;
        const T xSquaredHalf = xHalf * x;
        const T xTimes3 = T(3.0) * x;

        const T outerLeft = xHalf * (-1 + x * (2 - x));
        const T innerLeft = 1 + xSquaredHalf * (-5 + xTimes3);
        const T innerRight = xHalf * (1 + x * (4 - xTimes3));
        const T outerRight = xSquaredHalf * (-1 + x);

        coeffs[0] = outerLeft;
        coeffs[1] = innerLeft;
        coeffs[2] = innerRight;
        coeffs[3] = outerRight;
    }
    2968             : 
    // Dot product of four double weights v1 with four samples v2 of type T,
    // each sample being converted to double before the multiplication.
    template <typename T> inline double CONVOL4(const double v1[4], const T v2[4])
    {
        const double dfTerm0 = v1[0] * double(v2[0]);
        const double dfTerm1 = v1[1] * double(v2[1]);
        const double dfTerm2 = v1[2] * double(v2[2]);
        const double dfTerm3 = v1[3] * double(v2[3]);

        // Summed left to right.
        return dfTerm0 + dfTerm1 + dfTerm2 + dfTerm3;
    }
    2974             : 
    2975             : #if 0
    2976             : // Optimal (in theory...) for max 2 convolutions: 14 multiplications
    2977             : // instead of 17.
    2978             : // TODO(schwehr): Use an inline function.
    2979             : #define GWKCubicComputeWeights_Optim2MAX(dfX_, adfCoeffs, dfHalfX)             \
    2980             :     {                                                                          \
    2981             :         const double dfX = dfX_;                                               \
    2982             :         dfHalfX = 0.5 * dfX;                                                   \
    2983             :         const double dfThreeX = 3.0 * dfX;                                     \
    2984             :         const double dfXMinus1 = dfX - 1;                                      \
    2985             :                                                                                \
    2986             :         adfCoeffs[0] = -1 + dfX * (2 - dfX);                                   \
    2987             :         adfCoeffs[1] = dfX * (-5 + dfThreeX);                                  \
    2988             :         /*adfCoeffs[2] = 1 + dfX * (4 - dfThreeX);*/                           \
    2989             :         adfCoeffs[2] = -dfXMinus1 - adfCoeffs[1];                              \
    2990             :         /*adfCoeffs[3] = dfX * (-1 + dfX); */                                  \
    2991             :         adfCoeffs[3] = dfXMinus1 - adfCoeffs[0];                               \
    2992             :     }
    2993             : 
    2994             : // TODO(schwehr): Use an inline function.
    2995             : #define CONVOL4_Optim2MAX(adfCoeffs, v, dfHalfX)                               \
    2996             :     ((v)[1] + (dfHalfX) * ((adfCoeffs)[0] * (v)[0] + (adfCoeffs)[1] * (v)[1] + \
    2997             :                            (adfCoeffs)[2] * (v)[2] + (adfCoeffs)[3] * (v)[3]))
    2998             : #endif
    2999             : 
    3000      302045 : static bool GWKCubicResample4Sample(const GDALWarpKernel *poWK, int iBand,
    3001             :                                     double dfSrcX, double dfSrcY,
    3002             :                                     double *pdfDensity, double *pdfReal,
    3003             :                                     double *pdfImag)
    3004             : 
    3005             : {
    3006      302045 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3007      302045 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3008      302045 :     GPtrDiff_t iSrcOffset =
    3009      302045 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3010      302045 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3011      302045 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3012      302045 :     double adfDensity[4] = {};
    3013      302045 :     double adfReal[4] = {};
    3014      302045 :     double adfImag[4] = {};
    3015             : 
    3016             :     // Get the bilinear interpolation at the image borders.
    3017      302045 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3018      286140 :         iSrcY + 2 >= poWK->nSrcYSize)
    3019       24670 :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3020       24670 :                                           pdfDensity, pdfReal, pdfImag);
    3021             : 
    3022      277375 :     double adfValueDens[4] = {};
    3023      277375 :     double adfValueReal[4] = {};
    3024      277375 :     double adfValueImag[4] = {};
    3025             : 
    3026      277375 :     double adfCoeffsX[4] = {};
    3027      277375 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
    3028             : 
    3029     1240570 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3030             :     {
    3031     1009640 :         if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
    3032      998035 :                             2, adfDensity, adfReal, adfImag) ||
    3033      998035 :             adfDensity[0] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3034      980395 :             adfDensity[1] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3035     2979770 :             adfDensity[2] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3036      972094 :             adfDensity[3] < SRC_DENSITY_THRESHOLD_DOUBLE)
    3037             :         {
    3038       46449 :             return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3039       46449 :                                               pdfDensity, pdfReal, pdfImag);
    3040             :         }
    3041             : 
    3042      963196 :         adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
    3043      963196 :         adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
    3044      963196 :         adfValueImag[i + 1] = CONVOL4(adfCoeffsX, adfImag);
    3045             :     }
    3046             : 
    3047             :     /* -------------------------------------------------------------------- */
    3048             :     /*      For now, if we have any pixels missing in the kernel area,      */
    3049             :     /*      we fallback on using bilinear interpolation.  Ideally we        */
    3050             :     /*      should do "weight adjustment" of our results similarly to       */
    3051             :     /*      what is done for the cubic spline and lanc. interpolators.      */
    3052             :     /* -------------------------------------------------------------------- */
    3053             : 
    3054      230926 :     double adfCoeffsY[4] = {};
    3055      230926 :     GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
    3056             : 
    3057      230926 :     *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
    3058      230926 :     *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
    3059      230926 :     *pdfImag = CONVOL4(adfCoeffsY, adfValueImag);
    3060             : 
    3061      230926 :     return true;
    3062             : }
    3063             : 
    3064             : #ifdef USE_SSE2
    3065             : 
    3066             : /************************************************************************/
    3067             : /*                           XMMLoad4Values()                           */
    3068             : /*                                                                      */
    3069             : /*  Load 4 packed byte or uint16, cast them to float and put them in a  */
    3070             : /*  m128 register.                                                      */
    3071             : /************************************************************************/
    3072             : 
    3073   364563000 : static CPL_INLINE __m128 XMMLoad4Values(const GByte *ptr)
    3074             : {
    3075             :     unsigned int i;
    3076   364563000 :     memcpy(&i, ptr, 4);
    3077   729126000 :     __m128i xmm_i = _mm_cvtsi32_si128(i);
    3078             :     // Zero extend 4 packed unsigned 8-bit integers in a to packed
    3079             :     // 32-bit integers.
    3080             : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
    3081             :     xmm_i = _mm_cvtepu8_epi32(xmm_i);
    3082             : #else
    3083   729126000 :     xmm_i = _mm_unpacklo_epi8(xmm_i, _mm_setzero_si128());
    3084   729126000 :     xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
    3085             : #endif
    3086   729126000 :     return _mm_cvtepi32_ps(xmm_i);
    3087             : }
    3088             : 
    3089      791724 : static CPL_INLINE __m128 XMMLoad4Values(const GUInt16 *ptr)
    3090             : {
    3091             :     GUInt64 i;
    3092      791724 :     memcpy(&i, ptr, 8);
    3093     1583450 :     __m128i xmm_i = _mm_cvtsi64_si128(i);
    3094             :     // Zero extend 4 packed unsigned 16-bit integers in a to packed
    3095             :     // 32-bit integers.
    3096             : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
    3097             :     xmm_i = _mm_cvtepu16_epi32(xmm_i);
    3098             : #else
    3099     1583450 :     xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
    3100             : #endif
    3101     1583450 :     return _mm_cvtepi32_ps(xmm_i);
    3102             : }
    3103             : 
    3104             : /************************************************************************/
    3105             : /*                           XMMHorizontalAdd()                         */
    3106             : /*                                                                      */
    3107             : /*  Return the sum of the 4 floating points of the register.            */
    3108             : /************************************************************************/
    3109             : 
    3110             : #if defined(__SSE3__) || defined(USE_NEON_OPTIMIZATIONS)
    3111             : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
    3112             : {
    3113             :     __m128 shuf = _mm_movehdup_ps(v);   // (v3   , v3   , v1   , v1)
    3114             :     __m128 sums = _mm_add_ps(v, shuf);  // (v3+v3, v3+v2, v1+v1, v1+v0)
    3115             :     shuf = _mm_movehl_ps(shuf, sums);   // (v3   , v3   , v3+v3, v3+v2)
    3116             :     sums = _mm_add_ss(sums, shuf);      // (v1+v0)+(v3+v2)
    3117             :     return _mm_cvtss_f32(sums);
    3118             : }
    3119             : #else
    3120    94717100 : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
    3121             : {
    3122    95064600 :     __m128 shuf = _mm_movehl_ps(v, v);     // (v3   , v2   , v3   , v2)
    3123    95064600 :     __m128 sums = _mm_add_ps(v, shuf);     // (v3+v3, v2+v2, v3+v1, v2+v0)
    3124    95064600 :     shuf = _mm_shuffle_ps(sums, sums, 1);  // (v2+v0, v2+v0, v2+v0, v3+v1)
    3125    95134200 :     sums = _mm_add_ss(sums, shuf);         // (v2+v0)+(v3+v1)
    3126    95134200 :     return _mm_cvtss_f32(sums);
    3127             : }
    3128             : #endif
    3129             : 
    3130             : #endif  // define USE_SSE2
    3131             : 
    3132             : /************************************************************************/
    3133             : /*            GWKCubicResampleSrcMaskIsDensity4SampleRealT()            */
    3134             : /************************************************************************/
    3135             : 
    3136             : // Note: if USE_SSE_CUBIC_IMPL, only instantiate that for Byte and UInt16,
    3137             : // because there are a few assumptions about those types.
    3138             : // We do not define USE_SSE_CUBIC_IMPL since in practice, it gives zero
    3139             : // perf benefit.
    3140             : 
    3141             : template <class T>
    3142      389755 : static CPL_INLINE bool GWKCubicResampleSrcMaskIsDensity4SampleRealT(
    3143             :     const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
    3144             :     double *pdfDensity, double *pdfReal)
    3145             : {
    3146      389755 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3147      389755 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3148      389755 :     const GPtrDiff_t iSrcOffset =
    3149      389755 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3150             : 
    3151             :     // Get the bilinear interpolation at the image borders.
    3152      389755 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3153      387271 :         iSrcY + 2 >= poWK->nSrcYSize)
    3154             :     {
    3155        2484 :         double adfImagIgnored[4] = {};
    3156        2484 :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3157        2484 :                                           pdfDensity, pdfReal, adfImagIgnored);
    3158             :     }
    3159             : 
    3160             : #if defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
    3161             :     const float fDeltaX = static_cast<float>(dfSrcX) - 0.5f - iSrcX;
    3162             :     const float fDeltaY = static_cast<float>(dfSrcY) - 0.5f - iSrcY;
    3163             : 
    3164             :     // TODO(schwehr): Explain the magic numbers.
    3165             :     float afTemp[4 + 4 + 4 + 1];
    3166             :     float *pafAligned =
    3167             :         reinterpret_cast<float *>(afTemp + ((size_t)afTemp & 0xf));
    3168             :     float *pafCoeffs = pafAligned;
    3169             :     float *pafDensity = pafAligned + 4;
    3170             :     float *pafValue = pafAligned + 8;
    3171             : 
    3172             :     const float fHalfDeltaX = 0.5f * fDeltaX;
    3173             :     const float fThreeDeltaX = 3.0f * fDeltaX;
    3174             :     const float fHalfDeltaX2 = fHalfDeltaX * fDeltaX;
    3175             : 
    3176             :     pafCoeffs[0] = fHalfDeltaX * (-1 + fDeltaX * (2 - fDeltaX));
    3177             :     pafCoeffs[1] = 1 + fHalfDeltaX2 * (-5 + fThreeDeltaX);
    3178             :     pafCoeffs[2] = fHalfDeltaX * (1 + fDeltaX * (4 - fThreeDeltaX));
    3179             :     pafCoeffs[3] = fHalfDeltaX2 * (-1 + fDeltaX);
    3180             :     __m128 xmmCoeffs = _mm_load_ps(pafCoeffs);
    3181             :     const __m128 xmmThreshold = _mm_load1_ps(&SRC_DENSITY_THRESHOLD_FLOAT);
    3182             : 
    3183             :     __m128 xmmMaskLowDensity = _mm_setzero_ps();
    3184             :     for (GPtrDiff_t i = -1, iOffset = iSrcOffset - poWK->nSrcXSize - 1; i < 3;
    3185             :          i++, iOffset += poWK->nSrcXSize)
    3186             :     {
    3187             :         const __m128 xmmDensity =
    3188             :             _mm_loadu_ps(poWK->pafUnifiedSrcDensity + iOffset);
    3189             :         xmmMaskLowDensity = _mm_or_ps(xmmMaskLowDensity,
    3190             :                                       _mm_cmplt_ps(xmmDensity, xmmThreshold));
    3191             :         pafDensity[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
    3192             : 
    3193             :         const __m128 xmmValues =
    3194             :             XMMLoad4Values(((T *)poWK->papabySrcImage[iBand]) + iOffset);
    3195             :         pafValue[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValues));
    3196             :     }
    3197             :     if (_mm_movemask_ps(xmmMaskLowDensity))
    3198             :     {
    3199             :         double adfImagIgnored[4] = {};
    3200             :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3201             :                                           pdfDensity, pdfReal, adfImagIgnored);
    3202             :     }
    3203             : 
    3204             :     const float fHalfDeltaY = 0.5f * fDeltaY;
    3205             :     const float fThreeDeltaY = 3.0f * fDeltaY;
    3206             :     const float fHalfDeltaY2 = fHalfDeltaY * fDeltaY;
    3207             : 
    3208             :     pafCoeffs[0] = fHalfDeltaY * (-1 + fDeltaY * (2 - fDeltaY));
    3209             :     pafCoeffs[1] = 1 + fHalfDeltaY2 * (-5 + fThreeDeltaY);
    3210             :     pafCoeffs[2] = fHalfDeltaY * (1 + fDeltaY * (4 - fThreeDeltaY));
    3211             :     pafCoeffs[3] = fHalfDeltaY2 * (-1 + fDeltaY);
    3212             : 
    3213             :     xmmCoeffs = _mm_load_ps(pafCoeffs);
    3214             : 
    3215             :     const __m128 xmmDensity = _mm_load_ps(pafDensity);
    3216             :     const __m128 xmmValue = _mm_load_ps(pafValue);
    3217             :     *pdfDensity = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
    3218             :     *pdfReal = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValue));
    3219             : 
    3220             :     // We did all above computations on float32 whereas the general case is
    3221             :     // float64. Not sure if one is fundamentally more correct than the other
    3222             :     // one, but we want our optimization to give the same result as the
    3223             :     // general case as much as possible, so if the resulting value is
    3224             :     // close to some_int_value + 0.5, redo the computation with the general
    3225             :     // case.
    3226             :     // Note: If other types than Byte or UInt16, will need changes.
    3227             :     if (fabs(*pdfReal - static_cast<int>(*pdfReal) - 0.5) > .007)
    3228             :         return true;
    3229             : 
    3230             : #endif  // defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
    3231             : 
    3232      387271 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3233      387271 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3234             : 
    3235      387271 :     double adfValueDens[4] = {};
    3236      387271 :     double adfValueReal[4] = {};
    3237             : 
    3238      387271 :     double adfCoeffsX[4] = {};
    3239      387271 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
    3240             : 
    3241      387271 :     double adfCoeffsY[4] = {};
    3242      387271 :     GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
    3243             : 
    3244     1930200 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3245             :     {
    3246     1544480 :         const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
    3247             : #if !(defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2))
    3248     1544480 :         if (poWK->pafUnifiedSrcDensity[iOffset + 0] <
    3249     1542940 :                 SRC_DENSITY_THRESHOLD_FLOAT ||
    3250     1542940 :             poWK->pafUnifiedSrcDensity[iOffset + 1] <
    3251     1542940 :                 SRC_DENSITY_THRESHOLD_FLOAT ||
    3252     1542940 :             poWK->pafUnifiedSrcDensity[iOffset + 2] <
    3253     1542940 :                 SRC_DENSITY_THRESHOLD_FLOAT ||
    3254     1542940 :             poWK->pafUnifiedSrcDensity[iOffset + 3] <
    3255             :                 SRC_DENSITY_THRESHOLD_FLOAT)
    3256             :         {
    3257        1551 :             double adfImagIgnored[4] = {};
    3258        1551 :             return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3259             :                                               pdfDensity, pdfReal,
    3260        1551 :                                               adfImagIgnored);
    3261             :         }
    3262             : #endif
    3263             : 
    3264     3085860 :         adfValueDens[i + 1] =
    3265     1542930 :             CONVOL4(adfCoeffsX, poWK->pafUnifiedSrcDensity + iOffset);
    3266             : 
    3267     1542930 :         adfValueReal[i + 1] = CONVOL4(
    3268             :             adfCoeffsX,
    3269     1542930 :             reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
    3270             :     }
    3271             : 
    3272      385720 :     *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
    3273      385720 :     *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
    3274             : 
    3275      385720 :     return true;
    3276             : }
    3277             : 
    3278             : /************************************************************************/
    3279             : /*              GWKCubicResampleSrcMaskIsDensity4SampleReal()             */
    3280             : /*     Bi-cubic when source has and only has pafUnifiedSrcDensity.      */
    3281             : /************************************************************************/
    3282             : 
    3283           0 : static bool GWKCubicResampleSrcMaskIsDensity4SampleReal(
    3284             :     const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
    3285             :     double *pdfDensity, double *pdfReal)
    3286             : // Interpolates density and real value at (dfSrcX, dfSrcY) over a 4x4 source window with cubic weights; delegates to GWKBilinearResample4Sample() when the window leaves the image, a row fetch fails, or any density is below threshold.
    3287             : {
    3288           0 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);  // half-pixel shift, then truncation
    3289           0 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3290           0 :     const GPtrDiff_t iSrcOffset =
    3291           0 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;  // linear index of pixel (iSrcX, iSrcY)
    3292           0 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;  // fractional offset from that pixel
    3293           0 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3294             : 
    3295             :     // Fall back to bilinear interpolation when the 4x4 window
    3296           0 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3297           0 :         iSrcY + 2 >= poWK->nSrcYSize)  // (iSrc - 1 .. iSrc + 2) is not strictly inside the source.
    3298             :     {
    3299           0 :         double adfImagIgnored[4] = {};
    3300           0 :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3301           0 :                                           pdfDensity, pdfReal, adfImagIgnored);
    3302             :     }
    3303             :     // Separable cubic weights for the fractional offsets in X and in Y.
    3304           0 :     double adfCoeffsX[4] = {};
    3305           0 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
    3306             : 
    3307           0 :     double adfCoeffsY[4] = {};
    3308           0 :     GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
    3309             :     // adfValue*: one X-filtered result per row; adfDensity/adfReal/adfImagIgnored: scratch for one fetched row.
    3310           0 :     double adfValueDens[4] = {};
    3311           0 :     double adfValueReal[4] = {};
    3312           0 :     double adfDensity[4] = {};
    3313           0 :     double adfReal[4] = {};
    3314           0 :     double adfImagIgnored[4] = {};
    3315             :     // Rows iSrcY - 1 .. iSrcY + 2, each read starting one pixel left of iSrcX.
    3316           0 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3317             :     {
    3318           0 :         if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
    3319           0 :                             2, adfDensity, adfReal, adfImagIgnored) ||  // NOTE(review): 2 is presumably a half length (4 pixels are tested below) - confirm
    3320           0 :             adfDensity[0] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3321           0 :             adfDensity[1] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3322           0 :             adfDensity[2] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3323           0 :             adfDensity[3] < SRC_DENSITY_THRESHOLD_DOUBLE)
    3324             :         {
    3325           0 :             return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3326             :                                               pdfDensity, pdfReal,
    3327           0 :                                               adfImagIgnored);
    3328             :         }
    3329             :         // Horizontal pass: combine the four pixels of this row with the X weights.
    3330           0 :         adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
    3331           0 :         adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
    3332             :     }
    3333             :     // Vertical pass: combine the four row results with the Y weights.
    3334           0 :     *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
    3335           0 :     *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
    3336             : 
    3337           0 :     return true;
    3338             : }
    3339             : 
    3340             : template <class T>
    3341     2231485 : static bool GWKCubicResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
    3342             :                                             int iBand, double dfSrcX,
    3343             :                                             double dfSrcY, T *pValue)
    3344             : // Cubic interpolation of band iBand at (dfSrcX, dfSrcY) read directly from papabySrcImage as T (no mask is consulted); writes the clamped result to *pValue, or delegates to GWKBilinearResampleNoMasks4SampleT() when the 4x4 window leaves the image.
    3345             : {
    3346     2231485 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);  // half-pixel shift, then truncation
    3347     2231485 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3348     2231485 :     const GPtrDiff_t iSrcOffset =
    3349     2231485 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;  // linear index of pixel (iSrcX, iSrcY)
    3350     2231485 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3351     2231485 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3352     2231485 :     const double dfDeltaY2 = dfDeltaY * dfDeltaY;  // powers of the Y offset passed to CubicConvolution() below
    3353     2231485 :     const double dfDeltaY3 = dfDeltaY2 * dfDeltaY;
    3354             : 
    3355             :     // Fall back to bilinear interpolation when the 4x4 window is not strictly inside the source.
    3356     2231485 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3357     1814944 :         iSrcY + 2 >= poWK->nSrcYSize)
    3358      488548 :         return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    3359      488548 :                                                   pValue);
    3360             :     // Weights for the horizontal pass only; the vertical pass uses CubicConvolution().
    3361     1742937 :     double adfCoeffs[4] = {};
    3362     1742937 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffs);
    3363             : 
    3364     1742937 :     double adfValue[4] = {};  // one X-filtered value per row
    3365             :     // Rows iSrcY - 1 .. iSrcY + 2.
    3366     8714670 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3367             :     {
    3368     6971746 :         const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;  // first pixel of the row: column iSrcX - 1
    3369             : 
    3370     6971746 :         adfValue[i + 1] = CONVOL4(
    3371             :             adfCoeffs,
    3372     6971746 :             reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
    3373             :     }
    3374             :     // Vertical pass over the four row results.
    3375             :     const double dfValue =
    3376     1742937 :         CubicConvolution(dfDeltaY, dfDeltaY2, dfDeltaY3, adfValue[0],
    3377             :                          adfValue[1], adfValue[2], adfValue[3]);
    3378             :     // Convert the double result to T through GWKClampValueT.
    3379     1742937 :     *pValue = GWKClampValueT<T>(dfValue);
    3380             : 
    3381     1742937 :     return true;
    3382             : }
    3383             : 
    3384             : /************************************************************************/
    3385             : /*                          GWKLanczosSinc()                            */
    3386             : /************************************************************************/
    3387             : 
    3388             : /*
    3389             :  * Lanczos windowed sinc interpolation kernel with radius r (r = 3 in the code below).
    3390             :  *        /
    3391             :  *        | sinc(x) * sinc(x/r), if 0 < |x| < r
    3392             :  * L(x) = | 1, if x = 0                     ,
    3393             :  *        | 0, otherwise
    3394             :  *        \
    3395             :  *
    3396             :  * where sinc(x) = sin(PI * x) / (PI * x).
    3397             :  */
    3398             : 
    3399        1632 : static double GWKLanczosSinc(double dfX)  // Returns sinc(dfX) * sinc(dfX / 3), or 1 when dfX == 0.
    3400             : {  // No |dfX| < 3 test is made here. NOTE(review): presumably callers only pass values inside the window - confirm.
    3401        1632 :     if (dfX == 0.0)
    3402           0 :         return 1.0;
    3403             : 
    3404        1632 :     const double dfPIX = M_PI * dfX;
    3405        1632 :     const double dfPIXoverR = dfPIX / 3;  // radius hard-coded to 3
    3406        1632 :     const double dfPIX2overR = dfPIX * dfPIXoverR;  // denominator: (PI x) * (PI x / 3)
    3407             :     // Given that sin(3x) = 3 sin(x) - 4 sin^3 (x)
    3408             :     // we can compute sin(dfPIX) from sin(dfPIXoverR), so only one sin() call is needed.
    3409        1632 :     const double dfSinPIXoverR = sin(dfPIXoverR);
    3410        1632 :     const double dfSinPIXoverRSquared = dfSinPIXoverR * dfSinPIXoverR;
    3411        1632 :     const double dfSinPIXMulSinPIXoverR =
    3412        1632 :         (3 - 4 * dfSinPIXoverRSquared) * dfSinPIXoverRSquared;  // sin(PI x) * sin(PI x / 3)
    3413        1632 :     return dfSinPIXMulSinPIXoverR / dfPIX2overR;
    3414             : }
    3415             : 
    3416      106692 : static double GWKLanczosSinc4Values(double *padfValues)  // Replaces padfValues[0..3] in place by their kernel values (same formula as GWKLanczosSinc) and returns their sum.
    3417             : {
    3418      533460 :     for (int i = 0; i < 4; i++)
    3419             :     {
    3420      426768 :         if (padfValues[i] == 0.0)
    3421             :         {
    3422           0 :             padfValues[i] = 1.0;
    3423             :         }
    3424             :         else
    3425             :         {
    3426      426768 :             const double dfPIX = M_PI * padfValues[i];
    3427      426768 :             const double dfPIXoverR = dfPIX / 3;  // radius hard-coded to 3
    3428      426768 :             const double dfPIX2overR = dfPIX * dfPIXoverR;  // denominator: (PI x) * (PI x / 3)
    3429             :             // Given that sin(3x) = 3 sin(x) - 4 sin^3 (x)
    3430             :             // we can compute sin(dfPIX) from sin(dfPIXoverR), so only one sin() call is needed.
    3431      426768 :             const double dfSinPIXoverR = sin(dfPIXoverR);
    3432      426768 :             const double dfSinPIXoverRSquared = dfSinPIXoverR * dfSinPIXoverR;
    3433      426768 :             const double dfSinPIXMulSinPIXoverR =
    3434      426768 :                 (3 - 4 * dfSinPIXoverRSquared) * dfSinPIXoverRSquared;  // sin(PI x) * sin(PI x / 3)
    3435      426768 :             padfValues[i] = dfSinPIXMulSinPIXoverR / dfPIX2overR;
    3436             :         }
    3437             :     }
    3438      106692 :     return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
    3439             : }
    3440             : 
    3441             : /************************************************************************/
    3442             : /*                           GWKBilinear()                              */
    3443             : /************************************************************************/
    3444             : 
    3445     6670070 : static double GWKBilinear(double dfX)  // Triangle kernel: 1 - |dfX| for |dfX| <= 1, otherwise 0.
    3446             : {
    3447     6670070 :     double dfAbsX = fabs(dfX);
    3448     6670070 :     if (dfAbsX <= 1.0)
    3449     6198950 :         return 1 - dfAbsX;
    3450             :     else
    3451      471127 :         return 0.0;
    3452             : }
    3453             : 
    3454      401592 : static double GWKBilinear4Values(double *padfValues)  // Applies the GWKBilinear formula to padfValues[0..3] in place and returns the sum of the four weights.
    3455             : {
    3456      401592 :     double dfAbsX0 = fabs(padfValues[0]);
    3457      401592 :     double dfAbsX1 = fabs(padfValues[1]);
    3458      401592 :     double dfAbsX2 = fabs(padfValues[2]);
    3459      401592 :     double dfAbsX3 = fabs(padfValues[3]);
    3460      401592 :     if (dfAbsX0 <= 1.0)  // each entry: 1 - |x| inside [-1, 1], 0 outside
    3461      295634 :         padfValues[0] = 1 - dfAbsX0;
    3462             :     else
    3463      105958 :         padfValues[0] = 0.0;
    3464      401592 :     if (dfAbsX1 <= 1.0)
    3465      401592 :         padfValues[1] = 1 - dfAbsX1;
    3466             :     else
    3467           0 :         padfValues[1] = 0.0;
    3468      401592 :     if (dfAbsX2 <= 1.0)
    3469      401592 :         padfValues[2] = 1 - dfAbsX2;
    3470             :     else
    3471           0 :         padfValues[2] = 0.0;
    3472      401592 :     if (dfAbsX3 <= 1.0)
    3473      295510 :         padfValues[3] = 1 - dfAbsX3;
    3474             :     else
    3475      106082 :         padfValues[3] = 0.0;
    3476      401592 :     return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
    3477             : }
    3478             : 
    3479             : /************************************************************************/
    3480             : /*                            GWKCubic()                                */
    3481             : /************************************************************************/
    3482             : 
    3483     4357000 : static double GWKCubic(double dfX)  // Thin wrapper: returns CubicKernel(dfX) unchanged.
    3484             : {
    3485     4357000 :     return CubicKernel(dfX);
    3486             : }
    3487             : 
    3488     8298770 : static double GWKCubic4Values(double *padfValues)  // Replaces padfValues[0..3] in place by piecewise-cubic weights (unrolled over the four entries) and returns their sum.
    3489             : {
    3490     8298770 :     const double dfAbsX_0 = fabs(padfValues[0]);
    3491     8298770 :     const double dfAbsX_1 = fabs(padfValues[1]);
    3492     8298770 :     const double dfAbsX_2 = fabs(padfValues[2]);
    3493     8298770 :     const double dfAbsX_3 = fabs(padfValues[3]);
    3494     8298770 :     const double dfX2_0 = padfValues[0] * padfValues[0];
    3495     8298770 :     const double dfX2_1 = padfValues[1] * padfValues[1];
    3496     8298770 :     const double dfX2_2 = padfValues[2] * padfValues[2];
    3497     8298770 :     const double dfX2_3 = padfValues[3] * padfValues[3];
    3498             :     // Per entry: |x| <= 1 -> 1.5|x|^3 - 2.5 x^2 + 1;  1 < |x| <= 2 -> -0.5|x|^3 + 2.5 x^2 - 4|x| + 2;  otherwise 0.
    3499     8298770 :     double dfVal0 = 0.0;
    3500     8298770 :     if (dfAbsX_0 <= 1.0)
    3501     1527520 :         dfVal0 = dfX2_0 * (1.5 * dfAbsX_0 - 2.5) + 1.0;
    3502     6771260 :     else if (dfAbsX_0 <= 2.0)
    3503     4903470 :         dfVal0 = dfX2_0 * (-0.5 * dfAbsX_0 + 2.5) - 4.0 * dfAbsX_0 + 2.0;
    3504             : 
    3505     8298770 :     double dfVal1 = 0.0;
    3506     8298770 :     if (dfAbsX_1 <= 1.0)
    3507     4776030 :         dfVal1 = dfX2_1 * (1.5 * dfAbsX_1 - 2.5) + 1.0;
    3508     3522750 :     else if (dfAbsX_1 <= 2.0)
    3509     3520210 :         dfVal1 = dfX2_1 * (-0.5 * dfAbsX_1 + 2.5) - 4.0 * dfAbsX_1 + 2.0;
    3510             : 
    3511     8298770 :     double dfVal2 = 0.0;
    3512     8298770 :     if (dfAbsX_2 <= 1.0)
    3513     6591780 :         dfVal2 = dfX2_2 * (1.5 * dfAbsX_2 - 2.5) + 1.0;
    3514     1707000 :     else if (dfAbsX_2 <= 2.0)
    3515     1704320 :         dfVal2 = dfX2_2 * (-0.5 * dfAbsX_2 + 2.5) - 4.0 * dfAbsX_2 + 2.0;
    3516             : 
    3517     8298770 :     double dfVal3 = 0.0;
    3518     8298770 :     if (dfAbsX_3 <= 1.0)
    3519     3663450 :         dfVal3 = dfX2_3 * (1.5 * dfAbsX_3 - 2.5) + 1.0;
    3520     4635330 :     else if (dfAbsX_3 <= 2.0)
    3521     4264350 :         dfVal3 = dfX2_3 * (-0.5 * dfAbsX_3 + 2.5) - 4.0 * dfAbsX_3 + 2.0;
    3522             :     // Store the weights back over the inputs.
    3523     8298770 :     padfValues[0] = dfVal0;
    3524     8298770 :     padfValues[1] = dfVal1;
    3525     8298770 :     padfValues[2] = dfVal2;
    3526     8298770 :     padfValues[3] = dfVal3;
    3527     8298770 :     return dfVal0 + dfVal1 + dfVal2 + dfVal3;
    3528             : }
    3529             : 
    3530             : /************************************************************************/
    3531             : /*                           GWKBSpline()                               */
    3532             : /************************************************************************/
    3533             : 
    3534             : // https://www.cs.utexas.edu/~fussell/courses/cs384g-fall2013/lectures/mitchell/Mitchell.pdf
    3535             : // Equation 8 with (B,C)=(1,0)
    3536             : // 1/6 * ( 3 * |x|^3 -  6 * |x|^2 + 4) |x| < 1
    3537             : // 1/6 * ( -|x|^3 + 6 |x|^2  - 12|x| + 8) |x| >= 1 and |x| < 2
    3538             : 
    3539      139200 : static double GWKBSpline(double x)  // Returns (x+2)^3 - 4(x+1)^3 + 6x^3 - 4(x-1)^3, each term included only while its base is > 0.
    3540             : {  // The 1/6 factor of the formula is not applied (see the commented-out multiplier on the last line), so the value is 6x the normalized kernel.
    3541      139200 :     const double xp2 = x + 2.0;
    3542      139200 :     const double xp1 = x + 1.0;
    3543      139200 :     const double xm1 = x - 1.0;
    3544             :     // NOTE(review): there is no (x - 2)^3 term, so the result is not forced to 0 for x > 2 - presumably callers stay within [-2, 2]; confirm.
    3545             :     // This will most likely be used, so we'll compute it ahead of time to
    3546             :     // avoid stalling the processor.
    3547      139200 :     const double xp2c = xp2 * xp2 * xp2;
    3548             : 
    3549             :     // Note that the test is computed only if it is needed.
    3550             :     // TODO(schwehr): Make this easier to follow.
    3551             :     return xp2 > 0.0
    3552      278400 :                ? ((xp1 > 0.0)
    3553      139200 :                       ? ((x > 0.0)
    3554      124806 :                              ? ((xm1 > 0.0) ? -4.0 * xm1 * xm1 * xm1 : 0.0) +
    3555       90308 :                                    6.0 * x * x * x
    3556             :                              : 0.0) +
    3557      124806 :                             -4.0 * xp1 * xp1 * xp1
    3558             :                       : 0.0) +
    3559             :                      xp2c
    3560      139200 :                : 0.0;  // * 0.166666666666666666666
    3561             : }
    3562             : 
    3563     2220680 : static double GWKBSpline4Values(double *padfValues)  // Applies the GWKBSpline expression to padfValues[0..3] in place (also without the 1/6 factor) and returns their sum.
    3564             : {
    3565    11103400 :     for (int i = 0; i < 4; i++)
    3566             :     {
    3567     8882740 :         const double x = padfValues[i];
    3568     8882740 :         const double xp2 = x + 2.0;
    3569     8882740 :         const double xp1 = x + 1.0;
    3570     8882740 :         const double xm1 = x - 1.0;
    3571             :         // Terms: (x+2)^3 - 4(x+1)^3 + 6x^3 - 4(x-1)^3, each included only while its base is > 0.
    3572             :         // This will most likely be used, so we'll compute it ahead of time to
    3573             :         // avoid stalling the processor.
    3574     8882740 :         const double xp2c = xp2 * xp2 * xp2;
    3575             : 
    3576             :         // Note that the test is computed only if it is needed.
    3577             :         // TODO(schwehr): Make this easier to follow.
    3578     8882740 :         padfValues[i] =
    3579             :             (xp2 > 0.0)
    3580    17765500 :                 ? ((xp1 > 0.0)
    3581     8882740 :                        ? ((x > 0.0)
    3582     6661820 :                               ? ((xm1 > 0.0) ? -4.0 * xm1 * xm1 * xm1 : 0.0) +
    3583     4438260 :                                     6.0 * x * x * x
    3584             :                               : 0.0) +
    3585     6661820 :                              -4.0 * xp1 * xp1 * xp1
    3586             :                        : 0.0) +
    3587             :                       xp2c
    3588             :                 : 0.0;  // * 0.166666666666666666666
    3589             :     }
    3590     2220680 :     return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
    3591             : }
    3592             : /************************************************************************/
    3593             : /*                       GWKResampleWrkStruct                           */
    3594             : /************************************************************************/
    3595             : 
    3596             : typedef struct _GWKResampleWrkStruct GWKResampleWrkStruct;
    3597             : // Signature shared by GWKResample() and GWKResampleOptimizedLanczos(); the work struct is passed as the last argument.
    3598             : typedef bool (*pfnGWKResampleType)(const GDALWarpKernel *poWK, int iBand,
    3599             :                                    double dfSrcX, double dfSrcY,
    3600             :                                    double *pdfDensity, double *pdfReal,
    3601             :                                    double *pdfImag,
    3602             :                                    GWKResampleWrkStruct *psWrkStruct);
    3603             : // Scratch buffers and cached values allocated by GWKResampleCreateWrkStruct() and released by GWKResampleDeleteWrkStruct().
    3604             : struct _GWKResampleWrkStruct
    3605             : {
    3606             :     pfnGWKResampleType pfnGWKResample;  // resampler selected at creation time
    3607             : 
    3608             :     // Space for saved X weights.
    3609             :     double *padfWeightsX;
    3610             :     bool *pabCalcX;  // per-column flag: X weight already computed (GWKResample clears it on each call)
    3611             : 
    3612             :     double *padfWeightsY;       // Only used by GWKResampleOptimizedLanczos.
    3613             :     int iLastSrcX;              // Only used by GWKResampleOptimizedLanczos.
    3614             :     int iLastSrcY;              // Only used by GWKResampleOptimizedLanczos.
    3615             :     double dfLastDeltaX;        // Only used by GWKResampleOptimizedLanczos.
    3616             :     double dfLastDeltaY;        // Only used by GWKResampleOptimizedLanczos.
    3617             :     double dfCosPiXScale;       // Only used by GWKResampleOptimizedLanczos.
    3618             :     double dfSinPiXScale;       // Only used by GWKResampleOptimizedLanczos.
    3619             :     double dfCosPiXScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3620             :     double dfSinPiXScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3621             :     double dfCosPiYScale;       // Only used by GWKResampleOptimizedLanczos.
    3622             :     double dfSinPiYScale;       // Only used by GWKResampleOptimizedLanczos.
    3623             :     double dfCosPiYScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3624             :     double dfSinPiYScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3625             : 
    3626             :     // Space for saving a row of pixels.
    3627             :     double *padfRowDensity;  // left null by GWKResampleCreateWrkStruct() when the kernel has no source mask
    3628             :     double *padfRowReal;
    3629             :     double *padfRowImag;
    3630             : };
    3631             : 
    3632             : /************************************************************************/
    3633             : /*                    GWKResampleCreateWrkStruct()                      */
    3634             : /************************************************************************/
    3635             : 
    3636             : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
    3637             :                         double dfSrcY, double *pdfDensity, double *pdfReal,
    3638             :                         double *pdfImag, GWKResampleWrkStruct *psWrkStruct);
    3639             : // Both resamplers are only declared here so that GWKResampleCreateWrkStruct() below can store their addresses.
    3640             : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
    3641             :                                         double dfSrcX, double dfSrcY,
    3642             :                                         double *pdfDensity, double *pdfReal,
    3643             :                                         double *pdfImag,
    3644             :                                         GWKResampleWrkStruct *psWrkStruct);
    3645             : 
    3646         357 : static GWKResampleWrkStruct *GWKResampleCreateWrkStruct(GDALWarpKernel *poWK)
    3647             : {  // Allocates the work struct and its buffers for poWK, selects the resampling function, and returns it; release with GWKResampleDeleteWrkStruct().
    3648         357 :     const int nXDist = (poWK->nXRadius + 1) * 2;  // element counts of the per-column / per-row buffers
    3649         357 :     const int nYDist = (poWK->nYRadius + 1) * 2;
    3650             :     // CPLCalloc: fields not assigned below (e.g. the Lanczos cos/sin values when the scale is >= 1) start out zeroed.
    3651             :     GWKResampleWrkStruct *psWrkStruct = static_cast<GWKResampleWrkStruct *>(
    3652         357 :         CPLCalloc(1, sizeof(GWKResampleWrkStruct)));
    3653             : 
    3654             :     // Alloc space for saved X weights.
    3655         357 :     psWrkStruct->padfWeightsX =
    3656         357 :         static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3657         357 :     psWrkStruct->pabCalcX =
    3658         357 :         static_cast<bool *>(CPLMalloc(nXDist * sizeof(bool)));  // not zero-filled here; GWKResample() memsets it on each call
    3659             : 
    3660         357 :     psWrkStruct->padfWeightsY =
    3661         357 :         static_cast<double *>(CPLCalloc(nYDist, sizeof(double)));
    3662         357 :     psWrkStruct->iLastSrcX = -10;  // NOTE(review): presumably sentinels meaning "nothing cached yet" - confirm in GWKResampleOptimizedLanczos
    3663         357 :     psWrkStruct->iLastSrcY = -10;
    3664         357 :     psWrkStruct->dfLastDeltaX = -10;
    3665         357 :     psWrkStruct->dfLastDeltaY = -10;
    3666             : 
    3667             :     // Alloc space for saving a row of pixels. The density row is only allocated when some source mask exists.
    3668         357 :     if (poWK->pafUnifiedSrcDensity == nullptr &&
    3669         323 :         poWK->panUnifiedSrcValid == nullptr &&
    3670         310 :         poWK->papanBandSrcValid == nullptr)
    3671             :     {
    3672         310 :         psWrkStruct->padfRowDensity = nullptr;
    3673             :     }
    3674             :     else
    3675             :     {
    3676          47 :         psWrkStruct->padfRowDensity =
    3677          47 :             static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3678             :     }
    3679         357 :     psWrkStruct->padfRowReal =
    3680         357 :         static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3681         357 :     psWrkStruct->padfRowImag =
    3682         357 :         static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3683             :     // Lanczos uses the dedicated implementation and precomputes trigonometric constants when a scale is below 1.
    3684         357 :     if (poWK->eResample == GRA_Lanczos)
    3685             :     {
    3686          63 :         psWrkStruct->pfnGWKResample = GWKResampleOptimizedLanczos;
    3687             : 
    3688          63 :         if (poWK->dfXScale < 1)
    3689             :         {
    3690           4 :             psWrkStruct->dfCosPiXScaleOver3 = cos(M_PI / 3 * poWK->dfXScale);
    3691           4 :             psWrkStruct->dfSinPiXScaleOver3 =
    3692           4 :                 sqrt(1 - psWrkStruct->dfCosPiXScaleOver3 *
    3693           4 :                              psWrkStruct->dfCosPiXScaleOver3);  // sin = sqrt(1 - cos^2): the non-negative root
    3694             :             // "Naive":
    3695             :             // const double dfCosPiXScale = cos(  M_PI * dfXScale );
    3696             :             // const double dfSinPiXScale = sin(  M_PI * dfXScale );
    3697             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
    3698           4 :             psWrkStruct->dfCosPiXScale = (4 * psWrkStruct->dfCosPiXScaleOver3 *
    3699           4 :                                               psWrkStruct->dfCosPiXScaleOver3 -
    3700           4 :                                           3) *
    3701           4 :                                          psWrkStruct->dfCosPiXScaleOver3;
    3702           4 :             psWrkStruct->dfSinPiXScale = sqrt(
    3703           4 :                 1 - psWrkStruct->dfCosPiXScale * psWrkStruct->dfCosPiXScale);
    3704             :         }
    3705             :         // Same precomputation for the Y scale.
    3706          63 :         if (poWK->dfYScale < 1)
    3707             :         {
    3708          11 :             psWrkStruct->dfCosPiYScaleOver3 = cos(M_PI / 3 * poWK->dfYScale);
    3709          11 :             psWrkStruct->dfSinPiYScaleOver3 =
    3710          11 :                 sqrt(1 - psWrkStruct->dfCosPiYScaleOver3 *
    3711          11 :                              psWrkStruct->dfCosPiYScaleOver3);
    3712             :             // "Naive":
    3713             :             // const double dfCosPiYScale = cos(  M_PI * dfYScale );
    3714             :             // const double dfSinPiYScale = sin(  M_PI * dfYScale );
    3715             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
    3716          11 :             psWrkStruct->dfCosPiYScale = (4 * psWrkStruct->dfCosPiYScaleOver3 *
    3717          11 :                                               psWrkStruct->dfCosPiYScaleOver3 -
    3718          11 :                                           3) *
    3719          11 :                                          psWrkStruct->dfCosPiYScaleOver3;
    3720          11 :             psWrkStruct->dfSinPiYScale = sqrt(
    3721          11 :                 1 - psWrkStruct->dfCosPiYScale * psWrkStruct->dfCosPiYScale);
    3722             :         }
    3723             :     }
    3724             :     else
    3725         294 :         psWrkStruct->pfnGWKResample = GWKResample;  // every other resampling method goes through the generic GWKResample()
    3726             : 
    3727         357 :     return psWrkStruct;
    3728             : }
    3729             : 
    3730             : /************************************************************************/
    3731             : /*                    GWKResampleDeleteWrkStruct()                      */
    3732             : /************************************************************************/
    3733             : 
    3734         357 : static void GWKResampleDeleteWrkStruct(GWKResampleWrkStruct *psWrkStruct)  // Frees every buffer owned by psWrkStruct, then the struct itself.
    3735             : {
    3736         357 :     CPLFree(psWrkStruct->padfWeightsX);
    3737         357 :     CPLFree(psWrkStruct->padfWeightsY);
    3738         357 :     CPLFree(psWrkStruct->pabCalcX);
    3739         357 :     CPLFree(psWrkStruct->padfRowDensity);  // may be null when GWKResampleCreateWrkStruct() allocated no density row
    3740         357 :     CPLFree(psWrkStruct->padfRowReal);
    3741         357 :     CPLFree(psWrkStruct->padfRowImag);
    3742         357 :     CPLFree(psWrkStruct);
    3743         357 : }
    3744             : 
    3745             : /************************************************************************/
    3746             : /*                           GWKResample()                              */
    3747             : /************************************************************************/
    3748             : 
    3749      239383 : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
    3750             :                         double dfSrcY, double *pdfDensity, double *pdfReal,
    3751             :                         double *pdfImag, GWKResampleWrkStruct *psWrkStruct)
    3752             : 
    3753             : {
    3754             :     // Save as local variables to avoid following pointers in loops.
    3755      239383 :     const int nSrcXSize = poWK->nSrcXSize;
    3756      239383 :     const int nSrcYSize = poWK->nSrcYSize;
    3757             : 
    3758      239383 :     double dfAccumulatorReal = 0.0;
    3759      239383 :     double dfAccumulatorImag = 0.0;
    3760      239383 :     double dfAccumulatorDensity = 0.0;
    3761      239383 :     double dfAccumulatorWeight = 0.0;
    3762      239383 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    3763      239383 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    3764      239383 :     const GPtrDiff_t iSrcOffset =
    3765      239383 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    3766      239383 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3767      239383 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3768             : 
    3769      239383 :     const double dfXScale = poWK->dfXScale;
    3770      239383 :     const double dfYScale = poWK->dfYScale;
    3771             : 
    3772      239383 :     const int nXDist = (poWK->nXRadius + 1) * 2;
    3773             : 
    3774             :     // Space for saved X weights.
    3775      239383 :     double *padfWeightsX = psWrkStruct->padfWeightsX;
    3776      239383 :     bool *pabCalcX = psWrkStruct->pabCalcX;
    3777             : 
    3778             :     // Space for saving a row of pixels.
    3779      239383 :     double *padfRowDensity = psWrkStruct->padfRowDensity;
    3780      239383 :     double *padfRowReal = psWrkStruct->padfRowReal;
    3781      239383 :     double *padfRowImag = psWrkStruct->padfRowImag;
    3782             : 
    3783             :     // Mark as needing calculation (don't calculate the weights yet,
    3784             :     // because a mask may render it unnecessary).
    3785      239383 :     memset(pabCalcX, false, nXDist * sizeof(bool));
    3786             : 
    3787      239383 :     FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample];
    3788      239383 :     CPLAssert(pfnGetWeight);
    3789             : 
    3790             :     // Skip sampling over edge of image.
    3791      239383 :     int j = poWK->nFiltInitY;
    3792      239383 :     int jMax = poWK->nYRadius;
    3793      239383 :     if (iSrcY + j < 0)
    3794         566 :         j = -iSrcY;
    3795      239383 :     if (iSrcY + jMax >= nSrcYSize)
    3796         662 :         jMax = nSrcYSize - iSrcY - 1;
    3797             : 
    3798      239383 :     int iMin = poWK->nFiltInitX;
    3799      239383 :     int iMax = poWK->nXRadius;
    3800      239383 :     if (iSrcX + iMin < 0)
    3801         566 :         iMin = -iSrcX;
    3802      239383 :     if (iSrcX + iMax >= nSrcXSize)
    3803         659 :         iMax = nSrcXSize - iSrcX - 1;
    3804             : 
    3805      239383 :     const int bXScaleBelow1 = (dfXScale < 1.0);
    3806      239383 :     const int bYScaleBelow1 = (dfYScale < 1.0);
    3807             : 
    3808      239383 :     GPtrDiff_t iRowOffset =
    3809      239383 :         iSrcOffset + static_cast<GPtrDiff_t>(j - 1) * nSrcXSize + iMin;
    3810             : 
    3811             :     // Loop over pixel rows in the kernel.
    3812     1445930 :     for (; j <= jMax; ++j)
    3813             :     {
    3814     1206540 :         iRowOffset += nSrcXSize;
    3815             : 
    3816             :         // Get pixel values.
    3817             :         // We can potentially read extra elements after the "normal" end of the
    3818             :         // source arrays, but the contract of papabySrcImage[iBand],
    3819             :         // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
    3820             :         // is to have WARP_EXTRA_ELTS reserved at their end.
    3821     1206540 :         if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
    3822             :                             padfRowDensity, padfRowReal, padfRowImag))
    3823          72 :             continue;
    3824             : 
    3825             :         // Calculate the Y weight.
    3826             :         double dfWeight1 = (bYScaleBelow1)
    3827     1206470 :                                ? pfnGetWeight((j - dfDeltaY) * dfYScale)
    3828        1600 :                                : pfnGetWeight(j - dfDeltaY);
    3829             : 
    3830             :         // Iterate over pixels in row.
    3831     1206470 :         double dfAccumulatorRealLocal = 0.0;
    3832     1206470 :         double dfAccumulatorImagLocal = 0.0;
    3833     1206470 :         double dfAccumulatorDensityLocal = 0.0;
    3834     1206470 :         double dfAccumulatorWeightLocal = 0.0;
    3835             : 
    3836     7317420 :         for (int i = iMin; i <= iMax; ++i)
    3837             :         {
    3838             :             // Skip sampling if pixel has zero density.
    3839     6110940 :             if (padfRowDensity != nullptr &&
    3840       77277 :                 padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD_DOUBLE)
    3841         546 :                 continue;
    3842             : 
    3843     6110400 :             double dfWeight2 = 0.0;
    3844             : 
    3845             :             // Make or use a cached set of weights for this row.
    3846     6110400 :             if (pabCalcX[i - iMin])
    3847             :             {
    3848             :                 // Use saved weight value instead of recomputing it.
    3849     4903920 :                 dfWeight2 = padfWeightsX[i - iMin];
    3850             :             }
    3851             :             else
    3852             :             {
    3853             :                 // Calculate & save the X weight.
    3854     1206480 :                 padfWeightsX[i - iMin] = dfWeight2 =
    3855     1206480 :                     (bXScaleBelow1) ? pfnGetWeight((i - dfDeltaX) * dfXScale)
    3856        1600 :                                     : pfnGetWeight(i - dfDeltaX);
    3857             : 
    3858     1206480 :                 pabCalcX[i - iMin] = true;
    3859             :             }
    3860             : 
    3861             :             // Accumulate!
    3862     6110400 :             dfAccumulatorRealLocal += padfRowReal[i - iMin] * dfWeight2;
    3863     6110400 :             dfAccumulatorImagLocal += padfRowImag[i - iMin] * dfWeight2;
    3864     6110400 :             if (padfRowDensity != nullptr)
    3865       76731 :                 dfAccumulatorDensityLocal +=
    3866       76731 :                     padfRowDensity[i - iMin] * dfWeight2;
    3867     6110400 :             dfAccumulatorWeightLocal += dfWeight2;
    3868             :         }
    3869             : 
    3870     1206470 :         dfAccumulatorReal += dfAccumulatorRealLocal * dfWeight1;
    3871     1206470 :         dfAccumulatorImag += dfAccumulatorImagLocal * dfWeight1;
    3872     1206470 :         dfAccumulatorDensity += dfAccumulatorDensityLocal * dfWeight1;
    3873     1206470 :         dfAccumulatorWeight += dfAccumulatorWeightLocal * dfWeight1;
    3874             :     }
    3875             : 
    3876      239383 :     if (dfAccumulatorWeight < 0.000001 ||
    3877        1887 :         (padfRowDensity != nullptr && dfAccumulatorDensity < 0.000001))
    3878             :     {
    3879           0 :         *pdfDensity = 0.0;
    3880           0 :         return false;
    3881             :     }
    3882             : 
    3883             :     // Calculate the output taking into account weighting.
    3884      239383 :     if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
    3885             :     {
    3886      239380 :         *pdfReal = dfAccumulatorReal / dfAccumulatorWeight;
    3887      239380 :         *pdfImag = dfAccumulatorImag / dfAccumulatorWeight;
    3888      239380 :         if (padfRowDensity != nullptr)
    3889        1884 :             *pdfDensity = dfAccumulatorDensity / dfAccumulatorWeight;
    3890             :         else
    3891      237496 :             *pdfDensity = 1.0;
    3892             :     }
    3893             :     else
    3894             :     {
    3895           3 :         *pdfReal = dfAccumulatorReal;
    3896           3 :         *pdfImag = dfAccumulatorImag;
    3897           3 :         if (padfRowDensity != nullptr)
    3898           3 :             *pdfDensity = dfAccumulatorDensity;
    3899             :         else
    3900           0 :             *pdfDensity = 1.0;
    3901             :     }
    3902             : 
    3903      239383 :     return true;
    3904             : }
    3905             : 
    3906             : /************************************************************************/
    3907             : /*                      GWKResampleOptimizedLanczos()                   */
    3908             : /************************************************************************/
    3909             : 
    3910      617144 : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
    3911             :                                         double dfSrcX, double dfSrcY,
    3912             :                                         double *pdfDensity, double *pdfReal,
    3913             :                                         double *pdfImag,
    3914             :                                         GWKResampleWrkStruct *psWrkStruct)
    3915             : 
    3916             : {
    3917             :     // Save as local variables to avoid following pointers in loops.
    3918      617144 :     const int nSrcXSize = poWK->nSrcXSize;
    3919      617144 :     const int nSrcYSize = poWK->nSrcYSize;
    3920             : 
    3921      617144 :     double dfAccumulatorReal = 0.0;
    3922      617144 :     double dfAccumulatorImag = 0.0;
    3923      617144 :     double dfAccumulatorDensity = 0.0;
    3924      617144 :     double dfAccumulatorWeight = 0.0;
    3925      617144 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    3926      617144 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    3927      617144 :     const GPtrDiff_t iSrcOffset =
    3928      617144 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    3929      617144 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3930      617144 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3931             : 
    3932      617144 :     const double dfXScale = poWK->dfXScale;
    3933      617144 :     const double dfYScale = poWK->dfYScale;
    3934             : 
    3935             :     // Space for saved X weights.
    3936      617144 :     double *const padfWeightsXShifted =
    3937      617144 :         psWrkStruct->padfWeightsX - poWK->nFiltInitX;
    3938      617144 :     double *const padfWeightsYShifted =
    3939      617144 :         psWrkStruct->padfWeightsY - poWK->nFiltInitY;
    3940             : 
    3941             :     // Space for saving a row of pixels.
    3942      617144 :     double *const padfRowDensity = psWrkStruct->padfRowDensity;
    3943      617144 :     double *const padfRowReal = psWrkStruct->padfRowReal;
    3944      617144 :     double *const padfRowImag = psWrkStruct->padfRowImag;
    3945             : 
    3946             :     // Skip sampling over edge of image.
    3947      617144 :     int jMin = poWK->nFiltInitY;
    3948      617144 :     int jMax = poWK->nYRadius;
    3949      617144 :     if (iSrcY + jMin < 0)
    3950       16522 :         jMin = -iSrcY;
    3951      617144 :     if (iSrcY + jMax >= nSrcYSize)
    3952        5782 :         jMax = nSrcYSize - iSrcY - 1;
    3953             : 
    3954      617144 :     int iMin = poWK->nFiltInitX;
    3955      617144 :     int iMax = poWK->nXRadius;
    3956      617144 :     if (iSrcX + iMin < 0)
    3957       15797 :         iMin = -iSrcX;
    3958      617144 :     if (iSrcX + iMax >= nSrcXSize)
    3959        4657 :         iMax = nSrcXSize - iSrcX - 1;
    3960             : 
    3961      617144 :     if (dfXScale < 1.0)
    3962             :     {
    3963      403041 :         while ((iMin - dfDeltaX) * dfXScale < -3.0)
    3964      200179 :             iMin++;
    3965      202862 :         while ((iMax - dfDeltaX) * dfXScale > 3.0)
    3966           0 :             iMax--;
    3967             : 
    3968             :         // clang-format off
    3969             :         /*
    3970             :         Naive version:
    3971             :         for (int i = iMin; i <= iMax; ++i)
    3972             :         {
    3973             :             psWrkStruct->padfWeightsXShifted[i] =
    3974             :                 GWKLanczosSinc((i - dfDeltaX) * dfXScale);
    3975             :         }
    3976             : 
    3977             :         but given that:
    3978             : 
    3979             :         GWKLanczosSinc(x):
    3980             :             if (dfX == 0.0)
    3981             :                 return 1.0;
    3982             : 
    3983             :             const double dfPIX = M_PI * dfX;
    3984             :             const double dfPIXoverR = dfPIX / 3;
    3985             :             const double dfPIX2overR = dfPIX * dfPIXoverR;
    3986             :             return sin(dfPIX) * sin(dfPIXoverR) / dfPIX2overR;
    3987             : 
    3988             :         and
    3989             :             sin (a + b) = sin a cos b + cos a sin b.
    3990             :             cos (a + b) = cos a cos b - sin a sin b.
    3991             : 
    3992             :         we can skip any sin() computation within the loop
    3993             :         */
    3994             :         // clang-format on
    3995             : 
    3996      202862 :         if (iSrcX != psWrkStruct->iLastSrcX ||
    3997      131072 :             dfDeltaX != psWrkStruct->dfLastDeltaX)
    3998             :         {
    3999       71790 :             double dfX = (iMin - dfDeltaX) * dfXScale;
    4000             : 
    4001       71790 :             double dfPIXover3 = M_PI / 3 * dfX;
    4002       71790 :             double dfCosOver3 = cos(dfPIXover3);
    4003       71790 :             double dfSinOver3 = sin(dfPIXover3);
    4004             : 
    4005             :             // "Naive":
    4006             :             // double dfSin = sin( M_PI * dfX );
    4007             :             // double dfCos = cos( M_PI * dfX );
    4008             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
    4009       71790 :             double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
    4010       71790 :             double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
    4011             : 
    4012       71790 :             const double dfCosPiXScaleOver3 = psWrkStruct->dfCosPiXScaleOver3;
    4013       71790 :             const double dfSinPiXScaleOver3 = psWrkStruct->dfSinPiXScaleOver3;
    4014       71790 :             const double dfCosPiXScale = psWrkStruct->dfCosPiXScale;
    4015       71790 :             const double dfSinPiXScale = psWrkStruct->dfSinPiXScale;
    4016       71790 :             constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
    4017       71790 :             padfWeightsXShifted[iMin] =
    4018       71790 :                 dfX == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfX * dfX);
    4019     1636480 :             for (int i = iMin + 1; i <= iMax; ++i)
    4020             :             {
    4021     1564690 :                 dfX += dfXScale;
    4022     1564690 :                 const double dfNewSin =
    4023     1564690 :                     dfSin * dfCosPiXScale + dfCos * dfSinPiXScale;
    4024     1564690 :                 const double dfNewSinOver3 = dfSinOver3 * dfCosPiXScaleOver3 +
    4025     1564690 :                                              dfCosOver3 * dfSinPiXScaleOver3;
    4026     1564690 :                 padfWeightsXShifted[i] =
    4027             :                     dfX == 0
    4028     1564690 :                         ? 1.0
    4029     1564690 :                         : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfX * dfX);
    4030     1564690 :                 const double dfNewCos =
    4031     1564690 :                     dfCos * dfCosPiXScale - dfSin * dfSinPiXScale;
    4032     1564690 :                 const double dfNewCosOver3 = dfCosOver3 * dfCosPiXScaleOver3 -
    4033     1564690 :                                              dfSinOver3 * dfSinPiXScaleOver3;
    4034     1564690 :                 dfSin = dfNewSin;
    4035     1564690 :                 dfCos = dfNewCos;
    4036     1564690 :                 dfSinOver3 = dfNewSinOver3;
    4037     1564690 :                 dfCosOver3 = dfNewCosOver3;
    4038             :             }
    4039             : 
    4040       71790 :             psWrkStruct->iLastSrcX = iSrcX;
    4041       71790 :             psWrkStruct->dfLastDeltaX = dfDeltaX;
    4042             :         }
    4043             :     }
    4044             :     else
    4045             :     {
    4046      757542 :         while (iMin - dfDeltaX < -3.0)
    4047      343260 :             iMin++;
    4048      414282 :         while (iMax - dfDeltaX > 3.0)
    4049           0 :             iMax--;
    4050             : 
    4051      414282 :         if (iSrcX != psWrkStruct->iLastSrcX ||
    4052      209580 :             dfDeltaX != psWrkStruct->dfLastDeltaX)
    4053             :         {
    4054             :             // Optimisation of GWKLanczosSinc(i - dfDeltaX) based on the
    4055             :             // following trigonometric formulas.
    4056             : 
    4057             :             // TODO(schwehr): Move this somewhere where it can be rendered as
    4058             :             // LaTeX.
    4059             :             // clang-format off
    4060             :             // sin(M_PI * (dfBase + k)) = sin(M_PI * dfBase) * cos(M_PI * k) +
    4061             :             //                            cos(M_PI * dfBase) * sin(M_PI * k)
    4062             :             // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k) + dfCosPIBase * sin(M_PI * k)
    4063             :             // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k)
    4064             :             // sin(M_PI * (dfBase + k)) = dfSinPIBase * (((k % 2) == 0) ? 1 : -1)
    4065             : 
    4066             :             // sin(M_PI / dfR * (dfBase + k)) = sin(M_PI / dfR * dfBase) * cos(M_PI / dfR * k) +
    4067             :             //                                  cos(M_PI / dfR * dfBase) * sin(M_PI / dfR * k)
    4068             :             // sin(M_PI / dfR * (dfBase + k)) = dfSinPIBaseOverR * cos(M_PI / dfR * k) + dfCosPIBaseOverR * sin(M_PI / dfR * k)
    4069             :             // clang-format on
    4070             : 
    4071      414282 :             const double dfSinPIDeltaXOver3 = sin((-M_PI / 3.0) * dfDeltaX);
    4072      414282 :             const double dfSin2PIDeltaXOver3 =
    4073             :                 dfSinPIDeltaXOver3 * dfSinPIDeltaXOver3;
    4074             :             // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaX < PI/2.
    4075      414282 :             const double dfCosPIDeltaXOver3 = sqrt(1.0 - dfSin2PIDeltaXOver3);
    4076      414282 :             const double dfSinPIDeltaX =
    4077      414282 :                 (3.0 - 4 * dfSin2PIDeltaXOver3) * dfSinPIDeltaXOver3;
    4078      414282 :             const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
    4079      414282 :             const double dfInvPI2Over3xSinPIDeltaX =
    4080             :                 dfInvPI2Over3 * dfSinPIDeltaX;
    4081      414282 :             const double dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 =
    4082      414282 :                 -0.5 * dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3;
    4083      414282 :             const double dfSinPIOver3 = 0.8660254037844386;
    4084      414282 :             const double dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3 =
    4085      414282 :                 dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaX * dfCosPIDeltaXOver3;
    4086             :             const double padfCst[] = {
    4087      414282 :                 dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3,
    4088      414282 :                 dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 -
    4089             :                     dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3,
    4090      414282 :                 dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 +
    4091      414282 :                     dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3};
    4092             : 
    4093     2936860 :             for (int i = iMin; i <= iMax; ++i)
    4094             :             {
    4095     2522570 :                 const double dfX = i - dfDeltaX;
    4096     2522570 :                 if (dfX == 0.0)
    4097       58282 :                     padfWeightsXShifted[i] = 1.0;
    4098             :                 else
    4099     2464290 :                     padfWeightsXShifted[i] = padfCst[(i + 3) % 3] / (dfX * dfX);
    4100             : #if DEBUG_VERBOSE
    4101             :                     // TODO(schwehr): AlmostEqual.
    4102             :                     // CPLAssert(fabs(padfWeightsX[i-poWK->nFiltInitX] -
    4103             :                     //               GWKLanczosSinc(dfX, 3.0)) < 1e-10);
    4104             : #endif
    4105             :             }
    4106             : 
    4107      414282 :             psWrkStruct->iLastSrcX = iSrcX;
    4108      414282 :             psWrkStruct->dfLastDeltaX = dfDeltaX;
    4109             :         }
    4110             :     }
    4111             : 
    4112      617144 :     if (dfYScale < 1.0)
    4113             :     {
    4114      403116 :         while ((jMin - dfDeltaY) * dfYScale < -3.0)
    4115      200254 :             jMin++;
    4116      202862 :         while ((jMax - dfDeltaY) * dfYScale > 3.0)
    4117           0 :             jMax--;
    4118             : 
    4119             :         // clang-format off
    4120             :         /*
    4121             :         Naive version:
    4122             :         for (int j = jMin; j <= jMax; ++j)
    4123             :         {
    4124             :             padfWeightsYShifted[j] =
    4125             :                 GWKLanczosSinc((j - dfDeltaY) * dfYScale);
    4126             :         }
    4127             :         */
    4128             :         // clang-format on
    4129             : 
    4130      202862 :         if (iSrcY != psWrkStruct->iLastSrcY ||
    4131      202479 :             dfDeltaY != psWrkStruct->dfLastDeltaY)
    4132             :         {
    4133         383 :             double dfY = (jMin - dfDeltaY) * dfYScale;
    4134             : 
    4135         383 :             double dfPIYover3 = M_PI / 3 * dfY;
    4136         383 :             double dfCosOver3 = cos(dfPIYover3);
    4137         383 :             double dfSinOver3 = sin(dfPIYover3);
    4138             : 
    4139             :             // "Naive":
    4140             :             // double dfSin = sin( M_PI * dfY );
    4141             :             // double dfCos = cos( M_PI * dfY );
    4142             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
    4143         383 :             double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
    4144         383 :             double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
    4145             : 
    4146         383 :             const double dfCosPiYScaleOver3 = psWrkStruct->dfCosPiYScaleOver3;
    4147         383 :             const double dfSinPiYScaleOver3 = psWrkStruct->dfSinPiYScaleOver3;
    4148         383 :             const double dfCosPiYScale = psWrkStruct->dfCosPiYScale;
    4149         383 :             const double dfSinPiYScale = psWrkStruct->dfSinPiYScale;
    4150         383 :             constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
    4151         383 :             padfWeightsYShifted[jMin] =
    4152         383 :                 dfY == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfY * dfY);
    4153        7318 :             for (int j = jMin + 1; j <= jMax; ++j)
    4154             :             {
    4155        6935 :                 dfY += dfYScale;
    4156        6935 :                 const double dfNewSin =
    4157        6935 :                     dfSin * dfCosPiYScale + dfCos * dfSinPiYScale;
    4158        6935 :                 const double dfNewSinOver3 = dfSinOver3 * dfCosPiYScaleOver3 +
    4159        6935 :                                              dfCosOver3 * dfSinPiYScaleOver3;
    4160        6935 :                 padfWeightsYShifted[j] =
    4161             :                     dfY == 0
    4162        6935 :                         ? 1.0
    4163        6935 :                         : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfY * dfY);
    4164        6935 :                 const double dfNewCos =
    4165        6935 :                     dfCos * dfCosPiYScale - dfSin * dfSinPiYScale;
    4166        6935 :                 const double dfNewCosOver3 = dfCosOver3 * dfCosPiYScaleOver3 -
    4167        6935 :                                              dfSinOver3 * dfSinPiYScaleOver3;
    4168        6935 :                 dfSin = dfNewSin;
    4169        6935 :                 dfCos = dfNewCos;
    4170        6935 :                 dfSinOver3 = dfNewSinOver3;
    4171        6935 :                 dfCosOver3 = dfNewCosOver3;
    4172             :             }
    4173             : 
    4174         383 :             psWrkStruct->iLastSrcY = iSrcY;
    4175         383 :             psWrkStruct->dfLastDeltaY = dfDeltaY;
    4176             :         }
    4177             :     }
    4178             :     else
    4179             :     {
    4180      684742 :         while (jMin - dfDeltaY < -3.0)
    4181      270460 :             jMin++;
    4182      414282 :         while (jMax - dfDeltaY > 3.0)
    4183           0 :             jMax--;
    4184             : 
    4185      414282 :         if (iSrcY != psWrkStruct->iLastSrcY ||
    4186      413663 :             dfDeltaY != psWrkStruct->dfLastDeltaY)
    4187             :         {
    4188        1132 :             const double dfSinPIDeltaYOver3 = sin((-M_PI / 3.0) * dfDeltaY);
    4189        1132 :             const double dfSin2PIDeltaYOver3 =
    4190             :                 dfSinPIDeltaYOver3 * dfSinPIDeltaYOver3;
    4191             :             // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaY < PI/2.
    4192        1132 :             const double dfCosPIDeltaYOver3 = sqrt(1.0 - dfSin2PIDeltaYOver3);
    4193        1132 :             const double dfSinPIDeltaY =
    4194        1132 :                 (3.0 - 4.0 * dfSin2PIDeltaYOver3) * dfSinPIDeltaYOver3;
    4195        1132 :             const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
    4196        1132 :             const double dfInvPI2Over3xSinPIDeltaY =
    4197             :                 dfInvPI2Over3 * dfSinPIDeltaY;
    4198        1132 :             const double dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 =
    4199        1132 :                 -0.5 * dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3;
    4200        1132 :             const double dfSinPIOver3 = 0.8660254037844386;
    4201        1132 :             const double dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3 =
    4202        1132 :                 dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaY * dfCosPIDeltaYOver3;
    4203             :             const double padfCst[] = {
    4204        1132 :                 dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3,
    4205        1132 :                 dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 -
    4206             :                     dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3,
    4207        1132 :                 dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 +
    4208        1132 :                     dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3};
    4209             : 
    4210        7917 :             for (int j = jMin; j <= jMax; ++j)
    4211             :             {
    4212        6785 :                 const double dfY = j - dfDeltaY;
    4213        6785 :                 if (dfY == 0.0)
    4214         460 :                     padfWeightsYShifted[j] = 1.0;
    4215             :                 else
    4216        6325 :                     padfWeightsYShifted[j] = padfCst[(j + 3) % 3] / (dfY * dfY);
    4217             : #if DEBUG_VERBOSE
    4218             :                     // TODO(schwehr): AlmostEqual.
    4219             :                     // CPLAssert(fabs(padfWeightsYShifted[j] -
    4220             :                     //               GWKLanczosSinc(dfY, 3.0)) < 1e-10);
    4221             : #endif
    4222             :             }
    4223             : 
    4224        1132 :             psWrkStruct->iLastSrcY = iSrcY;
    4225        1132 :             psWrkStruct->dfLastDeltaY = dfDeltaY;
    4226             :         }
    4227             :     }
    4228             : 
    4229             :     // If we have no density information, we can simply compute the
    4230             :     // accumulated weight.
    4231      617144 :     if (padfRowDensity == nullptr)
    4232             :     {
    4233      617144 :         double dfRowAccWeight = 0.0;
    4234     7903490 :         for (int i = iMin; i <= iMax; ++i)
    4235             :         {
    4236     7286350 :             dfRowAccWeight += padfWeightsXShifted[i];
    4237             :         }
    4238      617144 :         double dfColAccWeight = 0.0;
    4239     7958040 :         for (int j = jMin; j <= jMax; ++j)
    4240             :         {
    4241     7340900 :             dfColAccWeight += padfWeightsYShifted[j];
    4242             :         }
    4243      617144 :         dfAccumulatorWeight = dfRowAccWeight * dfColAccWeight;
    4244             :     }
    4245             : 
    4246             :     // Loop over pixel rows in the kernel.
    4247             : 
    4248      617144 :     if (poWK->eWorkingDataType == GDT_Byte && !poWK->panUnifiedSrcValid &&
    4249      616524 :         !poWK->papanBandSrcValid && !poWK->pafUnifiedSrcDensity &&
    4250             :         !padfRowDensity)
    4251             :     {
    4252             :         // Optimization for Byte case without any masking/alpha
    4253             : 
    4254      616524 :         if (dfAccumulatorWeight < 0.000001)
    4255             :         {
    4256           0 :             *pdfDensity = 0.0;
    4257           0 :             return false;
    4258             :         }
    4259             : 
    4260      616524 :         const GByte *pSrc =
    4261      616524 :             reinterpret_cast<const GByte *>(poWK->papabySrcImage[iBand]);
    4262      616524 :         pSrc += iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
    4263             : 
    4264             : #if defined(USE_SSE2)
    4265      616524 :         if (iMax - iMin + 1 == 6)
    4266             :         {
    4267             :             // This is just an optimized version of the general case in
    4268             :             // the else clause.
    4269             : 
    4270      346854 :             pSrc += iMin;
    4271      346854 :             int j = jMin;
    4272             :             const auto fourXWeights =
    4273      346854 :                 XMMReg4Double::Load4Val(padfWeightsXShifted + iMin);
    4274             : 
    4275             :             // Process 2 lines at the same time.
    4276     1375860 :             for (; j < jMax; j += 2)
    4277             :             {
    4278             :                 const XMMReg4Double v_acc =
    4279     1029000 :                     XMMReg4Double::Load4Val(pSrc) * fourXWeights;
    4280             :                 const XMMReg4Double v_acc2 =
    4281     1029000 :                     XMMReg4Double::Load4Val(pSrc + nSrcXSize) * fourXWeights;
    4282     1029000 :                 const double dfRowAcc = v_acc.GetHorizSum();
    4283     1029000 :                 const double dfRowAccEnd =
    4284     1029000 :                     pSrc[4] * padfWeightsXShifted[iMin + 4] +
    4285     1029000 :                     pSrc[5] * padfWeightsXShifted[iMin + 5];
    4286     1029000 :                 dfAccumulatorReal +=
    4287     1029000 :                     (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
    4288     1029000 :                 const double dfRowAcc2 = v_acc2.GetHorizSum();
    4289     1029000 :                 const double dfRowAcc2End =
    4290     1029000 :                     pSrc[nSrcXSize + 4] * padfWeightsXShifted[iMin + 4] +
    4291     1029000 :                     pSrc[nSrcXSize + 5] * padfWeightsXShifted[iMin + 5];
    4292     1029000 :                 dfAccumulatorReal +=
    4293     1029000 :                     (dfRowAcc2 + dfRowAcc2End) * padfWeightsYShifted[j + 1];
    4294     1029000 :                 pSrc += 2 * nSrcXSize;
    4295             :             }
    4296      346854 :             if (j == jMax)
    4297             :             {
    4298             :                 // Process last line if there's an odd number of them.
    4299             : 
    4300             :                 const XMMReg4Double v_acc =
    4301       86045 :                     XMMReg4Double::Load4Val(pSrc) * fourXWeights;
    4302       86045 :                 const double dfRowAcc = v_acc.GetHorizSum();
    4303       86045 :                 const double dfRowAccEnd =
    4304       86045 :                     pSrc[4] * padfWeightsXShifted[iMin + 4] +
    4305       86045 :                     pSrc[5] * padfWeightsXShifted[iMin + 5];
    4306       86045 :                 dfAccumulatorReal +=
    4307       86045 :                     (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
    4308             :             }
    4309             :         }
    4310             :         else
    4311             : #endif
    4312             :         {
    4313     5463580 :             for (int j = jMin; j <= jMax; ++j)
    4314             :             {
    4315     5193900 :                 int i = iMin;
    4316     5193900 :                 double dfRowAcc1 = 0.0;
    4317     5193900 :                 double dfRowAcc2 = 0.0;
    4318             :                 // A bit of loop unrolling
    4319    62750600 :                 for (; i < iMax; i += 2)
    4320             :                 {
    4321    57556700 :                     dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
    4322    57556700 :                     dfRowAcc2 += pSrc[i + 1] * padfWeightsXShifted[i + 1];
    4323             :                 }
    4324     5193900 :                 if (i == iMax)
    4325             :                 {
    4326             :                     // Process last column if there's an odd number of them.
    4327      426183 :                     dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
    4328             :                 }
    4329             : 
    4330     5193900 :                 dfAccumulatorReal +=
    4331     5193900 :                     (dfRowAcc1 + dfRowAcc2) * padfWeightsYShifted[j];
    4332     5193900 :                 pSrc += nSrcXSize;
    4333             :             }
    4334             :         }
    4335             : 
    4336             :         // Calculate the output taking into account weighting.
    4337      616524 :         if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
    4338             :         {
    4339      569230 :             const double dfInvAcc = 1.0 / dfAccumulatorWeight;
    4340      569230 :             *pdfReal = dfAccumulatorReal * dfInvAcc;
    4341      569230 :             *pdfDensity = 1.0;
    4342             :         }
    4343             :         else
    4344             :         {
    4345       47294 :             *pdfReal = dfAccumulatorReal;
    4346       47294 :             *pdfDensity = 1.0;
    4347             :         }
    4348             : 
    4349      616524 :         return true;
    4350             :     }
    4351             : 
    4352         620 :     GPtrDiff_t iRowOffset =
    4353         620 :         iSrcOffset + static_cast<GPtrDiff_t>(jMin - 1) * nSrcXSize + iMin;
    4354             : 
    4355         620 :     int nCountValid = 0;
    4356         620 :     const bool bIsNonComplex = !GDALDataTypeIsComplex(poWK->eWorkingDataType);
    4357             : 
    4358        3560 :     for (int j = jMin; j <= jMax; ++j)
    4359             :     {
    4360        2940 :         iRowOffset += nSrcXSize;
    4361             : 
    4362             :         // Get pixel values.
    4363             :         // We can potentially read extra elements after the "normal" end of the
    4364             :         // source arrays, but the contract of papabySrcImage[iBand],
    4365             :         // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
    4366             :         // is to have WARP_EXTRA_ELTS reserved at their end.
    4367        2940 :         if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
    4368             :                             padfRowDensity, padfRowReal, padfRowImag))
    4369           0 :             continue;
    4370             : 
    4371        2940 :         const double dfWeight1 = padfWeightsYShifted[j];
    4372             : 
    4373             :         // Iterate over pixels in row.
    4374        2940 :         if (padfRowDensity != nullptr)
    4375             :         {
    4376           0 :             for (int i = iMin; i <= iMax; ++i)
    4377             :             {
    4378             :                 // Skip sampling if pixel has zero density.
    4379           0 :                 if (padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD_DOUBLE)
    4380           0 :                     continue;
    4381             : 
    4382           0 :                 nCountValid++;
    4383             : 
    4384             :                 //  Use a cached set of weights for this row.
    4385           0 :                 const double dfWeight2 = dfWeight1 * padfWeightsXShifted[i];
    4386             : 
    4387             :                 // Accumulate!
    4388           0 :                 dfAccumulatorReal += padfRowReal[i - iMin] * dfWeight2;
    4389           0 :                 dfAccumulatorImag += padfRowImag[i - iMin] * dfWeight2;
    4390           0 :                 dfAccumulatorDensity += padfRowDensity[i - iMin] * dfWeight2;
    4391           0 :                 dfAccumulatorWeight += dfWeight2;
    4392             :             }
    4393             :         }
    4394        2940 :         else if (bIsNonComplex)
    4395             :         {
    4396        1764 :             double dfRowAccReal = 0.0;
    4397       10560 :             for (int i = iMin; i <= iMax; ++i)
    4398             :             {
    4399        8796 :                 const double dfWeight2 = padfWeightsXShifted[i];
    4400             : 
    4401             :                 // Accumulate!
    4402        8796 :                 dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
    4403             :             }
    4404             : 
    4405        1764 :             dfAccumulatorReal += dfRowAccReal * dfWeight1;
    4406             :         }
    4407             :         else
    4408             :         {
    4409        1176 :             double dfRowAccReal = 0.0;
    4410        1176 :             double dfRowAccImag = 0.0;
    4411        7040 :             for (int i = iMin; i <= iMax; ++i)
    4412             :             {
    4413        5864 :                 const double dfWeight2 = padfWeightsXShifted[i];
    4414             : 
    4415             :                 // Accumulate!
    4416        5864 :                 dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
    4417        5864 :                 dfRowAccImag += padfRowImag[i - iMin] * dfWeight2;
    4418             :             }
    4419             : 
    4420        1176 :             dfAccumulatorReal += dfRowAccReal * dfWeight1;
    4421        1176 :             dfAccumulatorImag += dfRowAccImag * dfWeight1;
    4422             :         }
    4423             :     }
    4424             : 
    4425         620 :     if (dfAccumulatorWeight < 0.000001 ||
    4426           0 :         (padfRowDensity != nullptr &&
    4427           0 :          (dfAccumulatorDensity < 0.000001 ||
    4428           0 :           nCountValid < (jMax - jMin + 1) * (iMax - iMin + 1) / 2)))
    4429             :     {
    4430           0 :         *pdfDensity = 0.0;
    4431           0 :         return false;
    4432             :     }
    4433             : 
    4434             :     // Calculate the output taking into account weighting.
    4435         620 :     if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
    4436             :     {
    4437           0 :         const double dfInvAcc = 1.0 / dfAccumulatorWeight;
    4438           0 :         *pdfReal = dfAccumulatorReal * dfInvAcc;
    4439           0 :         *pdfImag = dfAccumulatorImag * dfInvAcc;
    4440           0 :         if (padfRowDensity != nullptr)
    4441           0 :             *pdfDensity = dfAccumulatorDensity * dfInvAcc;
    4442             :         else
    4443           0 :             *pdfDensity = 1.0;
    4444             :     }
    4445             :     else
    4446             :     {
    4447         620 :         *pdfReal = dfAccumulatorReal;
    4448         620 :         *pdfImag = dfAccumulatorImag;
    4449         620 :         if (padfRowDensity != nullptr)
    4450           0 :             *pdfDensity = dfAccumulatorDensity;
    4451             :         else
    4452         620 :             *pdfDensity = 1.0;
    4453             :     }
    4454             : 
    4455         620 :     return true;
    4456             : }
    4457             : 
    4458             : /************************************************************************/
    4459             : /*                        GWKComputeWeights()                           */
    4460             : /************************************************************************/
    4461             : 
    4462     3874310 : static void GWKComputeWeights(GDALResampleAlg eResample, int iMin, int iMax,
    4463             :                               double dfDeltaX, double dfXScale, int jMin,
    4464             :                               int jMax, double dfDeltaY, double dfYScale,
    4465             :                               double *padfWeightsHorizontal,
    4466             :                               double *padfWeightsVertical, double &dfInvWeights)
    4467             : {
    4468             : 
    4469     3874310 :     const FilterFuncType pfnGetWeight = apfGWKFilter[eResample];
    4470     3874310 :     CPLAssert(pfnGetWeight);
    4471     3874310 :     const FilterFunc4ValuesType pfnGetWeight4Values =
    4472     3874310 :         apfGWKFilter4Values[eResample];
    4473     3874310 :     CPLAssert(pfnGetWeight4Values);
    4474             : 
    4475     3874310 :     int i = iMin;  // Used after for.
    4476     3874310 :     int iC = 0;    // Used after for.
    4477             :     // Not zero, but as close as possible to it, to avoid potential division by
    4478             :     // zero at end of function
    4479     3874310 :     double dfAccumulatorWeightHorizontal = cpl::NumericLimits<double>::min();
    4480     8701700 :     for (; i + 2 < iMax; i += 4, iC += 4)
    4481             :     {
    4482     4824760 :         padfWeightsHorizontal[iC] = (i - dfDeltaX) * dfXScale;
    4483     4824760 :         padfWeightsHorizontal[iC + 1] = padfWeightsHorizontal[iC] + dfXScale;
    4484     4824760 :         padfWeightsHorizontal[iC + 2] =
    4485     4824760 :             padfWeightsHorizontal[iC + 1] + dfXScale;
    4486     4824760 :         padfWeightsHorizontal[iC + 3] =
    4487     4824760 :             padfWeightsHorizontal[iC + 2] + dfXScale;
    4488     4827380 :         dfAccumulatorWeightHorizontal +=
    4489     4824760 :             pfnGetWeight4Values(padfWeightsHorizontal + iC);
    4490             :     }
    4491     4092900 :     for (; i <= iMax; ++i, ++iC)
    4492             :     {
    4493      223187 :         const double dfWeight = pfnGetWeight((i - dfDeltaX) * dfXScale);
    4494      215966 :         padfWeightsHorizontal[iC] = dfWeight;
    4495      215966 :         dfAccumulatorWeightHorizontal += dfWeight;
    4496             :     }
    4497             : 
    4498     3869710 :     int j = jMin;  // Used after for.
    4499     3869710 :     int jC = 0;    // Used after for.
    4500             :     // Not zero, but as close as possible to it, to avoid potential division by
    4501             :     // zero at end of function
    4502     3869710 :     double dfAccumulatorWeightVertical = cpl::NumericLimits<double>::min();
    4503     8146200 :     for (; j + 2 < jMax; j += 4, jC += 4)
    4504             :     {
    4505     4273930 :         padfWeightsVertical[jC] = (j - dfDeltaY) * dfYScale;
    4506     4273930 :         padfWeightsVertical[jC + 1] = padfWeightsVertical[jC] + dfYScale;
    4507     4273930 :         padfWeightsVertical[jC + 2] = padfWeightsVertical[jC + 1] + dfYScale;
    4508     4273930 :         padfWeightsVertical[jC + 3] = padfWeightsVertical[jC + 2] + dfYScale;
    4509     4276490 :         dfAccumulatorWeightVertical +=
    4510     4273930 :             pfnGetWeight4Values(padfWeightsVertical + jC);
    4511             :     }
    4512     8377240 :     for (; j <= jMax; ++j, ++jC)
    4513             :     {
    4514     4505510 :         const double dfWeight = pfnGetWeight((j - dfDeltaY) * dfYScale);
    4515     4504960 :         padfWeightsVertical[jC] = dfWeight;
    4516     4504960 :         dfAccumulatorWeightVertical += dfWeight;
    4517             :     }
    4518             : 
    4519     3871720 :     dfInvWeights =
    4520     3871720 :         1. / (dfAccumulatorWeightHorizontal * dfAccumulatorWeightVertical);
    4521     3871720 : }
    4522             : 
    4523             : /************************************************************************/
    4524             : /*                        GWKResampleNoMasksT()                         */
    4525             : /************************************************************************/
    4526             : 
    4527             : template <class T>
    4528             : static bool
    4529             : GWKResampleNoMasksT(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
    4530             :                     double dfSrcY, T *pValue, double *padfWeightsHorizontal,
    4531             :                     double *padfWeightsVertical, double &dfInvWeights)
    4532             : 
    4533             : {
    4534             :     // Commonly used; save locally.
    4535             :     const int nSrcXSize = poWK->nSrcXSize;
    4536             :     const int nSrcYSize = poWK->nSrcYSize;
    4537             : 
    4538             :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    4539             :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    4540             :     const GPtrDiff_t iSrcOffset =
    4541             :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    4542             : 
    4543             :     const int nXRadius = poWK->nXRadius;
    4544             :     const int nYRadius = poWK->nYRadius;
    4545             : 
    4546             :     // Politely refuse to process invalid coordinates or obscenely small image.
    4547             :     if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
    4548             :         nYRadius > nSrcYSize)
    4549             :         return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    4550             :                                                   pValue);
    4551             : 
    4552             :     T *pSrcBand = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
    4553             :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    4554             :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    4555             : 
    4556             :     const double dfXScale = std::min(poWK->dfXScale, 1.0);
    4557             :     const double dfYScale = std::min(poWK->dfYScale, 1.0);
    4558             : 
    4559             :     int iMin = 1 - nXRadius;
    4560             :     if (iSrcX + iMin < 0)
    4561             :         iMin = -iSrcX;
    4562             :     int iMax = nXRadius;
    4563             :     if (iSrcX + iMax >= nSrcXSize - 1)
    4564             :         iMax = nSrcXSize - 1 - iSrcX;
    4565             : 
    4566             :     int jMin = 1 - nYRadius;
    4567             :     if (iSrcY + jMin < 0)
    4568             :         jMin = -iSrcY;
    4569             :     int jMax = nYRadius;
    4570             :     if (iSrcY + jMax >= nSrcYSize - 1)
    4571             :         jMax = nSrcYSize - 1 - iSrcY;
    4572             : 
    4573             :     if (iBand == 0)
    4574             :     {
    4575             :         GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
    4576             :                           jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
    4577             :                           padfWeightsVertical, dfInvWeights);
    4578             :     }
    4579             : 
    4580             :     // Loop over all rows in the kernel.
    4581             :     double dfAccumulator = 0.0;
    4582             :     for (int jC = 0, j = jMin; j <= jMax; ++j, ++jC)
    4583             :     {
    4584             :         const GPtrDiff_t iSampJ =
    4585             :             iSrcOffset + static_cast<GPtrDiff_t>(j) * nSrcXSize;
    4586             : 
    4587             :         // Loop over all pixels in the row.
    4588             :         double dfAccumulatorLocal = 0.0;
    4589             :         double dfAccumulatorLocal2 = 0.0;
    4590             :         int iC = 0;
    4591             :         int i = iMin;
    4592             :         // Process by chunk of 4 cols.
    4593             :         for (; i + 2 < iMax; i += 4, iC += 4)
    4594             :         {
    4595             :             // Retrieve the pixel & accumulate.
    4596             :             dfAccumulatorLocal +=
    4597             :                 double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
    4598             :             dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
    4599             :                                   padfWeightsHorizontal[iC + 1];
    4600             :             dfAccumulatorLocal2 += double(pSrcBand[i + 2 + iSampJ]) *
    4601             :                                    padfWeightsHorizontal[iC + 2];
    4602             :             dfAccumulatorLocal2 += double(pSrcBand[i + 3 + iSampJ]) *
    4603             :                                    padfWeightsHorizontal[iC + 3];
    4604             :         }
    4605             :         dfAccumulatorLocal += dfAccumulatorLocal2;
    4606             :         if (i < iMax)
    4607             :         {
    4608             :             dfAccumulatorLocal +=
    4609             :                 double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
    4610             :             dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
    4611             :                                   padfWeightsHorizontal[iC + 1];
    4612             :             i += 2;
    4613             :             iC += 2;
    4614             :         }
    4615             :         if (i == iMax)
    4616             :         {
    4617             :             dfAccumulatorLocal +=
    4618             :                 double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
    4619             :         }
    4620             : 
    4621             :         dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
    4622             :     }
    4623             : 
    4624             :     *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
    4625             : 
    4626             :     return true;
    4627             : }
    4628             : 
    4629             : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
    4630             : /* Could possibly be used too on 32bit, but we would need to check at runtime */
    4631             : #if defined(USE_SSE2)
    4632             : 
    4633             : /************************************************************************/
    4634             : /*                    GWKResampleNoMasks_SSE2_T()                       */
    4635             : /************************************************************************/
    4636             : 
    4637             : template <class T>
    4638     9549263 : static bool GWKResampleNoMasks_SSE2_T(const GDALWarpKernel *poWK, int iBand,
    4639             :                                       double dfSrcX, double dfSrcY, T *pValue,
    4640             :                                       double *padfWeightsHorizontal,
    4641             :                                       double *padfWeightsVertical,
    4642             :                                       double &dfInvWeights)
    4643             : {
    4644             :     // Commonly used; save locally.
    4645     9549263 :     const int nSrcXSize = poWK->nSrcXSize;
    4646     9549263 :     const int nSrcYSize = poWK->nSrcYSize;
    4647             : 
    4648     9549263 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    4649     9549263 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    4650     9549263 :     const GPtrDiff_t iSrcOffset =
    4651     9549263 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    4652     9549263 :     const int nXRadius = poWK->nXRadius;
    4653     9549263 :     const int nYRadius = poWK->nYRadius;
    4654             : 
    4655             :     // Politely refuse to process invalid coordinates or obscenely small image.
    4656     9549263 :     if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
    4657             :         nYRadius > nSrcYSize)
    4658         453 :         return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    4659           3 :                                                   pValue);
    4660             : 
    4661     9548801 :     const T *pSrcBand =
    4662     9548801 :         reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
    4663             : 
    4664     9548801 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    4665     9548801 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    4666     9548801 :     const double dfXScale = std::min(poWK->dfXScale, 1.0);
    4667     9534021 :     const double dfYScale = std::min(poWK->dfYScale, 1.0);
    4668             : 
    4669     9537951 :     int iMin = 1 - nXRadius;
    4670     9537951 :     if (iSrcX + iMin < 0)
    4671       46218 :         iMin = -iSrcX;
    4672     9537951 :     int iMax = nXRadius;
    4673     9537951 :     if (iSrcX + iMax >= nSrcXSize - 1)
    4674       42714 :         iMax = nSrcXSize - 1 - iSrcX;
    4675             : 
    4676     9537951 :     int jMin = 1 - nYRadius;
    4677     9537951 :     if (iSrcY + jMin < 0)
    4678       49554 :         jMin = -iSrcY;
    4679     9537951 :     int jMax = nYRadius;
    4680     9537951 :     if (iSrcY + jMax >= nSrcYSize - 1)
    4681       35683 :         jMax = nSrcYSize - 1 - iSrcY;
    4682             : 
    4683     9537951 :     if (iBand == 0)
    4684             :     {
    4685     3875081 :         GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
    4686             :                           jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
    4687             :                           padfWeightsVertical, dfInvWeights);
    4688             :     }
    4689             : 
    4690     9542561 :     GPtrDiff_t iSampJ = iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
    4691             :     // Process by chunk of 4 rows.
    4692     9542561 :     int jC = 0;
    4693     9542561 :     int j = jMin;
    4694     9542561 :     double dfAccumulator = 0.0;
    4695    20162193 :     for (; j + 2 < jMax; j += 4, iSampJ += 4 * nSrcXSize, jC += 4)
    4696             :     {
    4697             :         // Loop over all pixels in the row.
    4698    10639512 :         int iC = 0;
    4699    10639512 :         int i = iMin;
    4700             :         // Process by chunk of 4 cols.
    4701    10639512 :         XMMReg4Double v_acc_1 = XMMReg4Double::Zero();
    4702    10614862 :         XMMReg4Double v_acc_2 = XMMReg4Double::Zero();
    4703    10626782 :         XMMReg4Double v_acc_3 = XMMReg4Double::Zero();
    4704    10633642 :         XMMReg4Double v_acc_4 = XMMReg4Double::Zero();
    4705    27911180 :         for (; i + 2 < iMax; i += 4, iC += 4)
    4706             :         {
    4707             :             // Retrieve the pixel & accumulate.
    4708    17275988 :             XMMReg4Double v_pixels_1 =
    4709    17275988 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
    4710    17269688 :             XMMReg4Double v_pixels_2 =
    4711    17269688 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + nSrcXSize);
    4712    17284088 :             XMMReg4Double v_pixels_3 =
    4713    17284088 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
    4714    17286588 :             XMMReg4Double v_pixels_4 =
    4715    17286588 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
    4716             : 
    4717    17289588 :             XMMReg4Double v_padfWeight =
    4718    17289588 :                 XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
    4719             : 
    4720    17258688 :             v_acc_1 += v_pixels_1 * v_padfWeight;
    4721    17261688 :             v_acc_2 += v_pixels_2 * v_padfWeight;
    4722    17264988 :             v_acc_3 += v_pixels_3 * v_padfWeight;
    4723    17274488 :             v_acc_4 += v_pixels_4 * v_padfWeight;
    4724             :         }
    4725             : 
    4726    10635222 :         if (i < iMax)
    4727             :         {
    4728      145982 :             XMMReg2Double v_pixels_1 =
    4729      145982 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ);
    4730      145982 :             XMMReg2Double v_pixels_2 =
    4731      145982 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + nSrcXSize);
    4732      145982 :             XMMReg2Double v_pixels_3 =
    4733      145982 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
    4734      145982 :             XMMReg2Double v_pixels_4 =
    4735      145982 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
    4736             : 
    4737      145982 :             XMMReg2Double v_padfWeight =
    4738      145982 :                 XMMReg2Double::Load2Val(padfWeightsHorizontal + iC);
    4739             : 
    4740      145982 :             v_acc_1.AddToLow(v_pixels_1 * v_padfWeight);
    4741      145982 :             v_acc_2.AddToLow(v_pixels_2 * v_padfWeight);
    4742      145982 :             v_acc_3.AddToLow(v_pixels_3 * v_padfWeight);
    4743      145982 :             v_acc_4.AddToLow(v_pixels_4 * v_padfWeight);
    4744             : 
    4745      145982 :             i += 2;
    4746      145982 :             iC += 2;
    4747             :         }
    4748             : 
    4749    10635222 :         double dfAccumulatorLocal_1 = v_acc_1.GetHorizSum();
    4750    10622102 :         double dfAccumulatorLocal_2 = v_acc_2.GetHorizSum();
    4751    10635762 :         double dfAccumulatorLocal_3 = v_acc_3.GetHorizSum();
    4752    10637242 :         double dfAccumulatorLocal_4 = v_acc_4.GetHorizSum();
    4753             : 
    4754    10619652 :         if (i == iMax)
    4755             :         {
    4756       52267 :             dfAccumulatorLocal_1 += static_cast<double>(pSrcBand[i + iSampJ]) *
    4757       52267 :                                     padfWeightsHorizontal[iC];
    4758       52267 :             dfAccumulatorLocal_2 +=
    4759       52267 :                 static_cast<double>(pSrcBand[i + iSampJ + nSrcXSize]) *
    4760       52267 :                 padfWeightsHorizontal[iC];
    4761       52267 :             dfAccumulatorLocal_3 +=
    4762       52267 :                 static_cast<double>(pSrcBand[i + iSampJ + 2 * nSrcXSize]) *
    4763       52267 :                 padfWeightsHorizontal[iC];
    4764       52267 :             dfAccumulatorLocal_4 +=
    4765       52267 :                 static_cast<double>(pSrcBand[i + iSampJ + 3 * nSrcXSize]) *
    4766       52267 :                 padfWeightsHorizontal[iC];
    4767             :         }
    4768             : 
    4769    10619652 :         dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal_1;
    4770    10619652 :         dfAccumulator += padfWeightsVertical[jC + 1] * dfAccumulatorLocal_2;
    4771    10619652 :         dfAccumulator += padfWeightsVertical[jC + 2] * dfAccumulatorLocal_3;
    4772    10619652 :         dfAccumulator += padfWeightsVertical[jC + 3] * dfAccumulatorLocal_4;
    4773             :     }
    4774    22624341 :     for (; j <= jMax; ++j, iSampJ += nSrcXSize, ++jC)
    4775             :     {
    4776             :         // Loop over all pixels in the row.
    4777    13063940 :         int iC = 0;
    4778    13063940 :         int i = iMin;
    4779             :         // Process by chunk of 4 cols.
    4780    13063940 :         XMMReg4Double v_acc = XMMReg4Double::Zero();
    4781    26105363 :         for (; i + 2 < iMax; i += 4, iC += 4)
    4782             :         {
    4783             :             // Retrieve the pixel & accumulate.
    4784    13070023 :             XMMReg4Double v_pixels =
    4785    13070023 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
    4786    13041223 :             XMMReg4Double v_padfWeight =
    4787    13041223 :                 XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
    4788             : 
    4789    13081023 :             v_acc += v_pixels * v_padfWeight;
    4790             :         }
    4791             : 
    4792    13035340 :         double dfAccumulatorLocal = v_acc.GetHorizSum();
    4793             : 
    4794    13101640 :         if (i < iMax)
    4795             :         {
    4796      173976 :             dfAccumulatorLocal +=
    4797      173976 :                 double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
    4798      173976 :             dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
    4799      173976 :                                   padfWeightsHorizontal[iC + 1];
    4800      173976 :             i += 2;
    4801      173976 :             iC += 2;
    4802             :         }
    4803    13101640 :         if (i == iMax)
    4804             :         {
    4805       33032 :             dfAccumulatorLocal += static_cast<double>(pSrcBand[i + iSampJ]) *
    4806       33032 :                                   padfWeightsHorizontal[iC];
    4807             :         }
    4808             : 
    4809    13101640 :         dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
    4810             :     }
    4811             : 
    4812     9560501 :     *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
    4813             : 
    4814     9538851 :     return true;
    4815             : }
    4816             : 
    4817             : /************************************************************************/
    4818             : /*                     GWKResampleNoMasksT<GByte>()                     */
    4819             : /************************************************************************/
    4820             : 
    4821             : template <>
    4822     8964750 : bool GWKResampleNoMasksT<GByte>(const GDALWarpKernel *poWK, int iBand,
    4823             :                                 double dfSrcX, double dfSrcY, GByte *pValue,
    4824             :                                 double *padfWeightsHorizontal,
    4825             :                                 double *padfWeightsVertical,
    4826             :                                 double &dfInvWeights)
    4827             : {
    4828     8964750 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    4829             :                                      padfWeightsHorizontal, padfWeightsVertical,
    4830     8934350 :                                      dfInvWeights);
    4831             : }
    4832             : 
    4833             : /************************************************************************/
    4834             : /*                     GWKResampleNoMasksT<GInt16>()                    */
    4835             : /************************************************************************/
    4836             : 
    4837             : template <>
    4838      252563 : bool GWKResampleNoMasksT<GInt16>(const GDALWarpKernel *poWK, int iBand,
    4839             :                                  double dfSrcX, double dfSrcY, GInt16 *pValue,
    4840             :                                  double *padfWeightsHorizontal,
    4841             :                                  double *padfWeightsVertical,
    4842             :                                  double &dfInvWeights)
    4843             : {
    4844      252563 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    4845             :                                      padfWeightsHorizontal, padfWeightsVertical,
    4846      252563 :                                      dfInvWeights);
    4847             : }
    4848             : 
    4849             : /************************************************************************/
    4850             : /*                     GWKResampleNoMasksT<GUInt16>()                   */
    4851             : /************************************************************************/
    4852             : 
    4853             : template <>  // Explicit specialisation of GWKResampleNoMasksT for GUInt16 samples.
    4854      343440 : bool GWKResampleNoMasksT<GUInt16>(const GDALWarpKernel *poWK, int iBand,
    4855             :                                   double dfSrcX, double dfSrcY, GUInt16 *pValue,
    4856             :                                   double *padfWeightsHorizontal,
    4857             :                                   double *padfWeightsVertical,
    4858             :                                   double &dfInvWeights)
    4859             : {  // Pure forwarder: every argument is passed on unchanged and the callee's bool result is returned.
    4860      343440 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    4861             :                                      padfWeightsHorizontal, padfWeightsVertical,
    4862      343440 :                                      dfInvWeights);
    4863             : }
    4864             : 
    4865             : /************************************************************************/
    4866             : /*                     GWKResampleNoMasksT<float>()                     */
    4867             : /************************************************************************/
    4868             : 
    4869             : template <>  // Explicit specialisation of GWKResampleNoMasksT for float samples.
    4870        2500 : bool GWKResampleNoMasksT<float>(const GDALWarpKernel *poWK, int iBand,
    4871             :                                 double dfSrcX, double dfSrcY, float *pValue,
    4872             :                                 double *padfWeightsHorizontal,
    4873             :                                 double *padfWeightsVertical,
    4874             :                                 double &dfInvWeights)
    4875             : {  // Pure forwarder: every argument is passed on unchanged and the callee's bool result is returned.
    4876        2500 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    4877             :                                      padfWeightsHorizontal, padfWeightsVertical,
    4878        2500 :                                      dfInvWeights);
    4879             : }
    4880             : 
    4881             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    4882             : 
    4883             : /************************************************************************/
    4884             : /*                     GWKResampleNoMasksT<double>()                    */
    4885             : /************************************************************************/
    4886             : 
    4887             : template <>  // Explicit specialisation for double samples; only built when INSTANTIATE_FLOAT64_SSE2_IMPL is defined.
    4888             : bool GWKResampleNoMasksT<double>(const GDALWarpKernel *poWK, int iBand,
    4889             :                                  double dfSrcX, double dfSrcY, double *pValue,
    4890             :                                  double *padfWeightsHorizontal,
    4891             :                                  double *padfWeightsVertical,
    4892             :                                  double &dfInvWeights)
    4893             : {  // Pure forwarder: every argument is passed on unchanged and the callee's bool result is returned.
    4894             :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    4895             :                                      padfWeightsHorizontal, padfWeightsVertical,
    4896             :                                      dfInvWeights);
    4897             : }
    4898             : 
    4899             : #endif /* INSTANTIATE_FLOAT64_SSE2_IMPL */
    4900             : 
    4901             : #endif /* defined(USE_SSE2) */
    4902             : 
    4903             : /************************************************************************/
    4904             : /*                     GWKRoundSourceCoordinates()                      */
    4905             : /************************************************************************/
    4906             : 
    4907        1000 : static void GWKRoundSourceCoordinates(  // Rounds padfX/padfY, in place, to the nearest multiple of dfSrcCoordPrecision.
    4908             :     int nDstXSize, double *padfX, double *padfY, double *padfZ, int *pabSuccess,  // nDstXSize entries of padfX/padfY are visited; padfZ and pabSuccess are only touched for re-transformed points
    4909             :     double dfSrcCoordPrecision, double dfErrorThreshold,  // dfSrcCoordPrecision is used as a divisor below, so it must be non-zero
    4910             :     GDALTransformerFunc pfnTransformer, void *pTransformerArg, double dfDstXOff,  // dfDstXOff is added to the point index to rebuild its destination X
    4911             :     double dfDstY)  // destination Y shared by every point of the row
    4912             : {
    4913        1000 :     double dfPct = 0.8;  // fraction of the half rounding step tolerated before a point is re-transformed
    4914        1000 :     if (dfErrorThreshold > 0 && dfSrcCoordPrecision / dfErrorThreshold >= 10.0)
    4915             :     {
    4916        1000 :         dfPct = 1.0 - 2 * 1.0 / (dfSrcCoordPrecision / dfErrorThreshold);  // 0.8 when the ratio is exactly 10, tends to 1.0 as the ratio grows
    4917             :     }
    4918        1000 :     const double dfExactTransformThreshold = 0.5 * dfPct * dfSrcCoordPrecision;  // largest |before - rounded| accepted as is
    4919             : 
    4920      501000 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)  // NOTE(review): pabSuccess is never read in this loop, so entries flagged as failed are rounded too
    4921             :     {
    4922      500000 :         const double dfXBefore = padfX[iDstX];
    4923      500000 :         const double dfYBefore = padfY[iDstX];
    4924      500000 :         padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *  // floor(v / step + 0.5) * step: nearest multiple of the step
    4925             :                        dfSrcCoordPrecision;
    4926      500000 :         padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
    4927             :                        dfSrcCoordPrecision;
    4928             : 
    4929             :         // If we are in an uncertainty zone, go to non-approximated
    4930             :         // transformation.
    4931             :         // Due to the 80% of half-precision threshold, dfSrcCoordPrecision must
    4932             :         // be at least 10 times greater than the approximation error.
    4933      500000 :         if (fabs(dfXBefore - padfX[iDstX]) > dfExactTransformThreshold ||  // rounding moved X or Y by more than the threshold
    4934      399914 :             fabs(dfYBefore - padfY[iDstX]) > dfExactTransformThreshold)
    4935             :         {
    4936      180090 :             padfX[iDstX] = iDstX + dfDstXOff;  // overwrite the slot with the destination coordinate of this point
    4937      180090 :             padfY[iDstX] = dfDstY;
    4938      180090 :             padfZ[iDstX] = 0.0;
    4939      180090 :             pfnTransformer(pTransformerArg, TRUE, 1, padfX + iDstX,  // single-point call on this slot only; its return value is ignored
    4940      180090 :                            padfY + iDstX, padfZ + iDstX, pabSuccess + iDstX);
    4941      180090 :             padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *  // round the re-transformed value with the same formula
    4942             :                            dfSrcCoordPrecision;
    4943      180090 :             padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
    4944             :                            dfSrcCoordPrecision;
    4945             :         }
    4946             :     }
    4947        1000 : }
    4948             : 
    4949             : /************************************************************************/
    4950             : /*                     GWKCheckAndComputeSrcOffsets()                   */
    4951             : /************************************************************************/
    4952             : static CPL_INLINE bool  // Validates the source coordinate of destination pixel _iDstX and derives its offset in the source window.
    4953   147681000 : GWKCheckAndComputeSrcOffsets(GWKJobStruct *psJob, int *_pabSuccess, int _iDstX,
    4954             :                              int _iDstY, double *_padfX, double *_padfY,  // slot _iDstX of _padfX/_padfY/_pabSuccess may be overwritten by the retry
    4955             :                              int _nSrcXSize, int _nSrcYSize,
    4956             :                              GPtrDiff_t &iSrcOffset)  // written only when the function returns true
    4957             : {  // Returns false when the pixel must be skipped (failed transform, NaN, or outside the source window).
    4958   147681000 :     const GDALWarpKernel *_poWK = psJob->poWK;
    4959   147885000 :     for (int iTry = 0; iTry < 2; ++iTry)  // pass 0 checks the caller's coordinates, pass 1 the individually re-transformed ones
    4960             :     {
    4961   148220000 :         if (iTry == 1)
    4962             :         {
    4963             :             // If the source coordinate is slightly outside of the source raster
    4964             :             // retry to transform it alone, so that the exact coordinate
    4965             :             // transformer is used.
    4966             : 
    4967      204492 :             _padfX[_iDstX] = _iDstX + 0.5 + _poWK->nDstXOff;  // destination pixel centre (+0.5) shifted by the destination window offset
    4968      204492 :             _padfY[_iDstX] = _iDstY + 0.5 + _poWK->nDstYOff;
    4969      204492 :             double dfZ = 0;
    4970      204492 :             _poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1,  // single-point call; the resulting Z is discarded
    4971      204492 :                                   _padfX + _iDstX, _padfY + _iDstX, &dfZ,
    4972      204492 :                                   _pabSuccess + _iDstX);
    4973             :         }
    4974   148220000 :         if (!_pabSuccess[_iDstX])  // the transform reported a failure for this pixel
    4975     3614790 :             return false;
    4976             : 
    4977             :         // If this happens this is likely the symptom of a bug somewhere.
    4978   144605000 :         if (std::isnan(_padfX[_iDstX]) || std::isnan(_padfY[_iDstX]))
    4979             :         {
    4980             :             static bool bNanCoordFound = false;  // keeps the debug message to a single emission; NOTE(review): plain static without synchronisation - confirm whether concurrent callers are possible
    4981           0 :             if (!bNanCoordFound)
    4982             :             {
    4983           0 :                 CPLDebug("WARP",
    4984             :                          "GWKCheckAndComputeSrcOffsets(): "
    4985             :                          "NaN coordinate found on point %d.",
    4986             :                          _iDstX);
    4987           0 :                 bNanCoordFound = true;
    4988             :             }
    4989           0 :             return false;
    4990             :         }
    4991             : 
    4992             :         /* --------------------------------------------------------------------
    4993             :          */
    4994             :         /*      Figure out what pixel we want in our source raster, and skip */
    4995             :         /*      further processing if it is well off the source image. */
    4996             :         /* --------------------------------------------------------------------
    4997             :          */
    4998             :         /* We test against the value before casting to avoid the */
    4999             :         /* problem of asymmetric truncation effects around zero.  That is */
    5000             :         /* -0.5 will be 0 when cast to an int. */
    5001   144460000 :         if (_padfX[_iDstX] < _poWK->nSrcXOff)  // left of the source window
    5002             :         {
    5003             :             // If the source coordinate is slightly outside of the source raster
    5004             :             // retry to transform it alone, so that the exact coordinate
    5005             :             // transformer is used.
    5006     6005840 :             if (iTry == 0 && _padfX[_iDstX] > _poWK->nSrcXOff - 1)  // less than one pixel out, first pass only
    5007       41982 :                 continue;
    5008     5963860 :             return false;
    5009             :         }
    5010             : 
    5011   138454000 :         if (_padfY[_iDstX] < _poWK->nSrcYOff)  // above the source window
    5012             :         {
    5013             :             // If the source coordinate is slightly outside of the source raster
    5014             :             // retry to transform it alone, so that the exact coordinate
    5015             :             // transformer is used.
    5016     6202900 :             if (iTry == 0 && _padfY[_iDstX] > _poWK->nSrcYOff - 1)  // less than one pixel out, first pass only
    5017       64198 :                 continue;
    5018     6138710 :             return false;
    5019             :         }
    5020             : 
    5021             :         // Check for potential overflow when casting from float to int, (if
    5022             :         // operating outside natural projection area, padfX/Y can be a very huge
    5023             :         // positive number before doing the actual conversion), as such cast is
    5024             :         // undefined behavior that can trigger exception with some compilers
    5025             :         // (see #6753)
    5026   132252000 :         if (_padfX[_iDstX] + 1e-10 > _nSrcXSize + _poWK->nSrcXOff)  // right of the source window; same 1e-10 bias as the cast after the loop
    5027             :         {
    5028             :             // If the source coordinate is slightly outside of the source raster
    5029             :             // retry to transform it alone, so that the exact coordinate
    5030             :             // transformer is used.
    5031     3931390 :             if (iTry == 0 && _padfX[_iDstX] < _nSrcXSize + _poWK->nSrcXOff + 1)  // less than one pixel out, first pass only
    5032       47248 :                 continue;
    5033     3884140 :             return false;
    5034             :         }
    5035   128320000 :         if (_padfY[_iDstX] + 1e-10 > _nSrcYSize + _poWK->nSrcYOff)  // below the source window
    5036             :         {
    5037             :             // If the source coordinate is slightly outside of the source raster
    5038             :             // retry to transform it alone, so that the exact coordinate
    5039             :             // transformer is used.
    5040     4487970 :             if (iTry == 0 && _padfY[_iDstX] < _nSrcYSize + _poWK->nSrcYOff + 1)  // less than one pixel out, first pass only
    5041       51064 :                 continue;
    5042     4436910 :             return false;
    5043             :         }
    5044             : 
    5045   123832000 :         break;  // all four bounds hold: leave the retry loop
    5046             :     }
    5047             : 
    5048   123497000 :     int iSrcX = static_cast<int>(_padfX[_iDstX] + 1.0e-10) - _poWK->nSrcXOff;  // truncate, then make relative to the source window
    5049   123497000 :     int iSrcY = static_cast<int>(_padfY[_iDstX] + 1.0e-10) - _poWK->nSrcYOff;
    5050   123497000 :     if (iSrcX == _nSrcXSize)  // coordinate sitting exactly on the far edge: use the last column
    5051           0 :         iSrcX--;
    5052   123497000 :     if (iSrcY == _nSrcYSize)  // same for the last row
    5053           0 :         iSrcY--;
    5054             : 
    5055             :     // Those checks should normally be OK given the previous ones.
    5056   123497000 :     CPLAssert(iSrcX >= 0);
    5057   123497000 :     CPLAssert(iSrcY >= 0);
    5058   123497000 :     CPLAssert(iSrcX < _nSrcXSize);
    5059   123497000 :     CPLAssert(iSrcY < _nSrcYSize);
    5060             : 
    5061   123497000 :     iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * _nSrcXSize;  // row-major index; the row is widened to GPtrDiff_t before the multiplication
    5062             : 
    5063   123497000 :     return true;
    5064             : }
    5065             : 
    5066             : /************************************************************************/
    5067             : /*                   GWKOneSourceCornerFailsToReproject()               */
    5068             : /************************************************************************/
    5069             : 
    5070         818 : static bool GWKOneSourceCornerFailsToReproject(GWKJobStruct *psJob)  // Returns true as soon as one of the four source-window corners fails to transform, false if all succeed.
    5071             : {
    5072         818 :     GDALWarpKernel *poWK = psJob->poWK;
    5073        2444 :     for (int iY = 0; iY <= 1; ++iY)  // iY = 0: first row edge, iY = 1: nSrcYOff + nSrcYSize
    5074             :     {
    5075        4884 :         for (int iX = 0; iX <= 1; ++iX)  // iX = 0: first column edge, iX = 1: nSrcXOff + nSrcXSize
    5076             :         {
    5077        3258 :             double dfXTmp = poWK->nSrcXOff + iX * poWK->nSrcXSize;
    5078        3258 :             double dfYTmp = poWK->nSrcYOff + iY * poWK->nSrcYSize;
    5079        3258 :             double dfZTmp = 0;
    5080        3258 :             int nSuccess = FALSE;  // stays FALSE unless the transformer sets it
    5081        3258 :             poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp,  // second argument FALSE: bDstToSrc in GDALTransformerFunc, i.e. source -> destination
    5082             :                                  &dfYTmp, &dfZTmp, &nSuccess);
    5083        3258 :             if (!nSuccess)
    5084           6 :                 return true;
    5085             :         }
    5086             :     }
    5087         812 :     return false;
    5088             : }
    5089             : 
    5090             : /************************************************************************/
    5091             : /*                       GWKAdjustSrcOffsetOnEdge()                     */
    5092             : /************************************************************************/
    5093             : 
    5094        9714 : static bool GWKAdjustSrcOffsetOnEdge(GWKJobStruct *psJob,  // Moves iSrcOffset to a neighbouring pixel flagged valid in panUnifiedSrcValid when a nearby probe fails to reproject.
    5095             :                                      GPtrDiff_t &iSrcOffset)  // in/out: modified only when the function returns true
    5096             : {
    5097        9714 :     GDALWarpKernel *poWK = psJob->poWK;
    5098        9714 :     const int nSrcXSize = poWK->nSrcXSize;
    5099        9714 :     const int nSrcYSize = poWK->nSrcYSize;
    5100             : 
    5101             :     // Check if the computed source position slightly altered
    5102             :     // fails to reproject. If so, then we are at the edge of
    5103             :     // the validity area, and it is worth checking neighbour
    5104             :     // source pixels for validity.
    5105        9714 :     int nSuccess = FALSE;
    5106             :     {  // probe 1: the pixel at (column, row) itself
    5107        9714 :         double dfXTmp =
    5108        9714 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);  // column = offset modulo row width
    5109        9714 :         double dfYTmp =
    5110        9714 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);  // row = offset divided by row width
    5111        9714 :         double dfZTmp = 0;
    5112        9714 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5113             :                              &dfZTmp, &nSuccess);
    5114             :     }
    5115        9714 :     if (nSuccess)  // probe 2, only if probe 1 succeeded: same column, row + 1
    5116             :     {
    5117        6996 :         double dfXTmp =
    5118        6996 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5119        6996 :         double dfYTmp =
    5120        6996 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
    5121        6996 :         double dfZTmp = 0;
    5122        6996 :         nSuccess = FALSE;
    5123        6996 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5124             :                              &dfZTmp, &nSuccess);
    5125             :     }
    5126        9714 :     if (nSuccess)  // probe 3, only if probes 1 and 2 succeeded: column + 1, same row
    5127             :     {
    5128        5624 :         double dfXTmp =
    5129        5624 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
    5130        5624 :         double dfYTmp =
    5131        5624 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5132        5624 :         double dfZTmp = 0;
    5133        5624 :         nSuccess = FALSE;
    5134        5624 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5135             :                              &dfZTmp, &nSuccess);
    5136             :     }
    5137             : 
    5138       14166 :     if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&  // a probe failed: try the right-hand neighbour first
    5139        4452 :         CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + 1))
    5140             :     {
    5141        1860 :         iSrcOffset++;
    5142        1860 :         return true;
    5143             :     }
    5144       10290 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&  // then the neighbour one row below
    5145        2436 :              CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + nSrcXSize))
    5146             :     {
    5147        1334 :         iSrcOffset += nSrcXSize;
    5148        1334 :         return true;
    5149             :     }
    5150        7838 :     else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&  // then the left-hand neighbour
    5151        1318 :              CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - 1))
    5152             :     {
    5153         956 :         iSrcOffset--;
    5154         956 :         return true;
    5155             :     }
    5156        5924 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&  // finally the neighbour one row above
    5157         360 :              CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - nSrcXSize))
    5158             :     {
    5159         340 :         iSrcOffset -= nSrcXSize;
    5160         340 :         return true;
    5161             :     }
    5162             : 
    5163        5224 :     return false;  // every probe succeeded, or no in-bounds neighbour is flagged valid
    5164             : }
    5165             : 
    5166             : /************************************************************************/
    5167             : /*                 GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity()          */
    5168             : /************************************************************************/
    5169             : 
    5170           0 : static bool GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(GWKJobStruct *psJob,  // Moves iSrcOffset to a neighbouring pixel whose pafUnifiedSrcDensity reaches SRC_DENSITY_THRESHOLD_FLOAT when a nearby probe fails to reproject.
    5171             :                                                       GPtrDiff_t &iSrcOffset)  // in/out: modified only when the function returns true
    5172             : {
    5173           0 :     GDALWarpKernel *poWK = psJob->poWK;
    5174           0 :     const int nSrcXSize = poWK->nSrcXSize;
    5175           0 :     const int nSrcYSize = poWK->nSrcYSize;
    5176             : 
    5177             :     // Check if the computed source position slightly altered
    5178             :     // fails to reproject. If so, then we are at the edge of
    5179             :     // the validity area, and it is worth checking neighbour
    5180             :     // source pixels for validity.
    5181           0 :     int nSuccess = FALSE;
    5182             :     {  // probe 1: the pixel at (column, row) itself
    5183           0 :         double dfXTmp =
    5184           0 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);  // column = offset modulo row width
    5185           0 :         double dfYTmp =
    5186           0 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);  // row = offset divided by row width
    5187           0 :         double dfZTmp = 0;
    5188           0 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5189             :                              &dfZTmp, &nSuccess);
    5190             :     }
    5191           0 :     if (nSuccess)  // probe 2, only if probe 1 succeeded: same column, row + 1
    5192             :     {
    5193           0 :         double dfXTmp =
    5194           0 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5195           0 :         double dfYTmp =
    5196           0 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
    5197           0 :         double dfZTmp = 0;
    5198           0 :         nSuccess = FALSE;
    5199           0 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5200             :                              &dfZTmp, &nSuccess);
    5201             :     }
    5202           0 :     if (nSuccess)  // probe 3, only if probes 1 and 2 succeeded: column + 1, same row
    5203             :     {
    5204           0 :         double dfXTmp =
    5205           0 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
    5206           0 :         double dfYTmp =
    5207           0 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5208           0 :         double dfZTmp = 0;
    5209           0 :         nSuccess = FALSE;
    5210           0 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5211             :                              &dfZTmp, &nSuccess);
    5212             :     }
    5213             : 
    5214           0 :     if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&  // a probe failed: try the right-hand neighbour first
    5215           0 :         poWK->pafUnifiedSrcDensity[iSrcOffset + 1] >=
    5216             :             SRC_DENSITY_THRESHOLD_FLOAT)
    5217             :     {
    5218           0 :         iSrcOffset++;
    5219           0 :         return true;
    5220             :     }
    5221           0 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&  // then the neighbour one row below
    5222           0 :              poWK->pafUnifiedSrcDensity[iSrcOffset + nSrcXSize] >=
    5223             :                  SRC_DENSITY_THRESHOLD_FLOAT)
    5224             :     {
    5225           0 :         iSrcOffset += nSrcXSize;
    5226           0 :         return true;
    5227             :     }
    5228           0 :     else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&  // then the left-hand neighbour
    5229           0 :              poWK->pafUnifiedSrcDensity[iSrcOffset - 1] >=
    5230             :                  SRC_DENSITY_THRESHOLD_FLOAT)
    5231             :     {
    5232           0 :         iSrcOffset--;
    5233           0 :         return true;
    5234             :     }
    5235           0 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&  // finally the neighbour one row above
    5236           0 :              poWK->pafUnifiedSrcDensity[iSrcOffset - nSrcXSize] >=
    5237             :                  SRC_DENSITY_THRESHOLD_FLOAT)
    5238             :     {
    5239           0 :         iSrcOffset -= nSrcXSize;
    5240           0 :         return true;
    5241             :     }
    5242             : 
    5243           0 :     return false;  // every probe succeeded, or no in-bounds neighbour reaches the density threshold
    5244             : }
    5245             : 
    5246             : /************************************************************************/
    5247             : /*                           GWKGeneralCase()                           */
    5248             : /*                                                                      */
    5249             : /*      This is the most general case.  It attempts to handle all       */
    5250             : /*      possible features with relatively little concern for            */
    5251             : /*      efficiency.                                                     */
    5252             : /************************************************************************/
    5253             : 
    5254         239 : static void GWKGeneralCaseThread(void *pData)
    5255             : {
    5256         239 :     GWKJobStruct *psJob = reinterpret_cast<GWKJobStruct *>(pData);
    5257         239 :     GDALWarpKernel *poWK = psJob->poWK;
    5258         239 :     const int iYMin = psJob->iYMin;
    5259         239 :     const int iYMax = psJob->iYMax;
    5260             :     const double dfMultFactorVerticalShiftPipeline =
    5261         239 :         poWK->bApplyVerticalShift
    5262         239 :             ? CPLAtof(CSLFetchNameValueDef(
    5263           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    5264             :                   "1.0"))
    5265         239 :             : 0.0;
    5266             : 
    5267         239 :     int nDstXSize = poWK->nDstXSize;
    5268         239 :     int nSrcXSize = poWK->nSrcXSize;
    5269         239 :     int nSrcYSize = poWK->nSrcYSize;
    5270             : 
    5271             :     /* -------------------------------------------------------------------- */
    5272             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    5273             :     /*      scanlines worth of positions.                                   */
    5274             :     /* -------------------------------------------------------------------- */
    5275             :     // For x, 2 *, because we cache the precomputed values at the end.
    5276             :     double *padfX =
    5277         239 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    5278             :     double *padfY =
    5279         239 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5280             :     double *padfZ =
    5281         239 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5282         239 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    5283             : 
    5284         239 :     const bool bUse4SamplesFormula =
    5285         239 :         poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
    5286             : 
    5287         239 :     GWKResampleWrkStruct *psWrkStruct = nullptr;
    5288         239 :     if (poWK->eResample != GRA_NearestNeighbour)
    5289             :     {
    5290         220 :         psWrkStruct = GWKResampleCreateWrkStruct(poWK);
    5291             :     }
    5292         239 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    5293         239 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    5294         239 :     const double dfErrorThreshold = CPLAtof(
    5295         239 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    5296             : 
    5297             :     const bool bOneSourceCornerFailsToReproject =
    5298         239 :         GWKOneSourceCornerFailsToReproject(psJob);
    5299             : 
    5300             :     // Precompute values.
    5301        6469 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5302        6230 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    5303             : 
    5304             :     /* ==================================================================== */
    5305             :     /*      Loop over output lines.                                         */
    5306             :     /* ==================================================================== */
    5307        6469 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    5308             :     {
    5309             :         /* --------------------------------------------------------------------
    5310             :          */
    5311             :         /*      Setup points to transform to source image space. */
    5312             :         /* --------------------------------------------------------------------
    5313             :          */
    5314        6230 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    5315        6230 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    5316      242390 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5317      236160 :             padfY[iDstX] = dfY;
    5318        6230 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    5319             : 
    5320             :         /* --------------------------------------------------------------------
    5321             :          */
    5322             :         /*      Transform the points from destination pixel/line coordinates */
    5323             :         /*      to source pixel/line coordinates. */
    5324             :         /* --------------------------------------------------------------------
    5325             :          */
    5326        6230 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    5327             :                              padfY, padfZ, pabSuccess);
    5328        6230 :         if (dfSrcCoordPrecision > 0.0)
    5329             :         {
    5330           0 :             GWKRoundSourceCoordinates(
    5331             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    5332             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    5333           0 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    5334             :         }
    5335             : 
    5336             :         /* ====================================================================
    5337             :          */
    5338             :         /*      Loop over pixels in output scanline. */
    5339             :         /* ====================================================================
    5340             :          */
    5341      242390 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5342             :         {
    5343      236160 :             GPtrDiff_t iSrcOffset = 0;
    5344      236160 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    5345             :                                               padfX, padfY, nSrcXSize,
    5346             :                                               nSrcYSize, iSrcOffset))
    5347           0 :                 continue;
    5348             : 
    5349             :             /* --------------------------------------------------------------------
    5350             :              */
    5351             :             /*      Do not try to apply transparent/invalid source pixels to the
    5352             :              */
    5353             :             /*      destination.  This currently ignores the multi-pixel input
    5354             :              */
    5355             :             /*      of bilinear and cubic resamples. */
    5356             :             /* --------------------------------------------------------------------
    5357             :              */
    5358      236160 :             double dfDensity = 1.0;
    5359             : 
    5360      236160 :             if (poWK->pafUnifiedSrcDensity != nullptr)
    5361             :             {
    5362        1200 :                 dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    5363        1200 :                 if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
    5364             :                 {
    5365           0 :                     if (!bOneSourceCornerFailsToReproject)
    5366             :                     {
    5367           0 :                         continue;
    5368             :                     }
    5369           0 :                     else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
    5370             :                                  psJob, iSrcOffset))
    5371             :                     {
    5372           0 :                         dfDensity =
    5373           0 :                             double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    5374             :                     }
    5375             :                     else
    5376             :                     {
    5377           0 :                         continue;
    5378             :                     }
    5379             :                 }
    5380             :             }
    5381             : 
    5382      236160 :             if (poWK->panUnifiedSrcValid != nullptr &&
    5383           0 :                 !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    5384             :             {
    5385           0 :                 if (!bOneSourceCornerFailsToReproject)
    5386             :                 {
    5387           0 :                     continue;
    5388             :                 }
    5389           0 :                 else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
    5390             :                 {
    5391           0 :                     continue;
    5392             :                 }
    5393             :             }
    5394             : 
    5395             :             /* ====================================================================
    5396             :              */
    5397             :             /*      Loop processing each band. */
    5398             :             /* ====================================================================
    5399             :              */
    5400      236160 :             bool bHasFoundDensity = false;
    5401             : 
    5402      236160 :             const GPtrDiff_t iDstOffset =
    5403      236160 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    5404      472320 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    5405             :             {
    5406      236160 :                 double dfBandDensity = 0.0;
    5407      236160 :                 double dfValueReal = 0.0;
    5408      236160 :                 double dfValueImag = 0.0;
    5409             : 
    5410             :                 /* --------------------------------------------------------------------
    5411             :                  */
    5412             :                 /*      Collect the source value. */
    5413             :                 /* --------------------------------------------------------------------
    5414             :                  */
    5415      236160 :                 if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
    5416             :                     nSrcYSize == 1)
    5417             :                 {
    5418             :                     // FALSE is returned if dfBandDensity == 0, which is
    5419             :                     // checked below.
    5420         568 :                     CPL_IGNORE_RET_VAL(GWKGetPixelValue(
    5421             :                         poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal,
    5422             :                         &dfValueImag));
    5423             :                 }
    5424      235592 :                 else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
    5425             :                 {
    5426         248 :                     GWKBilinearResample4Sample(
    5427         248 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5428         248 :                         padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5429             :                         &dfValueReal, &dfValueImag);
    5430             :                 }
    5431      235344 :                 else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
    5432             :                 {
    5433         248 :                     GWKCubicResample4Sample(
    5434         248 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5435         248 :                         padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5436             :                         &dfValueReal, &dfValueImag);
    5437             :                 }
    5438             :                 else
    5439             : #ifdef DEBUG
    5440             :                     // Only useful for clang static analyzer.
    5441      235096 :                     if (psWrkStruct != nullptr)
    5442             : #endif
    5443             :                     {
    5444      235096 :                         psWrkStruct->pfnGWKResample(
    5445      235096 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5446      235096 :                             padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5447             :                             &dfValueReal, &dfValueImag, psWrkStruct);
    5448             :                     }
    5449             : 
    5450             :                 // If we didn't find any valid inputs skip to next band.
    5451      236160 :                 if (dfBandDensity < BAND_DENSITY_THRESHOLD)
    5452           0 :                     continue;
    5453             : 
    5454      236160 :                 if (poWK->bApplyVerticalShift)
    5455             :                 {
    5456           0 :                     if (!std::isfinite(padfZ[iDstX]))
    5457           0 :                         continue;
    5458             :                     // Subtract padfZ[] since the coordinate transformation is
    5459             :                     // from target to source
    5460           0 :                     dfValueReal =
    5461           0 :                         dfValueReal * poWK->dfMultFactorVerticalShift -
    5462           0 :                         padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
    5463             :                 }
    5464             : 
    5465      236160 :                 bHasFoundDensity = true;
    5466             : 
    5467             :                 /* --------------------------------------------------------------------
    5468             :                  */
    5469             :                 /*      We have a computed value from the source.  Now apply it
    5470             :                  * to      */
    5471             :                 /*      the destination pixel. */
    5472             :                 /* --------------------------------------------------------------------
    5473             :                  */
    5474      236160 :                 GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    5475             :                                  dfValueReal, dfValueImag);
    5476             :             }
    5477             : 
    5478      236160 :             if (!bHasFoundDensity)
    5479           0 :                 continue;
    5480             : 
    5481             :             /* --------------------------------------------------------------------
    5482             :              */
    5483             :             /*      Update destination density/validity masks. */
    5484             :             /* --------------------------------------------------------------------
    5485             :              */
    5486      236160 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    5487             : 
    5488      236160 :             if (poWK->panDstValid != nullptr)
    5489             :             {
    5490           0 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    5491             :             }
    5492             :         } /* Next iDstX */
    5493             : 
    5494             :         /* --------------------------------------------------------------------
    5495             :          */
    5496             :         /*      Report progress to the user, and optionally cancel out. */
    5497             :         /* --------------------------------------------------------------------
    5498             :          */
    5499        6230 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    5500           0 :             break;
    5501             :     }
    5502             : 
    5503             :     /* -------------------------------------------------------------------- */
    5504             :     /*      Cleanup and return.                                             */
    5505             :     /* -------------------------------------------------------------------- */
    5506         239 :     CPLFree(padfX);
    5507         239 :     CPLFree(padfY);
    5508         239 :     CPLFree(padfZ);
    5509         239 :     CPLFree(pabSuccess);
    5510         239 :     if (psWrkStruct)
    5511         220 :         GWKResampleDeleteWrkStruct(psWrkStruct);
    5512         239 : }
    5513             : 
    5514         239 : static CPLErr GWKGeneralCase(GDALWarpKernel *poWK)
    5515             : {
    5516         239 :     return GWKRun(poWK, "GWKGeneralCase", GWKGeneralCaseThread);
    5517             : }
    5518             : 
    5519             : /************************************************************************/
    5520             : /*                            GWKRealCase()                             */
    5521             : /*                                                                      */
    5522             : /*      General case for non-complex data types.                        */
    5523             : /************************************************************************/
    5524             : 
    5525         158 : static void GWKRealCaseThread(void *pData)
    5526             : 
    5527             : {
    5528         158 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    5529         158 :     GDALWarpKernel *poWK = psJob->poWK;
    5530         158 :     const int iYMin = psJob->iYMin;
    5531         158 :     const int iYMax = psJob->iYMax;
    5532             : 
    5533         158 :     const int nDstXSize = poWK->nDstXSize;
    5534         158 :     const int nSrcXSize = poWK->nSrcXSize;
    5535         158 :     const int nSrcYSize = poWK->nSrcYSize;
    5536             :     const double dfMultFactorVerticalShiftPipeline =
    5537         158 :         poWK->bApplyVerticalShift
    5538         158 :             ? CPLAtof(CSLFetchNameValueDef(
    5539           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    5540             :                   "1.0"))
    5541         158 :             : 0.0;
    5542             : 
    5543             :     /* -------------------------------------------------------------------- */
    5544             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    5545             :     /*      scanlines worth of positions.                                   */
    5546             :     /* -------------------------------------------------------------------- */
    5547             : 
    5548             :     // For x, 2 *, because we cache the precomputed values at the end.
    5549             :     double *padfX =
    5550         158 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    5551             :     double *padfY =
    5552         158 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5553             :     double *padfZ =
    5554         158 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5555         158 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    5556             : 
    5557         158 :     const bool bUse4SamplesFormula =
    5558         158 :         poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
    5559             : 
    5560         158 :     GWKResampleWrkStruct *psWrkStruct = nullptr;
    5561         158 :     if (poWK->eResample != GRA_NearestNeighbour)
    5562             :     {
    5563         137 :         psWrkStruct = GWKResampleCreateWrkStruct(poWK);
    5564             :     }
    5565         158 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    5566         158 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    5567         158 :     const double dfErrorThreshold = CPLAtof(
    5568         158 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    5569             : 
    5570         458 :     const bool bSrcMaskIsDensity = poWK->panUnifiedSrcValid == nullptr &&
    5571         300 :                                    poWK->papanBandSrcValid == nullptr &&
    5572         142 :                                    poWK->pafUnifiedSrcDensity != nullptr;
    5573             : 
    5574             :     const bool bOneSourceCornerFailsToReproject =
    5575         158 :         GWKOneSourceCornerFailsToReproject(psJob);
    5576             : 
    5577             :     // Precompute values.
    5578       22201 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5579       22043 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    5580             : 
    5581             :     /* ==================================================================== */
    5582             :     /*      Loop over output lines.                                         */
    5583             :     /* ==================================================================== */
    5584       24952 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    5585             :     {
    5586             :         /* --------------------------------------------------------------------
    5587             :          */
    5588             :         /*      Setup points to transform to source image space. */
    5589             :         /* --------------------------------------------------------------------
    5590             :          */
    5591       24794 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    5592       24794 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    5593    44315700 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5594    44290900 :             padfY[iDstX] = dfY;
    5595       24794 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    5596             : 
    5597             :         /* --------------------------------------------------------------------
    5598             :          */
    5599             :         /*      Transform the points from destination pixel/line coordinates */
    5600             :         /*      to source pixel/line coordinates. */
    5601             :         /* --------------------------------------------------------------------
    5602             :          */
    5603       24794 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    5604             :                              padfY, padfZ, pabSuccess);
    5605       24794 :         if (dfSrcCoordPrecision > 0.0)
    5606             :         {
    5607           0 :             GWKRoundSourceCoordinates(
    5608             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    5609             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    5610           0 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    5611             :         }
    5612             : 
    5613             :         /* ====================================================================
    5614             :          */
    5615             :         /*      Loop over pixels in output scanline. */
    5616             :         /* ====================================================================
    5617             :          */
    5618    44315700 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5619             :         {
    5620    44290900 :             GPtrDiff_t iSrcOffset = 0;
    5621    44290900 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    5622             :                                               padfX, padfY, nSrcXSize,
    5623             :                                               nSrcYSize, iSrcOffset))
    5624    43566800 :                 continue;
    5625             : 
    5626             :             /* --------------------------------------------------------------------
    5627             :              */
    5628             :             /*      Do not try to apply transparent/invalid source pixels to the
    5629             :              */
    5630             :             /*      destination.  This currently ignores the multi-pixel input
    5631             :              */
    5632             :             /*      of bilinear and cubic resamples. */
    5633             :             /* --------------------------------------------------------------------
    5634             :              */
    5635    31778200 :             double dfDensity = 1.0;
    5636             : 
    5637    31778200 :             if (poWK->pafUnifiedSrcDensity != nullptr)
    5638             :             {
    5639     1656100 :                 dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    5640     1656100 :                 if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
    5641             :                 {
    5642     1525010 :                     if (!bOneSourceCornerFailsToReproject)
    5643             :                     {
    5644     1525010 :                         continue;
    5645             :                     }
    5646           0 :                     else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
    5647             :                                  psJob, iSrcOffset))
    5648             :                     {
    5649           0 :                         dfDensity =
    5650           0 :                             double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    5651             :                     }
    5652             :                     else
    5653             :                     {
    5654           0 :                         continue;
    5655             :                     }
    5656             :                 }
    5657             :             }
    5658             : 
    5659    59882200 :             if (poWK->panUnifiedSrcValid != nullptr &&
    5660    29628900 :                 !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    5661             :             {
    5662    29531300 :                 if (!bOneSourceCornerFailsToReproject)
    5663             :                 {
    5664    29529100 :                     continue;
    5665             :                 }
    5666        2229 :                 else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
    5667             :                 {
    5668           0 :                     continue;
    5669             :                 }
    5670             :             }
    5671             : 
    5672             :             /* ====================================================================
    5673             :              */
    5674             :             /*      Loop processing each band. */
    5675             :             /* ====================================================================
    5676             :              */
    5677      724104 :             bool bHasFoundDensity = false;
    5678             : 
    5679      724104 :             const GPtrDiff_t iDstOffset =
    5680      724104 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    5681     2039260 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    5682             :             {
    5683     1315160 :                 double dfBandDensity = 0.0;
    5684     1315160 :                 double dfValueReal = 0.0;
    5685             : 
    5686             :                 /* --------------------------------------------------------------------
    5687             :                  */
    5688             :                 /*      Collect the source value. */
    5689             :                 /* --------------------------------------------------------------------
    5690             :                  */
    5691     1315160 :                 if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
    5692             :                     nSrcYSize == 1)
    5693             :                 {
    5694             :                     // FALSE is returned if dfBandDensity == 0, which is
    5695             :                     // checked below.
    5696        1092 :                     CPL_IGNORE_RET_VAL(GWKGetPixelValueReal(
    5697             :                         poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal));
    5698             :                 }
    5699     1314070 :                 else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
    5700             :                 {
    5701        1086 :                     double dfValueImagIgnored = 0.0;
    5702        1086 :                     GWKBilinearResample4Sample(
    5703        1086 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5704        1086 :                         padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5705        1086 :                         &dfValueReal, &dfValueImagIgnored);
    5706             :                 }
    5707     1312980 :                 else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
    5708             :                 {
    5709      691552 :                     if (bSrcMaskIsDensity)
    5710             :                     {
    5711      389755 :                         if (poWK->eWorkingDataType == GDT_Byte)
    5712             :                         {
    5713      389755 :                             GWKCubicResampleSrcMaskIsDensity4SampleRealT<GByte>(
    5714      389755 :                                 poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5715      389755 :                                 padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5716             :                                 &dfValueReal);
    5717             :                         }
    5718           0 :                         else if (poWK->eWorkingDataType == GDT_UInt16)
    5719             :                         {
    5720             :                             GWKCubicResampleSrcMaskIsDensity4SampleRealT<
    5721           0 :                                 GUInt16>(poWK, iBand,
    5722           0 :                                          padfX[iDstX] - poWK->nSrcXOff,
    5723           0 :                                          padfY[iDstX] - poWK->nSrcYOff,
    5724             :                                          &dfBandDensity, &dfValueReal);
    5725             :                         }
    5726             :                         else
    5727             :                         {
    5728           0 :                             GWKCubicResampleSrcMaskIsDensity4SampleReal(
    5729           0 :                                 poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5730           0 :                                 padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5731             :                                 &dfValueReal);
    5732             :                         }
    5733             :                     }
    5734             :                     else
    5735             :                     {
    5736      301797 :                         double dfValueImagIgnored = 0.0;
    5737      301797 :                         GWKCubicResample4Sample(
    5738      301797 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5739      301797 :                             padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5740             :                             &dfValueReal, &dfValueImagIgnored);
    5741      691552 :                     }
    5742             :                 }
    5743             :                 else
    5744             : #ifdef DEBUG
    5745             :                     // Only useful for clang static analyzer.
    5746      621431 :                     if (psWrkStruct != nullptr)
    5747             : #endif
    5748             :                     {
    5749      621431 :                         double dfValueImagIgnored = 0.0;
    5750      621431 :                         psWrkStruct->pfnGWKResample(
    5751      621431 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5752      621431 :                             padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5753             :                             &dfValueReal, &dfValueImagIgnored, psWrkStruct);
    5754             :                     }
    5755             : 
    5756             :                 // If we didn't find any valid inputs skip to next band.
    5757     1315160 :                 if (dfBandDensity < BAND_DENSITY_THRESHOLD)
    5758           0 :                     continue;
    5759             : 
    5760     1315160 :                 if (poWK->bApplyVerticalShift)
    5761             :                 {
    5762           0 :                     if (!std::isfinite(padfZ[iDstX]))
    5763           0 :                         continue;
    5764             :                     // Subtract padfZ[] since the coordinate transformation is
    5765             :                     // from target to source
    5766           0 :                     dfValueReal =
    5767           0 :                         dfValueReal * poWK->dfMultFactorVerticalShift -
    5768           0 :                         padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
    5769             :                 }
    5770             : 
    5771     1315160 :                 bHasFoundDensity = true;
    5772             : 
    5773             :                 /* --------------------------------------------------------------------
    5774             :                  */
    5775             :                 /*      We have a computed value from the source.  Now apply it
    5776             :                  * to      */
    5777             :                 /*      the destination pixel. */
    5778             :                 /* --------------------------------------------------------------------
    5779             :                  */
    5780     1315160 :                 GWKSetPixelValueReal(poWK, iBand, iDstOffset, dfBandDensity,
    5781             :                                      dfValueReal);
    5782             :             }
    5783             : 
    5784      724104 :             if (!bHasFoundDensity)
    5785           0 :                 continue;
    5786             : 
    5787             :             /* --------------------------------------------------------------------
    5788             :              */
    5789             :             /*      Update destination density/validity masks. */
    5790             :             /* --------------------------------------------------------------------
    5791             :              */
    5792      724104 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    5793             : 
    5794      724104 :             if (poWK->panDstValid != nullptr)
    5795             :             {
    5796      103866 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    5797             :             }
    5798             :         }  // Next iDstX.
    5799             : 
    5800             :         /* --------------------------------------------------------------------
    5801             :          */
    5802             :         /*      Report progress to the user, and optionally cancel out. */
    5803             :         /* --------------------------------------------------------------------
    5804             :          */
    5805       24794 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    5806           0 :             break;
    5807             :     }
    5808             : 
    5809             :     /* -------------------------------------------------------------------- */
    5810             :     /*      Cleanup and return.                                             */
    5811             :     /* -------------------------------------------------------------------- */
    5812         158 :     CPLFree(padfX);
    5813         158 :     CPLFree(padfY);
    5814         158 :     CPLFree(padfZ);
    5815         158 :     CPLFree(pabSuccess);
    5816         158 :     if (psWrkStruct)
    5817         137 :         GWKResampleDeleteWrkStruct(psWrkStruct);
    5818         158 : }
    5819             : 
    5820         158 : static CPLErr GWKRealCase(GDALWarpKernel *poWK)
    5821             : {
    5822         158 :     return GWKRun(poWK, "GWKRealCase", GWKRealCaseThread);
    5823             : }
    5824             : 
    5825             : /************************************************************************/
    5826             : /*                 GWKCubicResampleNoMasks4MultiBandT()                 */
    5827             : /************************************************************************/
    5828             : 
    5829             : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
    5830             : /* and enough SSE registries */
    5831             : #if defined(USE_SSE2)
    5832             : 
    5833    94342100 : static inline float Convolute4x4(const __m128 row0, const __m128 row1,
    5834             :                                  const __m128 row2, const __m128 row3,
    5835             :                                  const __m128 weightsXY0,
    5836             :                                  const __m128 weightsXY1,
    5837             :                                  const __m128 weightsXY2,
    5838             :                                  const __m128 weightsXY3)
    5839             : {
    5840   660394000 :     return XMMHorizontalAdd(_mm_add_ps(
    5841             :         _mm_add_ps(_mm_mul_ps(row0, weightsXY0), _mm_mul_ps(row1, weightsXY1)),
    5842             :         _mm_add_ps(_mm_mul_ps(row2, weightsXY2),
    5843    94922100 :                    _mm_mul_ps(row3, weightsXY3))));
    5844             : }
    5845             : 
    5846             : template <class T>
    5847    32790977 : static void GWKCubicResampleNoMasks4MultiBandT(const GDALWarpKernel *poWK,
    5848             :                                                double dfSrcX, double dfSrcY,
    5849             :                                                const GPtrDiff_t iDstOffset)
    5850             : {
                           // Cubic resampling of ALL bands of poWK at one source position,
                           // using a 4x4 source window and SSE2 arithmetic.
                           //   poWK        kernel providing the source/destination band buffers.
                           //   dfSrcX/Y    source position, relative to the source window
                           //               (the visible caller subtracts nSrcXOff / nSrcYOff).
                           //   iDstOffset  element index written in every destination band.
                           // Each band of papabyDstImage receives one value of type T at
                           // iDstOffset; pafDstDensity[iDstOffset] is set to 1.0f when that
                           // array exists. When the 4x4 window would not fit inside the source
                           // image, every band falls back to the 4-sample bilinear helper.
    5851    32790977 :     const double dfSrcXShifted = dfSrcX - 0.5;
    5852    32790977 :     const int iSrcX = static_cast<int>(dfSrcXShifted);
    5853    32790977 :     const double dfSrcYShifted = dfSrcY - 0.5;
    5854    32790977 :     const int iSrcY = static_cast<int>(dfSrcYShifted);
    5855    32790977 :     const GPtrDiff_t iSrcOffset =
    5856    32790977 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    5857             : 
    5858             :     // Get the bilinear interpolation at the image borders.
                           // The window covers columns iSrcX-1..iSrcX+2 and rows
                           // iSrcY-1..iSrcY+2; this test rejects positions where the window
                           // would touch or cross the edge of the source image.
    5859    32790977 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    5860    31898077 :         iSrcY + 2 >= poWK->nSrcYSize)
    5861             :     {
    5862     3598480 :         for (int iBand = 0; iBand < poWK->nBands; iBand++)
    5863             :         {
    5864             :             T value;
    5865     2702950 :             GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    5866             :                                                &value);
    5867     2739240 :             reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
    5868             :                 value;
    5869      895531 :         }
    5870             :     }
    5871             :     else
    5872             :     {
                               // iSrcX and iSrcY are >= 1 in this branch, so the integer casts
                               // above truncated positive values and the deltas are the
                               // fractional parts of the shifted coordinates.
    5873    31931777 :         const float fDeltaX = static_cast<float>(dfSrcXShifted) - iSrcX;
    5874    31931777 :         const float fDeltaY = static_cast<float>(dfSrcYShifted) - iSrcY;
    5875             : 
    5876             :         float afCoeffsX[4];
    5877             :         float afCoeffsY[4];
    5878    31931777 :         GWKCubicComputeWeights(fDeltaX, afCoeffsX);
    5879    31590977 :         GWKCubicComputeWeights(fDeltaY, afCoeffsY);
                               // Build the 2D weights once and reuse them for every band:
                               // weightsXYr holds afCoeffsY[r] * afCoeffsX[0..3] for window row r.
    5880    31918977 :         const auto weightsX = _mm_loadu_ps(afCoeffsX);
    5881             :         const auto weightsXY0 =
    5882    63837854 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[0]), weightsX);
    5883             :         const auto weightsXY1 =
    5884    63837854 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[1]), weightsX);
    5885             :         const auto weightsXY2 =
    5886    63837854 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[2]), weightsX);
    5887             :         const auto weightsXY3 =
    5888    31918977 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[3]), weightsX);
    5889             : 
                               // Element index of the top-left sample of the window:
                               // one row up and one column left of (iSrcX, iSrcY).
    5890    31918977 :         const GPtrDiff_t iOffset = iSrcOffset - poWK->nSrcXSize - 1;
    5891             : 
    5892    31918977 :         int iBand = 0;
    5893             :         // Process 2 bands at a time
    5894    63442954 :         for (; iBand + 1 < poWK->nBands; iBand += 2)
    5895             :         {
    5896    31872177 :             const T *CPL_RESTRICT pBand0 =
    5897    31872177 :                 reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
    5898    31872177 :             const auto row0_0 = XMMLoad4Values(pBand0 + iOffset);
    5899             :             const auto row1_0 =
    5900    31797977 :                 XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
    5901             :             const auto row2_0 =
    5902    31844377 :                 XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
    5903             :             const auto row3_0 =
    5904    31657877 :                 XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
    5905             : 
    5906    31823877 :             const T *CPL_RESTRICT pBand1 =
    5907    31823877 :                 reinterpret_cast<const T *>(poWK->papabySrcImage[iBand + 1]);
    5908    31823877 :             const auto row0_1 = XMMLoad4Values(pBand1 + iOffset);
    5909             :             const auto row1_1 =
    5910    31836577 :                 XMMLoad4Values(pBand1 + iOffset + poWK->nSrcXSize);
    5911             :             const auto row2_1 =
    5912    31878477 :                 XMMLoad4Values(pBand1 + iOffset + 2 * poWK->nSrcXSize);
    5913             :             const auto row3_1 =
    5914    31690177 :                 XMMLoad4Values(pBand1 + iOffset + 3 * poWK->nSrcXSize);
    5915             : 
    5916             :             const float fValue_0 =
    5917    31767377 :                 Convolute4x4(row0_0, row1_0, row2_0, row3_0, weightsXY0,
    5918             :                              weightsXY1, weightsXY2, weightsXY3);
    5919             : 
    5920             :             const float fValue_1 =
    5921    31833377 :                 Convolute4x4(row0_1, row1_1, row2_1, row3_1, weightsXY0,
    5922             :                              weightsXY1, weightsXY2, weightsXY3);
    5923             : 
    5924    31852277 :             T *CPL_RESTRICT pDstBand0 =
    5925    31852277 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
    5926    31852277 :             pDstBand0[iDstOffset] = GWKClampValueT<T>(fValue_0);
    5927             : 
    5928    31559977 :             T *CPL_RESTRICT pDstBand1 =
    5929    31559977 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand + 1]);
    5930    31559977 :             pDstBand1[iDstOffset] = GWKClampValueT<T>(fValue_1);
    5931             :         }
                               // Odd band count: one band is left over after the paired loop.
    5932    31570777 :         if (iBand < poWK->nBands)
    5933             :         {
    5934    31656777 :             const T *CPL_RESTRICT pBand0 =
    5935    31656777 :                 reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
    5936    31656777 :             const auto row0 = XMMLoad4Values(pBand0 + iOffset);
    5937             :             const auto row1 =
    5938    31773177 :                 XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
    5939             :             const auto row2 =
    5940    31831777 :                 XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
    5941             :             const auto row3 =
    5942    31634277 :                 XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
    5943             : 
    5944             :             const float fValue =
    5945    31768377 :                 Convolute4x4(row0, row1, row2, row3, weightsXY0, weightsXY1,
    5946             :                              weightsXY2, weightsXY3);
    5947             : 
    5948    31843477 :             T *CPL_RESTRICT pDstBand =
    5949    31843477 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
    5950    31843477 :             pDstBand[iDstOffset] = GWKClampValueT<T>(fValue);
    5951             :         }
    5952             :     }
    5953             : 
                           // Reached from both the border fallback and the cubic path.
    5954    32558977 :     if (poWK->pafDstDensity)
    5955    32471236 :         poWK->pafDstDensity[iDstOffset] = 1.0f;
    5956    32558977 : }
    5957             : 
    5958             : #endif  // defined(USE_SSE2)
    5959             : 
    5960             : /************************************************************************/
    5961             : /*                GWKResampleNoMasksOrDstDensityOnlyThreadInternal()    */
    5962             : /************************************************************************/
    5963             : 
    5964             : template <class T, GDALResampleAlg eResample, int bUse4SamplesFormula>
    5965        1770 : static void GWKResampleNoMasksOrDstDensityOnlyThreadInternal(void *pData)
    5966             : 
    5967             : {
                           // Worker for one job: warps destination lines [iYMin, iYMax) of the
                           // kernel referenced by the GWKJobStruct passed in pData.
                           //
                           // Template parameters:
                           //   T                    pixel type used to read papabySrcImage and
                           //                        to write papabyDstImage.
                           //   eResample            resampling algorithm, chosen at compile time.
                           //   bUse4SamplesFormula  when non-zero, bilinear/cubic go through the
                           //                        *4SampleT helpers instead of
                           //                        GWKResampleNoMasksT; it has no effect for
                           //                        GRA_NearestNeighbour.
                           //
                           // For every destination line the pixel centres are mapped to source
                           // coordinates with poWK->pfnTransformer; each pixel accepted by
                           // GWKCheckAndComputeSrcOffsets is then resampled band by band. This
                           // function itself reads no source validity or density mask; the only
                           // mask-like output it touches is pafDstDensity, when present.
    5968        1770 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    5969        1770 :     GDALWarpKernel *poWK = psJob->poWK;
    5970        1770 :     const int iYMin = psJob->iYMin;
    5971        1770 :     const int iYMax = psJob->iYMax;
                           // Only consulted when bApplyVerticalShift is set; the 0.0 used
                           // otherwise is never read.
    5972        1752 :     const double dfMultFactorVerticalShiftPipeline =
    5973        1770 :         poWK->bApplyVerticalShift
    5974          18 :             ? CPLAtof(CSLFetchNameValueDef(
    5975          18 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    5976             :                   "1.0"))
    5977             :             : 0.0;
    5978             : 
    5979        1770 :     const int nDstXSize = poWK->nDstXSize;
    5980        1770 :     const int nSrcXSize = poWK->nSrcXSize;
    5981        1770 :     const int nSrcYSize = poWK->nSrcYSize;
    5982             : 
    5983             :     /* -------------------------------------------------------------------- */
    5984             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    5985             :     /*      scanline's worth of positions.                                  */
    5986             :     /* -------------------------------------------------------------------- */
    5987             : 
    5988             :     // For x, 2 *, because we cache the precomputed values at the end.
    5989             :     double *padfX =
    5990        1770 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    5991             :     double *padfY =
    5992        1769 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5993             :     double *padfZ =
    5994        1771 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5995        1770 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    5996             : 
                           // Scratch weight buffers (1 + 2 * radius doubles each); they are only
                           // handed to GWKResampleNoMasksT below.
    5997        1771 :     const int nXRadius = poWK->nXRadius;
    5998             :     double *padfWeightsX =
    5999        1771 :         static_cast<double *>(CPLCalloc(1 + nXRadius * 2, sizeof(double)));
    6000             :     double *padfWeightsY = static_cast<double *>(
    6001        1771 :         CPLCalloc(1 + poWK->nYRadius * 2, sizeof(double)));
                           // Both options default to "0"; coordinate rounding is skipped below
                           // unless SRC_COORD_PRECISION is > 0.
    6002        1771 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    6003        1771 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    6004        1770 :     const double dfErrorThreshold = CPLAtof(
    6005        1770 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    6006             : 
    6007             :     // Precompute values.
                           // The second half of padfX keeps the destination pixel-centre X
                           // coordinates, which are the same for every line.
    6008      401749 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6009      399978 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    6010             : 
    6011             :     /* ==================================================================== */
    6012             :     /*      Loop over output lines.                                         */
    6013             :     /* ==================================================================== */
    6014      276842 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    6015             :     {
    6016             :         /* --------------------------------------------------------------------
    6017             :          */
    6018             :         /*      Setup points to transform to source image space. */
    6019             :         /* --------------------------------------------------------------------
    6020             :          */
                               // Refresh the working X values from the cached copy; the first
                               // half of padfX is what gets passed to the transformer.
    6021      275069 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    6022      275069 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    6023    94308759 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6024    94033620 :             padfY[iDstX] = dfY;
    6025      275069 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    6026             : 
    6027             :         /* --------------------------------------------------------------------
    6028             :          */
    6029             :         /*      Transform the points from destination pixel/line coordinates */
    6030             :         /*      to source pixel/line coordinates. */
    6031             :         /* --------------------------------------------------------------------
    6032             :          */
    6033      275069 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    6034             :                              padfY, padfZ, pabSuccess);
    6035      275074 :         if (dfSrcCoordPrecision > 0.0)
    6036             :         {
    6037        1000 :             GWKRoundSourceCoordinates(
    6038             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    6039             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    6040        1000 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    6041             :         }
    6042             : 
    6043             :         /* ====================================================================
    6044             :          */
    6045             :         /*      Loop over pixels in output scanline. */
    6046             :         /* ====================================================================
    6047             :          */
    6048    94028719 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6049             :         {
                                   // Skip this pixel when the helper reports that no usable
                                   // source offset could be computed for it.
    6050    93753680 :             GPtrDiff_t iSrcOffset = 0;
    6051    93753680 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    6052             :                                               padfX, padfY, nSrcXSize,
    6053             :                                               nSrcYSize, iSrcOffset))
    6054    42799239 :                 continue;
    6055             : 
    6056             :             /* ====================================================================
    6057             :              */
    6058             :             /*      Loop processing each band. */
    6059             :             /* ====================================================================
    6060             :              */
    6061    83701273 :             const GPtrDiff_t iDstOffset =
    6062    83701273 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    6063             : 
    6064             : #if defined(USE_SSE2)
                                   // Fast path, compiled in only for 4-sample cubic on GByte or
                                   // GUInt16: with more than one band and no vertical shift, all
                                   // bands of this pixel are handled by one SSE2 call and the
                                   // generic band loop below is skipped.
    6065             :             if constexpr (bUse4SamplesFormula && eResample == GRA_Cubic &&
    6066             :                           (std::is_same<T, GByte>::value ||
    6067             :                            std::is_same<T, GUInt16>::value))
    6068             :             {
    6069    33889776 :                 if (poWK->nBands > 1 && !poWK->bApplyVerticalShift)
    6070             :                 {
    6071    32838177 :                     GWKCubicResampleNoMasks4MultiBandT<T>(
    6072    32838177 :                         poWK, padfX[iDstX] - poWK->nSrcXOff,
    6073    32838177 :                         padfY[iDstX] - poWK->nSrcYOff, iDstOffset);
    6074             : 
    6075    32579677 :                     continue;
    6076             :                 }
    6077             :             }
    6078             : #endif  // defined(USE_SSE2)
    6079             : 
                                   // dfInvWeights is only passed to GWKResampleNoMasksT, hence
                                   // [[maybe_unused]] for the instantiations that never call it.
    6080    50863024 :             [[maybe_unused]] double dfInvWeights = 0;
    6081   144580088 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6082             :             {
    6083    93625804 :                 T value = 0;
    6084             :                 if constexpr (eResample == GRA_NearestNeighbour)
    6085             :                 {
                                           // Nearest neighbour: copy the source sample as is.
    6086    78467930 :                     value = reinterpret_cast<T *>(
    6087    78467930 :                         poWK->papabySrcImage[iBand])[iSrcOffset];
    6088             :                 }
    6089             :                 else if constexpr (bUse4SamplesFormula)
    6090             :                 {
                                           // Anything other than bilinear takes the cubic helper.
    6091             :                     if constexpr (eResample == GRA_Bilinear)
    6092     3363189 :                         GWKBilinearResampleNoMasks4SampleT(
    6093     3363189 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6094     3363189 :                             padfY[iDstX] - poWK->nSrcYOff, &value);
    6095             :                     else
    6096     2231485 :                         GWKCubicResampleNoMasks4SampleT(
    6097     2231485 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6098     2231485 :                             padfY[iDstX] - poWK->nSrcYOff, &value);
    6099             :                 }
    6100             :                 else
    6101             :                 {
    6102     9563200 :                     GWKResampleNoMasksT(
    6103     9563200 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6104     9563200 :                         padfY[iDstX] - poWK->nSrcYOff, &value, padfWeightsX,
    6105             :                         padfWeightsY, dfInvWeights);
    6106             :                 }
    6107             : 
    6108    93610814 :                 if (poWK->bApplyVerticalShift)
    6109             :                 {
                                           // A non-finite Z leaves this band's destination
                                           // sample (and the density) untouched for this pixel.
    6110         818 :                     if (!std::isfinite(padfZ[iDstX]))
    6111           0 :                         continue;
    6112             :                     // Subtract padfZ[] since the coordinate transformation is
    6113             :                     // from target to source
    6114      107166 :                     value = GWKClampValueT<T>(
    6115         818 :                         double(value) * poWK->dfMultFactorVerticalShift -
    6116         818 :                         padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
    6117             :                 }
    6118             : 
                                       // Written once per band; the value stored is always 1.0f.
    6119    93717214 :                 if (poWK->pafDstDensity)
    6120    14049274 :                     poWK->pafDstDensity[iDstOffset] = 1.0f;
    6121             : 
    6122    93717214 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
    6123             :                     value;
    6124             :             }
    6125             :         }
    6126             : 
    6127             :         /* --------------------------------------------------------------------
    6128             :          */
    6129             :         /*      Report progress to the user, and optionally cancel out. */
    6130             :         /* --------------------------------------------------------------------
    6131             :          */
                               // A non-zero return from the job's progress callback stops the
                               // line loop; the buffers are still released below.
    6132      275070 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    6133           1 :             break;
    6134             :     }
    6135             : 
    6136             :     /* -------------------------------------------------------------------- */
    6137             :     /*      Cleanup and return.                                             */
    6138             :     /* -------------------------------------------------------------------- */
    6139        1774 :     CPLFree(padfX);
    6140        1771 :     CPLFree(padfY);
    6141        1771 :     CPLFree(padfZ);
    6142        1771 :     CPLFree(pabSuccess);
    6143        1771 :     CPLFree(padfWeightsX);
    6144        1771 :     CPLFree(padfWeightsY);
    6145        1771 : }
    6146             : 
    6147             : template <class T, GDALResampleAlg eResample>
    6148         987 : static void GWKResampleNoMasksOrDstDensityOnlyThread(void *pData)
    6149             : {
                           // Thread entry point that always runs the worker with the
                           // 4-sample formula disabled (third template argument FALSE).
                           // pData is forwarded unchanged.
    6150         987 :     GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
    6151             :         pData);
    6152         987 : }
    6153             : 
    6154             : template <class T, GDALResampleAlg eResample>
    6155         784 : static void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread(void *pData)
    6156             : 
    6157             : {
                           // Thread entry point for bilinear and cubic only (enforced by the
                           // static_assert). It picks, at run time, between the two
                           // instantiations of the worker: with or without the 4-sample
                           // formula. pData is the GWKJobStruct of the job and is forwarded
                           // unchanged.
    6158         784 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    6159         784 :     GDALWarpKernel *poWK = psJob->poWK;
    6160             :     static_assert(eResample == GRA_Bilinear || eResample == GRA_Cubic);
                           // The 4-sample formula is used only when both scale factors are at
                           // least 0.95.
                           // NOTE(review): presumably this means "no significant downsampling";
                           // confirm against the definition of dfXScale / dfYScale.
    6161         784 :     const bool bUse4SamplesFormula =
    6162         784 :         poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
    6163         784 :     if (bUse4SamplesFormula)
    6164         681 :         GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, TRUE>(
    6165             :             pData);
    6166             :     else
    6167         103 :         GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
    6168             :             pData);
    6169         784 : }
    6170             : 
    6171         936 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6172             : {
                           // Nearest neighbour warp of GByte data: runs GWKRun with the
                           // <GByte, GRA_NearestNeighbour> thread function and returns its
                           // result.
    6173         936 :     return GWKRun(
    6174             :         poWK, "GWKNearestNoMasksOrDstDensityOnlyByte",
    6175         936 :         GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_NearestNeighbour>);
    6176             : }
    6177             : 
    6178         126 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6179             : {
                           // Bilinear warp of GByte data: runs GWKRun with the
                           // <GByte, GRA_Bilinear> thread function, which itself decides
                           // whether the 4-sample formula applies, and returns GWKRun's result.
    6180         126 :     return GWKRun(
    6181             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyByte",
    6182             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte,
    6183         126 :                                                            GRA_Bilinear>);
    6184             : }
    6185             : 
    6186         612 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6187             : {
                           // Cubic warp of GByte data: runs GWKRun with the
                           // <GByte, GRA_Cubic> thread function, which itself decides whether
                           // the 4-sample formula applies, and returns GWKRun's result.
    6188         612 :     return GWKRun(
    6189             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyByte",
    6190         612 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte, GRA_Cubic>);
    6191             : }
    6192             : 
    6193           9 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
    6194             : {
                           // Cubic warp of float data: runs GWKRun with the
                           // <float, GRA_Cubic> thread function, which itself decides whether
                           // the 4-sample formula applies, and returns GWKRun's result.
    6195           9 :     return GWKRun(
    6196             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyFloat",
    6197           9 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, GRA_Cubic>);
    6198             : }
    6199             : 
    6200             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    6201             : 
    6202             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
    6203             : {
                           // Cubic warp of double data: runs GWKRun with the
                           // <double, GRA_Cubic> thread function and returns its result.
                           // Only compiled when INSTANTIATE_FLOAT64_SSE2_IMPL is defined.
    6204             :     return GWKRun(
    6205             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyDouble",
    6206             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double, GRA_Cubic>);
    6207             : }
    6208             : #endif
    6209             : 
    6210          12 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6211             : {
                           // Cubic spline warp of GByte data: runs GWKRun with the
                           // <GByte, GRA_CubicSpline> thread function (the variant that never
                           // uses the 4-sample formula) and returns GWKRun's result.
    6212          12 :     return GWKRun(
    6213             :         poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyByte",
    6214          12 :         GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_CubicSpline>);
    6215             : }
    6216             : 
    6217             : /************************************************************************/
    6218             : /*                          GWKNearestThread()                          */
    6219             : /*                                                                      */
    6220             : /*      Case for nearest neighbour resampling of pixels of type T       */
    6221             : /*      using valid flags. Should be as fast as possible for this       */
    6222             : /*      particular transformation type.                                 */
    6223             : /************************************************************************/
    6224             : 
    6225         421 : template <class T> static void GWKNearestThread(void *pData)
    6226             : 
    6227             : {
    6228         421 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    6229         421 :     GDALWarpKernel *poWK = psJob->poWK;
    6230         421 :     const int iYMin = psJob->iYMin;
    6231         421 :     const int iYMax = psJob->iYMax;
    6232         421 :     const double dfMultFactorVerticalShiftPipeline =
    6233         421 :         poWK->bApplyVerticalShift
    6234           0 :             ? CPLAtof(CSLFetchNameValueDef(
    6235           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    6236             :                   "1.0"))
    6237             :             : 0.0;
    6238             : 
    6239         421 :     const int nDstXSize = poWK->nDstXSize;
    6240         421 :     const int nSrcXSize = poWK->nSrcXSize;
    6241         421 :     const int nSrcYSize = poWK->nSrcYSize;
    6242             : 
    6243             :     /* -------------------------------------------------------------------- */
    6244             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    6245             :     /*      scanlines worth of positions.                                   */
    6246             :     /* -------------------------------------------------------------------- */
    6247             : 
    6248             :     // For x, 2 *, because we cache the precomputed values at the end.
    6249             :     double *padfX =
    6250         421 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    6251             :     double *padfY =
    6252         421 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6253             :     double *padfZ =
    6254         421 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6255         421 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    6256             : 
    6257         421 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    6258         421 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    6259         421 :     const double dfErrorThreshold = CPLAtof(
    6260         421 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    6261             : 
    6262             :     const bool bOneSourceCornerFailsToReproject =
    6263         421 :         GWKOneSourceCornerFailsToReproject(psJob);
    6264             : 
    6265             :     // Precompute values.
    6266       61068 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6267       60647 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    6268             : 
    6269             :     /* ==================================================================== */
    6270             :     /*      Loop over output lines.                                         */
    6271             :     /* ==================================================================== */
    6272       46302 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    6273             :     {
    6274             : 
    6275             :         /* --------------------------------------------------------------------
    6276             :          */
    6277             :         /*      Setup points to transform to source image space. */
    6278             :         /* --------------------------------------------------------------------
    6279             :          */
    6280       45881 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    6281       45881 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    6282     9626833 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6283     9580956 :             padfY[iDstX] = dfY;
    6284       45881 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    6285             : 
    6286             :         /* --------------------------------------------------------------------
    6287             :          */
    6288             :         /*      Transform the points from destination pixel/line coordinates */
    6289             :         /*      to source pixel/line coordinates. */
    6290             :         /* --------------------------------------------------------------------
    6291             :          */
    6292       45881 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    6293             :                              padfY, padfZ, pabSuccess);
    6294       45881 :         if (dfSrcCoordPrecision > 0.0)
    6295             :         {
    6296           0 :             GWKRoundSourceCoordinates(
    6297             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    6298             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    6299           0 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    6300             :         }
    6301             :         /* ====================================================================
    6302             :          */
    6303             :         /*      Loop over pixels in output scanline. */
    6304             :         /* ====================================================================
    6305             :          */
    6306     9626833 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6307             :         {
    6308     9580956 :             GPtrDiff_t iSrcOffset = 0;
    6309     9580956 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    6310             :                                               padfX, padfY, nSrcXSize,
    6311             :                                               nSrcYSize, iSrcOffset))
    6312     2357899 :                 continue;
    6313             : 
    6314             :             /* --------------------------------------------------------------------
    6315             :              */
    6316             :             /*      Do not try to apply invalid source pixels to the dest. */
    6317             :             /* --------------------------------------------------------------------
    6318             :              */
    6319     9402154 :             if (poWK->panUnifiedSrcValid != nullptr &&
    6320     1127315 :                 !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    6321             :             {
    6322       48962 :                 if (!bOneSourceCornerFailsToReproject)
    6323             :                 {
    6324       41477 :                     continue;
    6325             :                 }
    6326        7485 :                 else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
    6327             :                 {
    6328        5224 :                     continue;
    6329             :                 }
    6330             :             }
    6331             : 
    6332             :             /* --------------------------------------------------------------------
    6333             :              */
    6334             :             /*      Do not try to apply transparent source pixels to the
    6335             :              * destination.*/
    6336             :             /* --------------------------------------------------------------------
    6337             :              */
    6338     8228129 :             double dfDensity = 1.0;
    6339             : 
    6340     8228129 :             if (poWK->pafUnifiedSrcDensity != nullptr)
    6341             :             {
    6342     1557335 :                 dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    6343     1557335 :                 if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
    6344     1005075 :                     continue;
    6345             :             }
    6346             : 
    6347             :             /* ====================================================================
    6348             :              */
    6349             :             /*      Loop processing each band. */
    6350             :             /* ====================================================================
    6351             :              */
    6352             : 
    6353     7223057 :             const GPtrDiff_t iDstOffset =
    6354     7223057 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    6355             : 
    6356    17008004 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6357             :             {
    6358     9784947 :                 T value = 0;
    6359     9784947 :                 double dfBandDensity = 0.0;
    6360             : 
    6361             :                 /* --------------------------------------------------------------------
    6362             :                  */
    6363             :                 /*      Collect the source value. */
    6364             :                 /* --------------------------------------------------------------------
    6365             :                  */
    6366     9784947 :                 if (GWKGetPixelT(poWK, iBand, iSrcOffset, &dfBandDensity,
    6367             :                                  &value))
    6368             :                 {
    6369             : 
    6370     9784937 :                     if (poWK->bApplyVerticalShift)
    6371             :                     {
    6372           0 :                         if (!std::isfinite(padfZ[iDstX]))
    6373           0 :                             continue;
    6374             :                         // Subtract padfZ[] since the coordinate transformation
    6375             :                         // is from target to source
    6376           0 :                         value = GWKClampValueT<T>(
    6377           0 :                             double(value) * poWK->dfMultFactorVerticalShift -
    6378           0 :                             padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
    6379             :                     }
    6380             : 
    6381     9784937 :                     GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
    6382             :                                           dfBandDensity, value);
    6383             :                 }
    6384             :             }
    6385             : 
    6386             :             /* --------------------------------------------------------------------
    6387             :              */
    6388             :             /*      Mark this pixel valid/opaque in the output. */
    6389             :             /* --------------------------------------------------------------------
    6390             :              */
    6391     7223057 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    6392             : 
    6393     7223057 :             if (poWK->panDstValid != nullptr)
    6394             :             {
    6395     5953054 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    6396             :             }
    6397             :         } /* Next iDstX */
    6398             : 
    6399             :         /* --------------------------------------------------------------------
    6400             :          */
    6401             :         /*      Report progress to the user, and optionally cancel out. */
    6402             :         /* --------------------------------------------------------------------
    6403             :          */
    6404       45881 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    6405           0 :             break;
    6406             :     }
    6407             : 
    6408             :     /* -------------------------------------------------------------------- */
    6409             :     /*      Cleanup and return.                                             */
    6410             :     /* -------------------------------------------------------------------- */
    6411         421 :     CPLFree(padfX);
    6412         421 :     CPLFree(padfY);
    6413         421 :     CPLFree(padfZ);
    6414         421 :     CPLFree(pabSuccess);
    6415         421 : }
    6416             : // Kernel entry point for GByte samples: hands GWKNearestThread<GByte> to
                       // GWKRun together with the label "GWKNearestByte" and returns GWKRun's result.
    6417         341 : static CPLErr GWKNearestByte(GDALWarpKernel *poWK)
    6418             : {
    6419         341 :     return GWKRun(poWK, "GWKNearestByte", GWKNearestThread<GByte>);
    6420             : }
    6421             : // Kernel entry point for GInt16 samples using the generic thread function
                       // GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_NearestNeighbour>.
                       // NOTE(review): per its name this variant targets warps with no masks, or only
                       // a destination density buffer; the selection logic is not visible here.
    6422          14 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6423             : {
    6424          14 :     return GWKRun(
    6425             :         poWK, "GWKNearestNoMasksOrDstDensityOnlyShort",
    6426          14 :         GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_NearestNeighbour>);
    6427             : }
    6428             : // Kernel entry point for GInt16 samples with GRA_Bilinear: passes
                       // GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16, GRA_Bilinear>
                       // to GWKRun and returns its result.
    6429           5 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6430             : {
    6431           5 :     return GWKRun(
    6432             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyShort",
    6433             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16,
    6434           5 :                                                            GRA_Bilinear>);
    6435             : }
    6436             : // Kernel entry point for GUInt16 samples with GRA_Bilinear: passes
                       // GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16, GRA_Bilinear>
                       // to GWKRun and returns its result.
    6437           6 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
    6438             : {
    6439           6 :     return GWKRun(
    6440             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyUShort",
    6441             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16,
    6442           6 :                                                            GRA_Bilinear>);
    6443             : }
    6444             : // Kernel entry point for float samples with GRA_Bilinear: passes
                       // GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, GRA_Bilinear>
                       // to GWKRun and returns its result.
    6445           4 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
    6446             : {
    6447           4 :     return GWKRun(
    6448             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyFloat",
    6449             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float,
    6450           4 :                                                            GRA_Bilinear>);
    6451             : }
    6452             : 
    6453             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    6454             : // Kernel entry point for double samples with GRA_Bilinear. Only compiled when
                       // INSTANTIATE_FLOAT64_SSE2_IMPL is defined; this listing records no hit counts
                       // for it, so the measured build did not define that macro.
    6455             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
    6456             : {
    6457             :     return GWKRun(
    6458             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyDouble",
    6459             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double,
    6460             :                                                            GRA_Bilinear>);
    6461             : }
    6462             : #endif
    6463             : // Kernel entry point for GInt16 samples with GRA_Cubic: passes
                       // GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16, GRA_Cubic>
                       // to GWKRun and returns its result.
    6464           5 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6465             : {
    6466           5 :     return GWKRun(
    6467             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyShort",
    6468           5 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16, GRA_Cubic>);
    6469             : }
    6470             : // Kernel entry point for GUInt16 samples with GRA_Cubic: passes
                       // GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16, GRA_Cubic>
                       // to GWKRun and returns its result.
    6471          14 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
    6472             : {
    6473          14 :     return GWKRun(
    6474             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyUShort",
    6475          14 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16, GRA_Cubic>);
    6476             : }
    6477             : // Kernel entry point for GInt16 samples with GRA_CubicSpline. Unlike the
                       // bilinear/cubic wrappers, this one uses the generic
                       // GWKResampleNoMasksOrDstDensityOnlyThread rather than the Has4Sample variant.
    6478           6 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6479             : {
    6480           6 :     return GWKRun(
    6481             :         poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyShort",
    6482           6 :         GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_CubicSpline>);
    6483             : }
    6484             : // Kernel entry point for GUInt16 samples with GRA_CubicSpline: passes
                       // GWKResampleNoMasksOrDstDensityOnlyThread<GUInt16, GRA_CubicSpline>
                       // to GWKRun and returns its result.
    6485           5 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
    6486             : {
    6487           5 :     return GWKRun(
    6488             :         poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyUShort",
    6489           5 :         GWKResampleNoMasksOrDstDensityOnlyThread<GUInt16, GRA_CubicSpline>);
    6490             : }
    6491             : // Kernel entry point for GInt16 samples: hands GWKNearestThread<GInt16> to
                       // GWKRun together with the label "GWKNearestShort" and returns GWKRun's result.
    6492          26 : static CPLErr GWKNearestShort(GDALWarpKernel *poWK)
    6493             : {
    6494          26 :     return GWKRun(poWK, "GWKNearestShort", GWKNearestThread<GInt16>);
    6495             : }
    6496             : // Kernel entry point for GUInt16 samples: hands GWKNearestThread<GUInt16> to
                       // GWKRun together with the label "GWKNearestUnsignedShort".
    6497           6 : static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK)
    6498             : {
    6499           6 :     return GWKRun(poWK, "GWKNearestUnsignedShort", GWKNearestThread<GUInt16>);
    6500             : }
    6501             : // Kernel entry point for float samples using the generic thread function
                       // GWKResampleNoMasksOrDstDensityOnlyThread<float, GRA_NearestNeighbour>.
    6502          11 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
    6503             : {
    6504          11 :     return GWKRun(
    6505             :         poWK, "GWKNearestNoMasksOrDstDensityOnlyFloat",
    6506          11 :         GWKResampleNoMasksOrDstDensityOnlyThread<float, GRA_NearestNeighbour>);
    6507             : }
    6508             : // Kernel entry point for float samples: hands GWKNearestThread<float> to
                       // GWKRun together with the label "GWKNearestFloat" and returns GWKRun's result.
    6509          44 : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK)
    6510             : {
    6511          44 :     return GWKRun(poWK, "GWKNearestFloat", GWKNearestThread<float>);
    6512             : }
    6513             : 
    6514             : /************************************************************************/
    6515             : /*                           GWKAverageOrMode()                         */
    6516             : /*                                                                      */
    6517             : /************************************************************************/
    6518             : // COMPUTE_WEIGHT_Y(iSrcY): vertical weight of source row iSrcY in the window
                       // [iSrcYMin, iSrcYMax). The first row gets 1 - (dfYMin - iSrcYMin), or 1.0 when
                       // the window is a single row; the last row gets 1 - (iSrcYMax - dfYMax); every
                       // other row gets 1.0. The macro reads iSrcYMin, iSrcYMax, dfYMin and dfYMax
                       // from the scope where it is expanded.
    6519             : #define COMPUTE_WEIGHT_Y(iSrcY)                                                \
    6520             :     ((iSrcY == iSrcYMin)                                                       \
    6521             :          ? ((iSrcYMin + 1 == iSrcYMax) ? 1.0 : 1 - (dfYMin - iSrcYMin))        \
    6522             :      : (iSrcY + 1 == iSrcYMax) ? 1 - (iSrcYMax - dfYMax)                       \
    6523             :                                : 1.0)
    6524             : // COMPUTE_WEIGHT(iSrcX, dfWeightY): weight of source column iSrcX, i.e. dfWeightY
                       // scaled by a horizontal factor. The first column uses 1 - (dfXMin - iSrcXMin),
                       // or dfWeightY unchanged when the window is a single column; the last column
                       // uses 1 - (iSrcXMax - dfXMax); every other column keeps dfWeightY. The macro
                       // reads iSrcXMin, iSrcXMax, dfXMin and dfXMax from the expansion scope.
    6525             : #define COMPUTE_WEIGHT(iSrcX, dfWeightY)                                       \
    6526             :     ((iSrcX == iSrcXMin)       ? ((iSrcXMin + 1 == iSrcXMax)                   \
    6527             :                                       ? dfWeightY                              \
    6528             :                                       : dfWeightY * (1 - (dfXMin - iSrcXMin))) \
    6529             :      : (iSrcX + 1 == iSrcXMax) ? dfWeightY * (1 - (iSrcXMax - dfXMax))         \
    6530             :                                : dfWeightY)
    6531             : // Forward declaration: the thread function is defined later in the file.
    6532             : static void GWKAverageOrModeThread(void *pData);
    6533             : // Kernel entry point that hands GWKAverageOrModeThread to GWKRun under the
                       // label "GWKAverageOrMode" and returns GWKRun's result.
    6534         163 : static CPLErr GWKAverageOrMode(GDALWarpKernel *poWK)
    6535             : {
    6536         163 :     return GWKRun(poWK, "GWKAverageOrMode", GWKAverageOrModeThread);
    6537             : }
    6538             : 
    6539             : /************************************************************************/
    6540             : /*                   GWKAverageOrModeComputeLineCoords()                */
    6541             : /************************************************************************/
    6542             : // Computes, for destination line iDstY, the source-space coordinates of two
                       // opposite corners of every destination pixel.
                       //   padfX/padfY/padfZ    : corner (iDstX, iDstY), offset by nDstXOff/nDstYOff
                       //   padfX2/padfY2/padfZ2 : corner (iDstX + 1, iDstY + 1), same offsets
                       //   pabSuccess/pabSuccess2 : per-point flags filled by the transformer
                       // Each array is written at indices [0, nDstXSize), so it must hold at least
                       // poWK->nDstXSize entries. When dfSrcCoordPrecision > 0, both coordinate sets
                       // are additionally passed through GWKRoundSourceCoordinates.
    6543        8183 : static void GWKAverageOrModeComputeLineCoords(
    6544             :     const GWKJobStruct *psJob, double *padfX, double *padfX2, double *padfY,
    6545             :     double *padfY2, double *padfZ, double *padfZ2, int *pabSuccess,
    6546             :     int *pabSuccess2, int iDstY, double dfSrcCoordPrecision,
    6547             :     double dfErrorThreshold)
    6548             : {
    6549        8183 :     const GDALWarpKernel *poWK = psJob->poWK;
    6550        8183 :     const int nDstXSize = poWK->nDstXSize;
    6551             : 
    6552             :     // Setup points to transform to source image space.
    6553     2097530 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6554             :     {
    6555     2089340 :         padfX[iDstX] = iDstX + poWK->nDstXOff;
    6556     2089340 :         padfY[iDstX] = iDstY + poWK->nDstYOff;
    6557     2089340 :         padfZ[iDstX] = 0.0;
    6558     2089340 :         padfX2[iDstX] = iDstX + 1.0 + poWK->nDstXOff;
    6559     2089340 :         padfY2[iDstX] = iDstY + 1.0 + poWK->nDstYOff;
    6560     2089340 :         padfZ2[iDstX] = 0.0;
    6561             :     }
    6562             : 
    6563             :     /* ----------------------------------------------------------------- */
    6564             :     /*      Transform the points from destination pixel/line coordinates */
    6565             :     /*      to source pixel/line coordinates.                            */
    6566             :     /* ----------------------------------------------------------------- */
    6567        8183 :     poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX, padfY,
    6568             :                          padfZ, pabSuccess);
    6569        8183 :     poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX2,
    6570             :                          padfY2, padfZ2, pabSuccess2);
    6571             : // Optional post-processing, skipped unless a positive precision was requested.
                       // The last two arguments are the destination X/Y of the first point of each
                       // set. GWKRoundSourceCoordinates is defined elsewhere in the file.
    6572        8183 :     if (dfSrcCoordPrecision > 0.0)
    6573             :     {
    6574           0 :         GWKRoundSourceCoordinates(nDstXSize, padfX, padfY, padfZ, pabSuccess,
    6575             :                                   dfSrcCoordPrecision, dfErrorThreshold,
    6576           0 :                                   poWK->pfnTransformer, psJob->pTransformerArg,
    6577           0 :                                   poWK->nDstXOff, iDstY + poWK->nDstYOff);
    6578           0 :         GWKRoundSourceCoordinates(
    6579             :             nDstXSize, padfX2, padfY2, padfZ2, pabSuccess2, dfSrcCoordPrecision,
    6580           0 :             dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    6581           0 :             1.0 + poWK->nDstXOff, iDstY + 1.0 + poWK->nDstYOff);
    6582             :     }
    6583        8183 : }
    6584             : 
    6585             : /************************************************************************/
    6586             : /*              GWKAverageOrModeComputeSourceCoords()                   */
    6587             : /************************************************************************/
    6588             : // Derives the source window covered by destination pixel iDstX from its two
                       // transformed corners (padfX/padfY and padfX2/padfY2 at index iDstX).
                       //
                       // Returns false when a corner lies further than nXMargin/nYMargin outside the
                       // source extent, or when the resulting X or Y interval does not intersect the
                       // source raster (within EPSILON); true otherwise.
                       //
                       // Outputs (may be partially written when false is returned):
                       //   bWrapOverX        : true when the antimeridian heuristic triggered
                       //   dfXMin..dfYMax    : window bounds relative to nSrcXOff/nSrcYOff
                       //   iSrcXMin..iSrcYMax: integer window, max bounds exclusive for the callers'
                       //                       loops; iSrcXMax may exceed nSrcXSize when bWrapOverX
                       //
                       // Side effect: padfX/padfX2 and padfY/padfY2 entries at iDstX may be swapped,
                       // and padfX2[iDstX] is increased by nSrcXSize when wrapping is detected.
    6589     2089340 : static bool GWKAverageOrModeComputeSourceCoords(
    6590             :     const GWKJobStruct *psJob, double *padfX, double *padfX2, double *padfY,
    6591             :     double *padfY2, int iDstX, int iDstY, int nXMargin, int nYMargin,
    6592             :     // Output:
    6593             :     bool &bWrapOverX, double &dfXMin, double &dfYMin, double &dfXMax,
    6594             :     double &dfYMax, int &iSrcXMin, int &iSrcYMin, int &iSrcXMax, int &iSrcYMax)
    6595             : {
    6596     2089340 :     const GDALWarpKernel *poWK = psJob->poWK;
    6597     2089340 :     const int nSrcXSize = poWK->nSrcXSize;
    6598     2089340 :     const int nSrcYSize = poWK->nSrcYSize;
    6599             : 
    6600             :     // Add some checks so that padfX[iDstX] - poWK->nSrcXOff is in
    6601             :     // reasonable range (https://github.com/OSGeo/gdal/issues/2365)
                           // Written as a negated conjunction so that NaN coordinates are rejected
                           // too: any comparison involving NaN evaluates to false.
    6602     2089340 :     if (!(padfX[iDstX] - poWK->nSrcXOff >= -nXMargin &&
    6603     1991690 :           padfX2[iDstX] - poWK->nSrcXOff >= -nXMargin &&
    6604     1991690 :           padfY[iDstX] - poWK->nSrcYOff >= -nYMargin &&
    6605     1965300 :           padfY2[iDstX] - poWK->nSrcYOff >= -nYMargin &&
    6606     1965300 :           padfX[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
    6607     1911930 :           padfX2[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
    6608     1911430 :           padfY[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin &&
    6609     1910040 :           padfY2[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin))
    6610             :     {
    6611      179362 :         return false;
    6612             :     }
    6613             : 
    6614             :     // Compute corners in source crs.
    6615             : 
    6616             :     // The transformation might not have preserved ordering of
    6617             :     // coordinates so do the necessary swapping (#5433).
    6618             :     // NOTE: this is really an approximate fix. To do something
    6619             :     // more precise we would for example need to compute the
    6620             :     // transformation of coordinates in the
    6621             :     // [iDstX,iDstY]x[iDstX+1,iDstY+1] square back to source
    6622             :     // coordinates, and take the bounding box of the resulting source
    6623             :     // coordinates.
    6624             : 
    6625     1909980 :     if (padfX[iDstX] > padfX2[iDstX])
    6626      268744 :         std::swap(padfX[iDstX], padfX2[iDstX]);
    6627             : 
    6628             :     // Detect situations where the target pixel is close to the
    6629             :     // antimeridian and when padfX[iDstX] and padfX2[iDstX] are very
    6630             :     // close to the left-most and right-most columns of the source
    6631             :     // raster. The 2 value below was experimentally determined to
    6632             :     // avoid false-positives and false-negatives.
    6633             :     // Addresses https://github.com/OSGeo/gdal/issues/6478
    6634     1909980 :     bWrapOverX = false;
    6635     1909980 :     const int nThresholdWrapOverX = std::min(2, nSrcXSize / 10);
    6636     1909980 :     if (poWK->nSrcXOff == 0 &&
    6637     1909980 :         padfX[iDstX] * poWK->dfXScale < nThresholdWrapOverX &&
    6638       16499 :         (nSrcXSize - padfX2[iDstX]) * poWK->dfXScale < nThresholdWrapOverX)
    6639             :     {
    6640             :         // Check there is a discontinuity by checking at mid-pixel.
    6641             :         // NOTE: all this remains fragile. To confidently
    6642             :         // detect antimeridian warping we should probably try to access
    6643             :         // georeferenced coordinates, and not rely only on tests on
    6644             :         // image space coordinates. But accessing georeferenced
    6645             :         // coordinates from here is not trivial, and we would for example
    6646             :         // have to handle both geographic, Mercator, etc.
    6647             :         // Let's hope this heuristic is good enough for now.
    6648        1041 :         double x = iDstX + 0.5 + poWK->nDstXOff;
    6649        1041 :         double y = iDstY + poWK->nDstYOff;
    6650        1041 :         double z = 0;
    6651        1041 :         int bSuccess = FALSE;
    6652        1041 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1, &x, &y, &z,
    6653             :                              &bSuccess);
                               // A mid-pixel X that lands left of the (already ordered) minimum
                               // is taken as the discontinuity: swap the bounds back and shift
                               // the upper one by a full raster width.
    6654        1041 :         if (bSuccess && x < padfX[iDstX])
    6655             :         {
    6656        1008 :             bWrapOverX = true;
    6657        1008 :             std::swap(padfX[iDstX], padfX2[iDstX]);
    6658        1008 :             padfX2[iDstX] += nSrcXSize;
    6659             :         }
    6660             :     }
    6661             : 
    6662     1909980 :     dfXMin = padfX[iDstX] - poWK->nSrcXOff;
    6663     1909980 :     dfXMax = padfX2[iDstX] - poWK->nSrcXOff;
    6664     1909980 :     constexpr double EPSILON = 1e-10;
    6665             :     // Check that [dfXMin, dfXMax] intersect with [0,nSrcXSize] with a tolerance
    6666     1909980 :     if (!(dfXMax > -EPSILON && dfXMin < nSrcXSize + EPSILON))
    6667         156 :         return false;
    6668     1909830 :     iSrcXMin = static_cast<int>(std::max(floor(dfXMin + EPSILON), 0.0));
    6669     1909830 :     iSrcXMax = static_cast<int>(
    6670     1909830 :         std::min(ceil(dfXMax - EPSILON), static_cast<double>(INT_MAX)));
                           // Without wrap-around the window is clamped to the raster width; with
                           // wrap-around it is left unclamped.
    6671     1909830 :     if (!bWrapOverX)
    6672     1908820 :         iSrcXMax = std::min(iSrcXMax, nSrcXSize);
                           // Widen an empty window to one column when there is still room.
    6673     1909830 :     if (iSrcXMin == iSrcXMax && iSrcXMax < nSrcXSize)
    6674         472 :         iSrcXMax++;
    6675             : // Same ordering fix and window computation along Y (no wrap-around on this axis).
    6676     1909830 :     if (padfY[iDstX] > padfY2[iDstX])
    6677      270117 :         std::swap(padfY[iDstX], padfY2[iDstX]);
    6678     1909830 :     dfYMin = padfY[iDstX] - poWK->nSrcYOff;
    6679     1909830 :     dfYMax = padfY2[iDstX] - poWK->nSrcYOff;
    6680             :     // Check that [dfYMin, dfYMax] intersect with [0,nSrcYSize] with a tolerance
    6681     1909830 :     if (!(dfYMax > -EPSILON && dfYMin < nSrcYSize + EPSILON))
    6682          78 :         return false;
    6683     1909750 :     iSrcYMin = static_cast<int>(std::max(floor(dfYMin + EPSILON), 0.0));
    6684     1909750 :     iSrcYMax = std::min(static_cast<int>(ceil(dfYMax - EPSILON)), nSrcYSize);
    6685     1909750 :     if (iSrcYMin == iSrcYMax && iSrcYMax < nSrcYSize)
    6686           0 :         iSrcYMax++;
    6687             : 
    6688     1909750 :     return true;
    6689             : }
    6690             : 
    6691             : /************************************************************************/
    6692             : /*                         GWKModeRealType()                            */
    6693             : /************************************************************************/
    6694             : // Equality predicate used by GWKModeRealType() to look up an existing bin for a
                       // value. The generic version is plain operator==.
    6695       17780 : template <class T> static inline bool IsSame(T a, T b)
    6696             : {
    6697       17780 :     return a == b;
    6698             : }
    6699             : // GFloat16: additionally treats two NaN values as the same (NaN == NaN is false).
    6700           0 : template <> bool IsSame<GFloat16>(GFloat16 a, GFloat16 b)
    6701             : {
    6702           0 :     return a == b || (CPLIsNan(a) && CPLIsNan(b));
    6703             : }
    6704             : // float: additionally treats two NaN values as the same.
    6705          18 : template <> bool IsSame<float>(float a, float b)
    6706             : {
    6707          18 :     return a == b || (std::isnan(a) && std::isnan(b));
    6708             : }
    6709             : // double: additionally treats two NaN values as the same.
    6710          56 : template <> bool IsSame<double>(double a, double b)
    6711             : {
    6712          56 :     return a == b || (std::isnan(a) && std::isnan(b));
    6713             : }
    6714             : 
    6715          19 : template <class T> static void GWKModeRealType(GWKJobStruct *psJob)
    6716             : {
    6717          19 :     const GDALWarpKernel *poWK = psJob->poWK;
    6718          19 :     const int iYMin = psJob->iYMin;
    6719          19 :     const int iYMax = psJob->iYMax;
    6720          19 :     const int nDstXSize = poWK->nDstXSize;
    6721          19 :     const int nSrcXSize = poWK->nSrcXSize;
    6722          19 :     const int nSrcYSize = poWK->nSrcYSize;
    6723          19 :     const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
    6724             : 
    6725          19 :     T *pVals = nullptr;
    6726          19 :     float *pafCounts = nullptr;
    6727             : 
    6728          19 :     if (nSrcXSize > 0 && nSrcYSize > 0)
    6729             :     {
    6730             :         pVals = static_cast<T *>(
    6731          19 :             VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(T)));
    6732             :         pafCounts = static_cast<float *>(
    6733          19 :             VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
    6734          19 :         if (pVals == nullptr || pafCounts == nullptr)
    6735             :         {
    6736           0 :             VSIFree(pVals);
    6737           0 :             VSIFree(pafCounts);
    6738           0 :             return;
    6739             :         }
    6740             :     }
    6741             : 
    6742             :     /* -------------------------------------------------------------------- */
    6743             :     /*      Allocate x,y,z coordinate arrays for transformation ... two     */
    6744             :     /*      scanlines worth of positions.                                   */
    6745             :     /* -------------------------------------------------------------------- */
    6746             : 
    6747             :     double *padfX =
    6748          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6749             :     double *padfY =
    6750          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6751             :     double *padfZ =
    6752          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6753             :     double *padfX2 =
    6754          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6755             :     double *padfY2 =
    6756          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6757             :     double *padfZ2 =
    6758          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6759          19 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    6760          19 :     int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    6761             : 
    6762          19 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    6763          19 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    6764          19 :     const double dfErrorThreshold = CPLAtof(
    6765          19 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    6766             : 
    6767          19 :     const int nXMargin =
    6768          19 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
    6769          19 :     const int nYMargin =
    6770          19 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
    6771             : 
    6772             :     /* ==================================================================== */
    6773             :     /*      Loop over output lines.                                         */
    6774             :     /* ==================================================================== */
    6775         116 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    6776             :     {
    6777          97 :         GWKAverageOrModeComputeLineCoords(
    6778             :             psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
    6779             :             pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
    6780             : 
    6781             :         // Loop over pixels in output scanline.
    6782        3514 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6783             :         {
    6784        3417 :             GPtrDiff_t iSrcOffset = 0;
    6785        3417 :             double dfDensity = 1.0;
    6786        3417 :             bool bHasFoundDensity = false;
    6787             : 
    6788        3417 :             bool bWrapOverX = false;
    6789        3417 :             double dfXMin = 0;
    6790        3417 :             double dfYMin = 0;
    6791        3417 :             double dfXMax = 0;
    6792        3417 :             double dfYMax = 0;
    6793        3417 :             int iSrcXMin = 0;
    6794        3417 :             int iSrcYMin = 0;
    6795        3417 :             int iSrcXMax = 0;
    6796        3417 :             int iSrcYMax = 0;
    6797        3417 :             if (!GWKAverageOrModeComputeSourceCoords(
    6798             :                     psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
    6799             :                     nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
    6800             :                     iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
    6801             :             {
    6802           0 :                 continue;
    6803             :             }
    6804             : 
    6805        3417 :             const GPtrDiff_t iDstOffset =
    6806        3417 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    6807             : 
    6808             :             // Loop processing each band.
    6809        6834 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6810             :             {
    6811        3417 :                 double dfBandDensity = 0.0;
    6812             : 
    6813        3417 :                 int nBins = 0;
    6814        3417 :                 int iModeIndex = -1;
    6815        3417 :                 T nVal{};
    6816             : 
    6817       10248 :                 for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    6818             :                 {
    6819        6831 :                     const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    6820        6831 :                     iSrcOffset =
    6821        6831 :                         iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    6822       20530 :                     for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    6823             :                          iSrcX++, iSrcOffset++)
    6824             :                     {
    6825       13699 :                         if (bWrapOverX)
    6826           0 :                             iSrcOffset =
    6827           0 :                                 (iSrcX % nSrcXSize) +
    6828           0 :                                 static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    6829             : 
    6830       13699 :                         if (poWK->panUnifiedSrcValid != nullptr &&
    6831           0 :                             !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    6832           0 :                             continue;
    6833             : 
    6834       13699 :                         if (GWKGetPixelT(poWK, iBand, iSrcOffset,
    6835       27398 :                                          &dfBandDensity, &nVal) &&
    6836       13699 :                             dfBandDensity > BAND_DENSITY_THRESHOLD)
    6837             :                         {
    6838       13699 :                             const double dfWeight =
    6839       13699 :                                 COMPUTE_WEIGHT(iSrcX, dfWeightY);
    6840             : 
    6841             :                             // Check array for existing entry.
    6842       13699 :                             int i = 0;
    6843       29194 :                             for (i = 0; i < nBins; ++i)
    6844             :                             {
    6845       17807 :                                 if (IsSame(pVals[i], nVal))
    6846             :                                 {
    6847             : 
    6848        2312 :                                     pafCounts[i] +=
    6849        2312 :                                         static_cast<float>(dfWeight);
    6850        2312 :                                     bool bValIsMaxCount =
    6851        2312 :                                         (pafCounts[i] > pafCounts[iModeIndex]);
    6852             : 
    6853        2312 :                                     if (!bValIsMaxCount &&
    6854        1498 :                                         pafCounts[i] == pafCounts[iModeIndex])
    6855             :                                     {
    6856        1490 :                                         switch (eTieStrategy)
    6857             :                                         {
    6858        1477 :                                             case GWKTS_First:
    6859        1477 :                                                 break;
    6860           6 :                                             case GWKTS_Min:
    6861           6 :                                                 bValIsMaxCount =
    6862           6 :                                                     nVal < pVals[iModeIndex];
    6863           6 :                                                 break;
    6864           7 :                                             case GWKTS_Max:
    6865           7 :                                                 bValIsMaxCount =
    6866           7 :                                                     nVal > pVals[iModeIndex];
    6867           7 :                                                 break;
    6868             :                                         }
    6869             :                                     }
    6870             : 
    6871        2312 :                                     if (bValIsMaxCount)
    6872             :                                     {
    6873         817 :                                         iModeIndex = i;
    6874             :                                     }
    6875             : 
    6876        2312 :                                     break;
    6877             :                                 }
    6878             :                             }
    6879             : 
    6880             :                             // Add to arr if entry not already there.
    6881       13699 :                             if (i == nBins)
    6882             :                             {
    6883       11387 :                                 pVals[i] = nVal;
    6884       11387 :                                 pafCounts[i] = static_cast<float>(dfWeight);
    6885             : 
    6886       11387 :                                 if (iModeIndex < 0)
    6887        3417 :                                     iModeIndex = i;
    6888             : 
    6889       11387 :                                 ++nBins;
    6890             :                             }
    6891             :                         }
    6892             :                     }
    6893             :                 }
    6894             : 
    6895        3417 :                 if (iModeIndex != -1)
    6896             :                 {
    6897        3417 :                     nVal = pVals[iModeIndex];
    6898        3417 :                     dfBandDensity = 1;
    6899        3417 :                     bHasFoundDensity = true;
    6900             :                 }
    6901             : 
    6902             :                 // We have a computed value from the source.  Now apply it
    6903             :                 // to the destination pixel
    6904        3417 :                 if (bHasFoundDensity)
    6905             :                 {
    6906        3417 :                     GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
    6907             :                                           dfBandDensity, nVal);
    6908             :                 }
    6909             :             }
    6910             : 
    6911        3417 :             if (!bHasFoundDensity)
    6912           0 :                 continue;
    6913             : 
    6914             :             /* --------------------------------------------------------------------
    6915             :              */
    6916             :             /*      Update destination density/validity masks. */
    6917             :             /* --------------------------------------------------------------------
    6918             :              */
    6919        3417 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    6920             : 
    6921        3417 :             if (poWK->panDstValid != nullptr)
    6922             :             {
    6923           0 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    6924             :             }
    6925             :         } /* Next iDstX */
    6926             : 
    6927             :         /* --------------------------------------------------------------------
    6928             :          */
    6929             :         /*      Report progress to the user, and optionally cancel out. */
    6930             :         /* --------------------------------------------------------------------
    6931             :          */
    6932          97 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    6933           0 :             break;
    6934             :     }
    6935             : 
    6936             :     /* -------------------------------------------------------------------- */
    6937             :     /*      Cleanup and return.                                             */
    6938             :     /* -------------------------------------------------------------------- */
    6939          19 :     CPLFree(padfX);
    6940          19 :     CPLFree(padfY);
    6941          19 :     CPLFree(padfZ);
    6942          19 :     CPLFree(padfX2);
    6943          19 :     CPLFree(padfY2);
    6944          19 :     CPLFree(padfZ2);
    6945          19 :     CPLFree(pabSuccess);
    6946          19 :     CPLFree(pabSuccess2);
    6947          19 :     VSIFree(pVals);
    6948          19 :     VSIFree(pafCounts);
    6949             : }
    6950             : 
    6951             : /************************************************************************/
    6952             : /*                        GWKModeComplexType()                          */
    6953             : /************************************************************************/
    6954             : 
    6955           8 : static void GWKModeComplexType(GWKJobStruct *psJob)
    6956             : {
    6957           8 :     const GDALWarpKernel *poWK = psJob->poWK;
    6958           8 :     const int iYMin = psJob->iYMin;
    6959           8 :     const int iYMax = psJob->iYMax;
    6960           8 :     const int nDstXSize = poWK->nDstXSize;
    6961           8 :     const int nSrcXSize = poWK->nSrcXSize;
    6962           8 :     const int nSrcYSize = poWK->nSrcYSize;
    6963           8 :     const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
    6964             :     const double dfMultFactorVerticalShiftPipeline =
    6965           8 :         poWK->bApplyVerticalShift
    6966           8 :             ? CPLAtof(CSLFetchNameValueDef(
    6967           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    6968             :                   "1.0"))
    6969           8 :             : 0.0;
    6970             : 
    6971           8 :     double *padfRealVals = nullptr;
    6972           8 :     double *padfImagVals = nullptr;
    6973           8 :     float *pafCounts = nullptr;
    6974             : 
    6975           8 :     if (nSrcXSize > 0 && nSrcYSize > 0)
    6976             :     {
    6977             :         padfRealVals = static_cast<double *>(
    6978           8 :             VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(double)));
    6979             :         padfImagVals = static_cast<double *>(
    6980           8 :             VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(double)));
    6981             :         pafCounts = static_cast<float *>(
    6982           8 :             VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
    6983           8 :         if (padfRealVals == nullptr || padfImagVals == nullptr ||
    6984             :             pafCounts == nullptr)
    6985             :         {
    6986           0 :             VSIFree(padfRealVals);
    6987           0 :             VSIFree(padfImagVals);
    6988           0 :             VSIFree(pafCounts);
    6989           0 :             return;
    6990             :         }
    6991             :     }
    6992             : 
    6993             :     /* -------------------------------------------------------------------- */
    6994             :     /*      Allocate x,y,z coordinate arrays for transformation ... two     */
    6995             :     /*      scanlines worth of positions.                                   */
    6996             :     /* -------------------------------------------------------------------- */
    6997             : 
    6998             :     double *padfX =
    6999           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7000             :     double *padfY =
    7001           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7002             :     double *padfZ =
    7003           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7004             :     double *padfX2 =
    7005           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7006             :     double *padfY2 =
    7007           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7008             :     double *padfZ2 =
    7009           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7010           8 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    7011           8 :     int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    7012             : 
    7013           8 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    7014           8 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    7015           8 :     const double dfErrorThreshold = CPLAtof(
    7016           8 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    7017             : 
    7018             :     const int nXMargin =
    7019           8 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
    7020             :     const int nYMargin =
    7021           8 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
    7022             : 
    7023             :     /* ==================================================================== */
    7024             :     /*      Loop over output lines.                                         */
    7025             :     /* ==================================================================== */
    7026          16 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    7027             :     {
    7028           8 :         GWKAverageOrModeComputeLineCoords(
    7029             :             psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
    7030             :             pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
    7031             : 
    7032             :         // Loop over pixels in output scanline.
    7033          16 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    7034             :         {
    7035           8 :             GPtrDiff_t iSrcOffset = 0;
    7036           8 :             double dfDensity = 1.0;
    7037           8 :             bool bHasFoundDensity = false;
    7038             : 
    7039           8 :             bool bWrapOverX = false;
    7040           8 :             double dfXMin = 0;
    7041           8 :             double dfYMin = 0;
    7042           8 :             double dfXMax = 0;
    7043           8 :             double dfYMax = 0;
    7044           8 :             int iSrcXMin = 0;
    7045           8 :             int iSrcYMin = 0;
    7046           8 :             int iSrcXMax = 0;
    7047           8 :             int iSrcYMax = 0;
    7048           8 :             if (!GWKAverageOrModeComputeSourceCoords(
    7049             :                     psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
    7050             :                     nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
    7051             :                     iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
    7052             :             {
    7053           0 :                 continue;
    7054             :             }
    7055             : 
    7056           8 :             const GPtrDiff_t iDstOffset =
    7057           8 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    7058             : 
    7059             :             // Loop processing each band.
    7060          16 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7061             :             {
    7062           8 :                 double dfBandDensity = 0.0;
    7063             : 
    7064           8 :                 int nBins = 0;
    7065           8 :                 int iModeIndex = -1;
    7066           8 :                 double dfValueReal = 0;
    7067           8 :                 double dfValueImag = 0;
    7068             : 
    7069          16 :                 for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7070             :                 {
    7071           8 :                     const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7072           8 :                     iSrcOffset =
    7073           8 :                         iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7074          38 :                     for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7075             :                          iSrcX++, iSrcOffset++)
    7076             :                     {
    7077          30 :                         if (bWrapOverX)
    7078           0 :                             iSrcOffset =
    7079           0 :                                 (iSrcX % nSrcXSize) +
    7080           0 :                                 static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7081             : 
    7082          30 :                         if (poWK->panUnifiedSrcValid != nullptr &&
    7083           0 :                             !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    7084           0 :                             continue;
    7085             : 
    7086          30 :                         if (GWKGetPixelValue(poWK, iBand, iSrcOffset,
    7087             :                                              &dfBandDensity, &dfValueReal,
    7088          60 :                                              &dfValueImag) &&
    7089          30 :                             dfBandDensity > BAND_DENSITY_THRESHOLD)
    7090             :                         {
    7091          30 :                             const double dfWeight =
    7092          30 :                                 COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7093             : 
    7094             :                             // Check array for existing entry.
    7095          30 :                             int i = 0;
    7096          49 :                             for (i = 0; i < nBins; ++i)
    7097             :                             {
    7098          47 :                                 if (IsSame(padfRealVals[i], dfValueReal) &&
    7099          14 :                                     IsSame(padfImagVals[i], dfValueImag))
    7100             :                                 {
    7101             : 
    7102          14 :                                     pafCounts[i] +=
    7103          14 :                                         static_cast<float>(dfWeight);
    7104          14 :                                     bool bValIsMaxCount =
    7105          14 :                                         (pafCounts[i] > pafCounts[iModeIndex]);
    7106             : 
    7107          14 :                                     if (!bValIsMaxCount &&
    7108           6 :                                         pafCounts[i] == pafCounts[iModeIndex])
    7109             :                                     {
    7110           3 :                                         switch (eTieStrategy)
    7111             :                                         {
    7112           3 :                                             case GWKTS_First:
    7113           3 :                                                 break;
    7114           0 :                                             case GWKTS_Min:
    7115           0 :                                                 bValIsMaxCount =
    7116           0 :                                                     dfValueReal <
    7117           0 :                                                     padfRealVals[iModeIndex];
    7118           0 :                                                 break;
    7119           0 :                                             case GWKTS_Max:
    7120           0 :                                                 bValIsMaxCount =
    7121           0 :                                                     dfValueReal >
    7122           0 :                                                     padfRealVals[iModeIndex];
    7123           0 :                                                 break;
    7124             :                                         }
    7125             :                                     }
    7126             : 
    7127          14 :                                     if (bValIsMaxCount)
    7128             :                                     {
    7129           8 :                                         iModeIndex = i;
    7130             :                                     }
    7131             : 
    7132          14 :                                     break;
    7133             :                                 }
    7134             :                             }
    7135             : 
    7136             :                             // Add to arr if entry not already there.
    7137          30 :                             if (i == nBins)
    7138             :                             {
    7139          16 :                                 padfRealVals[i] = dfValueReal;
    7140          16 :                                 padfImagVals[i] = dfValueImag;
    7141          16 :                                 pafCounts[i] = static_cast<float>(dfWeight);
    7142             : 
    7143          16 :                                 if (iModeIndex < 0)
    7144           8 :                                     iModeIndex = i;
    7145             : 
    7146          16 :                                 ++nBins;
    7147             :                             }
    7148             :                         }
    7149             :                     }
    7150             :                 }
    7151             : 
    7152           8 :                 if (iModeIndex != -1)
    7153             :                 {
    7154           8 :                     dfValueReal = padfRealVals[iModeIndex];
    7155           8 :                     dfValueImag = padfImagVals[iModeIndex];
    7156           8 :                     dfBandDensity = 1;
    7157             : 
    7158           8 :                     if (poWK->bApplyVerticalShift)
    7159             :                     {
    7160           0 :                         if (!std::isfinite(padfZ[iDstX]))
    7161           0 :                             continue;
    7162             :                         // Subtract padfZ[] since the coordinate
    7163             :                         // transformation is from target to source
    7164           0 :                         dfValueReal =
    7165           0 :                             dfValueReal * poWK->dfMultFactorVerticalShift -
    7166           0 :                             padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
    7167             :                     }
    7168             : 
    7169           8 :                     bHasFoundDensity = true;
    7170             :                 }
    7171             : 
    7172             :                 // We have a computed value from the source.  Now apply it
    7173             :                 // to the destination pixel
    7174           8 :                 if (bHasFoundDensity)
    7175             :                 {
    7176           8 :                     GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    7177             :                                      dfValueReal, dfValueImag);
    7178             :                 }
    7179             :             }
    7180             : 
    7181           8 :             if (!bHasFoundDensity)
    7182           0 :                 continue;
    7183             : 
    7184             :             /* --------------------------------------------------------------------
    7185             :              */
    7186             :             /*      Update destination density/validity masks. */
    7187             :             /* --------------------------------------------------------------------
    7188             :              */
    7189           8 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    7190             : 
    7191           8 :             if (poWK->panDstValid != nullptr)
    7192             :             {
    7193           0 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    7194             :             }
    7195             :         } /* Next iDstX */
    7196             : 
    7197             :         /* --------------------------------------------------------------------
    7198             :          */
    7199             :         /*      Report progress to the user, and optionally cancel out. */
    7200             :         /* --------------------------------------------------------------------
    7201             :          */
    7202           8 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    7203           0 :             break;
    7204             :     }
    7205             : 
    7206             :     /* -------------------------------------------------------------------- */
    7207             :     /*      Cleanup and return.                                             */
    7208             :     /* -------------------------------------------------------------------- */
    7209           8 :     CPLFree(padfX);
    7210           8 :     CPLFree(padfY);
    7211           8 :     CPLFree(padfZ);
    7212           8 :     CPLFree(padfX2);
    7213           8 :     CPLFree(padfY2);
    7214           8 :     CPLFree(padfZ2);
    7215           8 :     CPLFree(pabSuccess);
    7216           8 :     CPLFree(pabSuccess2);
    7217           8 :     VSIFree(padfRealVals);
    7218           8 :     VSIFree(padfImagVals);
    7219           8 :     VSIFree(pafCounts);
    7220             : }
    7221             : 
    7222             : /************************************************************************/
    7223             : /*                       GWKAverageOrModeThread()                       */
    7224             : /************************************************************************/
    7225             : 
    7226             : // Overall logic based on GWKGeneralCaseThread().
    7227         163 : static void GWKAverageOrModeThread(void *pData)
    7228             : {
    7229         163 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    7230         163 :     const GDALWarpKernel *poWK = psJob->poWK;
    7231         163 :     const int iYMin = psJob->iYMin;
    7232         163 :     const int iYMax = psJob->iYMax;
    7233             :     const double dfMultFactorVerticalShiftPipeline =
    7234         163 :         poWK->bApplyVerticalShift
    7235         163 :             ? CPLAtof(CSLFetchNameValueDef(
    7236           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    7237             :                   "1.0"))
    7238         163 :             : 0.0;
    7239             : 
    7240         163 :     const int nDstXSize = poWK->nDstXSize;
    7241         163 :     const int nSrcXSize = poWK->nSrcXSize;
    7242             : 
    7243             :     /* -------------------------------------------------------------------- */
    7244             :     /*      Find out which algorithm to use (small optim.)                  */
    7245             :     /* -------------------------------------------------------------------- */
    7246             : 
    7247             :     // Only used for GRA_Mode
    7248         163 :     float *pafCounts = nullptr;
    7249         163 :     int nBins = 0;
    7250         163 :     int nBinsOffset = 0;
    7251         163 :     const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
    7252             : 
    7253             :     // Only used with Q1, Med and Q3
    7254         163 :     float quant = 0.0f;
    7255             : 
    7256             :     // To control array allocation only when data type is complex
    7257         163 :     const bool bIsComplex = GDALDataTypeIsComplex(poWK->eWorkingDataType) != 0;
    7258             : 
    7259         163 :     if (poWK->eResample == GRA_Mode)
    7260             :     {
    7261          45 :         if (poWK->bApplyVerticalShift)
    7262             :         {
    7263           0 :             return GWKModeComplexType(psJob);
    7264             :         }
    7265             : 
    7266          45 :         switch (poWK->eWorkingDataType)
    7267             :         {
    7268           7 :             case GDT_Byte:
    7269           7 :                 nBins = 256;
    7270           7 :                 break;
    7271             : 
    7272           0 :             case GDT_Int8:
    7273           0 :                 nBins = 256;
    7274           0 :                 nBinsOffset = nBins / 2;
    7275           0 :                 break;
    7276             : 
    7277           1 :             case GDT_UInt16:
    7278           1 :                 nBins = 65536;
    7279           1 :                 break;
    7280             : 
    7281          10 :             case GDT_Int16:
    7282          10 :                 nBins = 65536;
    7283          10 :                 nBinsOffset = nBins / 2;
    7284          10 :                 break;
    7285             : 
    7286          10 :             case GDT_Int32:
    7287          10 :                 return GWKModeRealType<int32_t>(psJob);
    7288             : 
    7289           1 :             case GDT_UInt32:
    7290           1 :                 return GWKModeRealType<uint32_t>(psJob);
    7291             : 
    7292           1 :             case GDT_Int64:
    7293           1 :                 return GWKModeRealType<int64_t>(psJob);
    7294             : 
    7295           1 :             case GDT_UInt64:
    7296           1 :                 return GWKModeRealType<uint64_t>(psJob);
    7297             : 
    7298           0 :             case GDT_Float16:
    7299           0 :                 return GWKModeRealType<GFloat16>(psJob);
    7300             : 
    7301           4 :             case GDT_Float32:
    7302           4 :                 return GWKModeRealType<float>(psJob);
    7303             : 
    7304           2 :             case GDT_Float64:
    7305           2 :                 return GWKModeRealType<double>(psJob);
    7306             : 
    7307           8 :             case GDT_CInt16:
    7308             :             case GDT_CInt32:
    7309             :             case GDT_CFloat16:
    7310             :             case GDT_CFloat32:
    7311             :             case GDT_CFloat64:
    7312           8 :                 return GWKModeComplexType(psJob);
    7313             : 
    7314           0 :             case GDT_Unknown:
    7315             :             case GDT_TypeCount:
    7316           0 :                 CPLAssert(false);
    7317             :                 return;
    7318             :         }
    7319             : 
    7320          18 :         if (nBins)
    7321             :         {
    7322             :             pafCounts =
    7323          18 :                 static_cast<float *>(VSI_MALLOC_VERBOSE(nBins * sizeof(float)));
    7324          18 :             if (pafCounts == nullptr)
    7325           0 :                 return;
    7326             :         }
    7327             :     }
    7328         118 :     else if (poWK->eResample == GRA_Med)
    7329             :     {
    7330           6 :         quant = 0.5f;
    7331             :     }
    7332         112 :     else if (poWK->eResample == GRA_Q1)
    7333             :     {
    7334          10 :         quant = 0.25f;
    7335             :     }
    7336         102 :     else if (poWK->eResample == GRA_Q3)
    7337             :     {
    7338           5 :         quant = 0.75f;
    7339             :     }
    7340          97 :     else if (poWK->eResample != GRA_Average && poWK->eResample != GRA_RMS &&
    7341          11 :              poWK->eResample != GRA_Min && poWK->eResample != GRA_Max)
    7342             :     {
    7343             :         // Other resample algorithms not permitted here.
    7344           0 :         CPLError(CE_Fatal, CPLE_AppDefined,
    7345             :                  "GDALWarpKernel():GWKAverageOrModeThread() ERROR, "
    7346             :                  "illegal resample");
    7347             :     }
    7348             : 
    7349         136 :     CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread()");
    7350             : 
    7351             :     /* -------------------------------------------------------------------- */
    7352             :     /*      Allocate x,y,z coordinate arrays for transformation ... two     */
    7353             :     /*      scanlines worth of positions.                                   */
    7354             :     /* -------------------------------------------------------------------- */
    7355             : 
    7356             :     double *padfX =
    7357         136 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7358             :     double *padfY =
    7359         136 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7360             :     double *padfZ =
    7361         136 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7362             :     double *padfX2 =
    7363         136 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7364             :     double *padfY2 =
    7365         136 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7366             :     double *padfZ2 =
    7367         136 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7368         136 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    7369         136 :     int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    7370             : 
    7371         136 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    7372         136 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    7373         136 :     const double dfErrorThreshold = CPLAtof(
    7374         136 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    7375             : 
    7376             :     const double dfExcludedValuesThreshold =
    7377         136 :         CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
    7378             :                                      "EXCLUDED_VALUES_PCT_THRESHOLD", "50")) /
    7379         136 :         100.0;
    7380             :     const double dfNodataValuesThreshold =
    7381         136 :         CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
    7382             :                                      "NODATA_VALUES_PCT_THRESHOLD", "100")) /
    7383         136 :         100.0;
    7384             : 
    7385             :     const int nXMargin =
    7386         136 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
    7387             :     const int nYMargin =
    7388         136 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
    7389             : 
    7390             :     /* ==================================================================== */
    7391             :     /*      Loop over output lines.                                         */
    7392             :     /* ==================================================================== */
    7393        8214 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    7394             :     {
    7395        8078 :         GWKAverageOrModeComputeLineCoords(
    7396             :             psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
    7397             :             pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
    7398             : 
    7399             :         /* ====================================================================
    7400             :          */
    7401             :         /*      Loop over pixels in output scanline. */
    7402             :         /* ====================================================================
    7403             :          */
    7404     2094000 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    7405             :         {
    7406     2085920 :             GPtrDiff_t iSrcOffset = 0;
    7407     2085920 :             double dfDensity = 1.0;
    7408     2085920 :             bool bHasFoundDensity = false;
    7409             : 
    7410     2085920 :             bool bWrapOverX = false;
    7411     2085920 :             double dfXMin = 0;
    7412     2085920 :             double dfYMin = 0;
    7413     2085920 :             double dfXMax = 0;
    7414     2085920 :             double dfYMax = 0;
    7415     2085920 :             int iSrcXMin = 0;
    7416     2085920 :             int iSrcYMin = 0;
    7417     2085920 :             int iSrcXMax = 0;
    7418     2085920 :             int iSrcYMax = 0;
    7419     2085920 :             if (!GWKAverageOrModeComputeSourceCoords(
    7420             :                     psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
    7421             :                     nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
    7422             :                     iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
    7423             :             {
    7424      687183 :                 continue;
    7425             :             }
    7426             : 
    7427     1906320 :             const GPtrDiff_t iDstOffset =
    7428     1906320 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    7429             : 
    7430     1906320 :             bool bDone = false;
    7431             : 
    7432             :             // Special Average mode where we process all bands together,
    7433             :             // to avoid averaging tuples that match an entry of m_aadfExcludedValues
    7434     1906320 :             constexpr double EPSILON = 1e-10;
    7435     4613330 :             if (poWK->eResample == GRA_Average &&
    7436      800681 :                 (!poWK->m_aadfExcludedValues.empty() ||
    7437      589832 :                  dfNodataValuesThreshold < 1 - EPSILON) &&
    7438     2707000 :                 !poWK->bApplyVerticalShift && !bIsComplex)
    7439             :             {
    7440      589832 :                 double dfTotalWeightInvalid = 0.0;
    7441      589832 :                 double dfTotalWeightExcluded = 0.0;
    7442      589832 :                 double dfTotalWeightRegular = 0.0;
    7443     1179660 :                 std::vector<double> adfValueReal(poWK->nBands, 0);
    7444     1179660 :                 std::vector<double> adfValueAveraged(poWK->nBands, 0);
    7445             :                 std::vector<int> anCountExcludedValues(
    7446      589832 :                     poWK->m_aadfExcludedValues.size(), 0);
    7447             : 
    7448     2162710 :                 for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7449             :                 {
    7450     1572880 :                     const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7451     1572880 :                     iSrcOffset =
    7452     1572880 :                         iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7453     6291500 :                     for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7454             :                          iSrcX++, iSrcOffset++)
    7455             :                     {
    7456     4718620 :                         if (bWrapOverX)
    7457           0 :                             iSrcOffset =
    7458           0 :                                 (iSrcX % nSrcXSize) +
    7459           0 :                                 static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7460             : 
    7461     4718620 :                         const double dfWeight =
    7462     4718620 :                             COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7463     4718620 :                         if (dfWeight <= 0)
    7464           0 :                             continue;
    7465             : 
    7466     4718640 :                         if (poWK->panUnifiedSrcValid != nullptr &&
    7467          12 :                             !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    7468             :                         {
    7469           3 :                             dfTotalWeightInvalid += dfWeight;
    7470           3 :                             continue;
    7471             :                         }
    7472             : 
    7473     4718620 :                         bool bAllValid = true;
    7474     8651150 :                         for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7475             :                         {
    7476     7340300 :                             double dfBandDensity = 0;
    7477     7340300 :                             double dfValueImagTmp = 0;
    7478    11272800 :                             if (!(GWKGetPixelValue(
    7479             :                                       poWK, iBand, iSrcOffset, &dfBandDensity,
    7480     7340300 :                                       &adfValueReal[iBand], &dfValueImagTmp) &&
    7481     3932530 :                                   dfBandDensity > BAND_DENSITY_THRESHOLD))
    7482             :                             {
    7483     3407770 :                                 bAllValid = false;
    7484     3407770 :                                 break;
    7485             :                             }
    7486             :                         }
    7487             : 
    7488     4718620 :                         if (!bAllValid)
    7489             :                         {
    7490     3407770 :                             dfTotalWeightInvalid += dfWeight;
    7491     3407770 :                             continue;
    7492             :                         }
    7493             : 
    7494     1310850 :                         bool bExcludedValueFound = false;
    7495     2490500 :                         for (size_t i = 0;
    7496     2490500 :                              i < poWK->m_aadfExcludedValues.size(); ++i)
    7497             :                         {
    7498     1179670 :                             if (poWK->m_aadfExcludedValues[i] == adfValueReal)
    7499             :                             {
    7500          22 :                                 bExcludedValueFound = true;
    7501          22 :                                 ++anCountExcludedValues[i];
    7502          22 :                                 dfTotalWeightExcluded += dfWeight;
    7503          22 :                                 break;
    7504             :                             }
    7505             :                         }
    7506     1310850 :                         if (!bExcludedValueFound)
    7507             :                         {
    7508             :                             // Weighted incremental algorithm mean
    7509             :                             // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
    7510     1310830 :                             dfTotalWeightRegular += dfWeight;
    7511     5243290 :                             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7512             :                             {
    7513     3932460 :                                 adfValueAveraged[iBand] +=
    7514     7864930 :                                     (dfWeight / dfTotalWeightRegular) *
    7515     7864930 :                                     (adfValueReal[iBand] -
    7516     3932460 :                                      adfValueAveraged[iBand]);
    7517             :                             }
    7518             :                         }
    7519             :                     }
    7520             :                 }
    7521             : 
    7522      589832 :                 const double dfTotalWeight = dfTotalWeightInvalid +
    7523             :                                              dfTotalWeightExcluded +
    7524             :                                              dfTotalWeightRegular;
    7525      589832 :                 if (dfTotalWeightInvalid > 0 &&
    7526             :                     dfTotalWeightInvalid >=
    7527      458751 :                         dfNodataValuesThreshold * dfTotalWeight)
    7528             :                 {
    7529             :                     // Do nothing. Leave bHasFoundDensity set to false.
    7530             :                 }
    7531      131085 :                 else if (dfTotalWeightExcluded > 0 &&
    7532             :                          dfTotalWeightExcluded >=
    7533           7 :                              dfExcludedValuesThreshold * dfTotalWeight)
    7534             :                 {
    7535             :                     // Find the most represented excluded value tuple
    7536           3 :                     size_t iExcludedValue = 0;
    7537           3 :                     int nExcludedValueCount = 0;
    7538           6 :                     for (size_t i = 0; i < poWK->m_aadfExcludedValues.size();
    7539             :                          ++i)
    7540             :                     {
    7541           3 :                         if (anCountExcludedValues[i] > nExcludedValueCount)
    7542             :                         {
    7543           3 :                             iExcludedValue = i;
    7544           3 :                             nExcludedValueCount = anCountExcludedValues[i];
    7545             :                         }
    7546             :                     }
    7547             : 
    7548           3 :                     bHasFoundDensity = true;
    7549             : 
    7550          12 :                     for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7551             :                     {
    7552           9 :                         GWKSetPixelValue(
    7553             :                             poWK, iBand, iDstOffset, /* dfBandDensity = */ 1.0,
    7554           9 :                             poWK->m_aadfExcludedValues[iExcludedValue][iBand],
    7555             :                             0);
    7556           3 :                     }
    7557             :                 }
    7558      131082 :                 else if (dfTotalWeightRegular > 0)
    7559             :                 {
    7560      131082 :                     bHasFoundDensity = true;
    7561             : 
    7562      524324 :                     for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7563             :                     {
    7564      393242 :                         GWKSetPixelValue(poWK, iBand, iDstOffset,
    7565             :                                          /* dfBandDensity = */ 1.0,
    7566      393242 :                                          adfValueAveraged[iBand], 0);
    7567             :                     }
    7568             :                 }
    7569             : 
    7570             :                 // Skip below loop on bands
    7571      589832 :                 bDone = true;
    7572             :             }
    7573             : 
    7574             :             /* ====================================================================
    7575             :              */
    7576             :             /*      Loop processing each band. */
    7577             :             /* ====================================================================
    7578             :              */
    7579             : 
    7580     4729250 :             for (int iBand = 0; !bDone && iBand < poWK->nBands; iBand++)
    7581             :             {
    7582     2822920 :                 double dfBandDensity = 0.0;
    7583     2822920 :                 double dfValueReal = 0.0;
    7584     2822920 :                 double dfValueImag = 0.0;
    7585     2822920 :                 double dfValueRealTmp = 0.0;
    7586     2822920 :                 double dfValueImagTmp = 0.0;
    7587             : 
    7588             :                 /* --------------------------------------------------------------------
    7589             :                  */
    7590             :                 /*      Collect the source value. */
    7591             :                 /* --------------------------------------------------------------------
    7592             :                  */
    7593             : 
    7594             :                 // Loop over source lines and pixels, per resampling algorithm.
    7595             : 
    7596     2822920 :                 if (poWK->eResample == GRA_Average)
    7597             :                 {
    7598      300849 :                     double dfTotalWeight = 0.0;
    7599             : 
    7600             :                     // This code adapted from GDALDownsampleChunk32R_AverageT()
    7601             :                     // in gcore/overview.cpp.
    7602      631308 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7603             :                     {
    7604      330459 :                         const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7605      330459 :                         iSrcOffset = iSrcXMin +
    7606      330459 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7607      803200 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7608             :                              iSrcX++, iSrcOffset++)
    7609             :                         {
    7610      472741 :                             if (bWrapOverX)
    7611         630 :                                 iSrcOffset =
    7612         630 :                                     (iSrcX % nSrcXSize) +
    7613         630 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7614             : 
    7615      472745 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7616           4 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7617             :                                             iSrcOffset))
    7618             :                             {
    7619           1 :                                 continue;
    7620             :                             }
    7621             : 
    7622      472740 :                             if (GWKGetPixelValue(
    7623             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7624      945480 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7625      472740 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7626             :                             {
    7627      472740 :                                 const double dfWeight =
    7628      472740 :                                     COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7629      472740 :                                 if (dfWeight > 0)
    7630             :                                 {
    7631             :                                     // Weighted incremental algorithm mean
    7632             :                                     // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
    7633      472740 :                                     dfTotalWeight += dfWeight;
    7634      472740 :                                     dfValueReal +=
    7635      472740 :                                         (dfWeight / dfTotalWeight) *
    7636      472740 :                                         (dfValueRealTmp - dfValueReal);
    7637      472740 :                                     if (bIsComplex)
    7638             :                                     {
    7639         252 :                                         dfValueImag +=
    7640         252 :                                             (dfWeight / dfTotalWeight) *
    7641         252 :                                             (dfValueImagTmp - dfValueImag);
    7642             :                                     }
    7643             :                                 }
    7644             :                             }
    7645             :                         }
    7646             :                     }
    7647             : 
    7648      300849 :                     if (dfTotalWeight > 0)
    7649             :                     {
    7650      300849 :                         if (poWK->bApplyVerticalShift)
    7651             :                         {
    7652           0 :                             if (!std::isfinite(padfZ[iDstX]))
    7653           0 :                                 continue;
    7654             :                             // Subtract padfZ[] since the coordinate
    7655             :                             // transformation is from target to source
    7656           0 :                             dfValueReal =
    7657           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7658           0 :                                 padfZ[iDstX] *
    7659             :                                     dfMultFactorVerticalShiftPipeline;
    7660             :                         }
    7661             : 
    7662      300849 :                         dfBandDensity = 1;
    7663      300849 :                         bHasFoundDensity = true;
    7664             :                     }
    7665             :                 }  // GRA_Average.
    7666             : 
    7667     2522070 :                 else if (poWK->eResample == GRA_RMS)
    7668             :                 {
    7669      300416 :                     double dfTotalReal = 0.0;
    7670      300416 :                     double dfTotalImag = 0.0;
    7671      300416 :                     double dfTotalWeight = 0.0;
    7672             :                     // This code adapted from GDALDownsampleChunk32R_AverageT()
    7673             :                     // in gcore/overview.cpp.
    7674      630578 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7675             :                     {
    7676      330162 :                         const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7677      330162 :                         iSrcOffset = iSrcXMin +
    7678      330162 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7679      802723 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7680             :                              iSrcX++, iSrcOffset++)
    7681             :                         {
    7682      472561 :                             if (bWrapOverX)
    7683         630 :                                 iSrcOffset =
    7684         630 :                                     (iSrcX % nSrcXSize) +
    7685         630 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7686             : 
    7687      472561 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7688           0 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7689             :                                             iSrcOffset))
    7690             :                             {
    7691           0 :                                 continue;
    7692             :                             }
    7693             : 
    7694      472561 :                             if (GWKGetPixelValue(
    7695             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7696      945122 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7697      472561 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7698             :                             {
    7699      472561 :                                 const double dfWeight =
    7700      472561 :                                     COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7701      472561 :                                 dfTotalWeight += dfWeight;
    7702      472561 :                                 dfTotalReal +=
    7703      472561 :                                     dfValueRealTmp * dfValueRealTmp * dfWeight;
    7704      472561 :                                 if (bIsComplex)
    7705          48 :                                     dfTotalImag += dfValueImagTmp *
    7706          48 :                                                    dfValueImagTmp * dfWeight;
    7707             :                             }
    7708             :                         }
    7709             :                     }
    7710             : 
    7711      300416 :                     if (dfTotalWeight > 0)
    7712             :                     {
    7713      300416 :                         dfValueReal = sqrt(dfTotalReal / dfTotalWeight);
    7714             : 
    7715      300416 :                         if (poWK->bApplyVerticalShift)
    7716             :                         {
    7717           0 :                             if (!std::isfinite(padfZ[iDstX]))
    7718           0 :                                 continue;
    7719             :                             // Subtract padfZ[] since the coordinate
    7720             :                             // transformation is from target to source
    7721           0 :                             dfValueReal =
    7722           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7723           0 :                                 padfZ[iDstX] *
    7724             :                                     dfMultFactorVerticalShiftPipeline;
    7725             :                         }
    7726             : 
    7727      300416 :                         if (bIsComplex)
    7728          12 :                             dfValueImag = sqrt(dfTotalImag / dfTotalWeight);
    7729             : 
    7730      300416 :                         dfBandDensity = 1;
    7731      300416 :                         bHasFoundDensity = true;
    7732             :                     }
    7733             :                 }  // GRA_RMS.
    7734             : 
    7735     2221660 :                 else if (poWK->eResample == GRA_Mode)
    7736             :                 {
    7737      496623 :                     float fMaxCount = 0.0f;
    7738      496623 :                     int nMode = -1;
    7739      496623 :                     bool bHasSourceValues = false;
    7740             : 
    7741      496623 :                     memset(pafCounts, 0, nBins * sizeof(float));
    7742             : 
    7743     1612560 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7744             :                     {
    7745     1115940 :                         const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7746     1115940 :                         iSrcOffset = iSrcXMin +
    7747     1115940 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7748     4733160 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7749             :                              iSrcX++, iSrcOffset++)
    7750             :                         {
    7751     3617230 :                             if (bWrapOverX)
    7752         630 :                                 iSrcOffset =
    7753         630 :                                     (iSrcX % nSrcXSize) +
    7754         630 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7755             : 
    7756     3617230 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7757           0 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7758             :                                             iSrcOffset))
    7759           0 :                                 continue;
    7760             : 
    7761     3617230 :                             if (GWKGetPixelValue(
    7762             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7763     7234450 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7764     3617230 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7765             :                             {
    7766     3617230 :                                 bHasSourceValues = true;
    7767     3617230 :                                 const int nVal =
    7768     3617230 :                                     static_cast<int>(dfValueRealTmp);
    7769     3617230 :                                 const int iBin = nVal + nBinsOffset;
    7770     3617230 :                                 const double dfWeight =
    7771     3617230 :                                     COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7772             : 
    7773             :                                 // Sum the density.
    7774     3617230 :                                 pafCounts[iBin] += static_cast<float>(dfWeight);
    7775             :                                 // Is it the most common value so far?
    7776     3617230 :                                 bool bUpdateMode = pafCounts[iBin] > fMaxCount;
    7777     3617230 :                                 if (!bUpdateMode &&
    7778      778316 :                                     pafCounts[iBin] == fMaxCount)
    7779             :                                 {
    7780      218628 :                                     switch (eTieStrategy)
    7781             :                                     {
    7782      218620 :                                         case GWKTS_First:
    7783      218620 :                                             break;
    7784           4 :                                         case GWKTS_Min:
    7785           4 :                                             bUpdateMode = nVal < nMode;
    7786           4 :                                             break;
    7787           4 :                                         case GWKTS_Max:
    7788           4 :                                             bUpdateMode = nVal > nMode;
    7789           4 :                                             break;
    7790             :                                     }
    7791             :                                 }
    7792     3617230 :                                 if (bUpdateMode)
    7793             :                                 {
    7794     2838920 :                                     nMode = nVal;
    7795     2838920 :                                     fMaxCount = pafCounts[iBin];
    7796             :                                 }
    7797             :                             }
    7798             :                         }
    7799             :                     }
    7800             : 
    7801      496623 :                     if (bHasSourceValues)
    7802             :                     {
    7803      496623 :                         dfValueReal = nMode;
    7804      496623 :                         dfBandDensity = 1;
    7805      496623 :                         bHasFoundDensity = true;
    7806             :                     }
    7807             :                 }  // GRA_Mode.
    7808             : 
    7809     1725040 :                 else if (poWK->eResample == GRA_Max)
    7810             :                 {
    7811      335037 :                     bool bFoundValid = false;
    7812      335037 :                     double dfTotalReal = cpl::NumericLimits<double>::lowest();
    7813             :                     // This code adapted from nAlgo 1 method, GRA_Average.
    7814     1288010 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7815             :                     {
    7816      952975 :                         iSrcOffset = iSrcXMin +
    7817      952975 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7818     4406540 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7819             :                              iSrcX++, iSrcOffset++)
    7820             :                         {
    7821     3453560 :                             if (bWrapOverX)
    7822         630 :                                 iSrcOffset =
    7823         630 :                                     (iSrcX % nSrcXSize) +
    7824         630 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7825             : 
    7826     3456370 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7827        2809 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7828             :                                             iSrcOffset))
    7829             :                             {
    7830        2446 :                                 continue;
    7831             :                             }
    7832             : 
    7833             :                             // Returns pixel value if it is not no data.
    7834     3451120 :                             if (GWKGetPixelValue(
    7835             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7836     6902230 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7837     3451120 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7838             :                             {
    7839     3451120 :                                 bFoundValid = true;
    7840     3451120 :                                 if (dfTotalReal < dfValueRealTmp)
    7841             :                                 {
    7842      442642 :                                     dfTotalReal = dfValueRealTmp;
    7843             :                                 }
    7844             :                             }
    7845             :                         }
    7846             :                     }
    7847             : 
    7848      335037 :                     if (bFoundValid)
    7849             :                     {
    7850      335037 :                         dfValueReal = dfTotalReal;
    7851             : 
    7852      335037 :                         if (poWK->bApplyVerticalShift)
    7853             :                         {
    7854           0 :                             if (!std::isfinite(padfZ[iDstX]))
    7855           0 :                                 continue;
    7856             :                             // Subtract padfZ[] since the coordinate
    7857             :                             // transformation is from target to source
    7858           0 :                             dfValueReal =
    7859           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7860           0 :                                 padfZ[iDstX] *
    7861             :                                     dfMultFactorVerticalShiftPipeline;
    7862             :                         }
    7863             : 
    7864      335037 :                         dfBandDensity = 1;
    7865      335037 :                         bHasFoundDensity = true;
    7866             :                     }
    7867             :                 }
    7868             : 
    7869     1390000 :                 else if (poWK->eResample == GRA_Min)
    7870             :                 {
    7871      335012 :                     bool bFoundValid = false;
    7872      335012 :                     double dfTotalReal = cpl::NumericLimits<double>::max();
    7873             :                     // This code adapted from nAlgo 1 method, GRA_Average.
    7874     1287720 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7875             :                     {
    7876      952710 :                         iSrcOffset = iSrcXMin +
    7877      952710 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7878     4403460 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7879             :                              iSrcX++, iSrcOffset++)
    7880             :                         {
    7881     3450750 :                             if (bWrapOverX)
    7882         630 :                                 iSrcOffset =
    7883         630 :                                     (iSrcX % nSrcXSize) +
    7884         630 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7885             : 
    7886     3450750 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7887           0 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7888             :                                             iSrcOffset))
    7889             :                             {
    7890           0 :                                 continue;
    7891             :                             }
    7892             : 
    7893             :                             // Returns pixel value if it is not no data.
    7894     3450750 :                             if (GWKGetPixelValue(
    7895             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7896     6901500 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7897     3450750 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7898             :                             {
    7899     3450750 :                                 bFoundValid = true;
    7900     3450750 :                                 if (dfTotalReal > dfValueRealTmp)
    7901             :                                 {
    7902      443069 :                                     dfTotalReal = dfValueRealTmp;
    7903             :                                 }
    7904             :                             }
    7905             :                         }
    7906             :                     }
    7907             : 
    7908      335012 :                     if (bFoundValid)
    7909             :                     {
    7910      335012 :                         dfValueReal = dfTotalReal;
    7911             : 
    7912      335012 :                         if (poWK->bApplyVerticalShift)
    7913             :                         {
    7914           0 :                             if (!std::isfinite(padfZ[iDstX]))
    7915           0 :                                 continue;
    7916             :                             // Subtract padfZ[] since the coordinate
    7917             :                             // transformation is from target to source
    7918           0 :                             dfValueReal =
    7919           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7920           0 :                                 padfZ[iDstX] *
    7921             :                                     dfMultFactorVerticalShiftPipeline;
    7922             :                         }
    7923             : 
    7924      335012 :                         dfBandDensity = 1;
    7925      335012 :                         bHasFoundDensity = true;
    7926             :                     }
    7927             :                 }  // GRA_Min.
    7928             : 
    7929             :                 else
    7930             :                 // poWK->eResample == GRA_Med | GRA_Q1 | GRA_Q3.
    7931             :                 {
    7932     1054990 :                     CPLAssert(quant > 0.0f);
    7933             : 
    7934     1054990 :                     bool bFoundValid = false;
    7935     1054990 :                     std::vector<double> dfRealValuesTmp;
    7936             : 
    7937             :                     // This code adapted from nAlgo 1 method, GRA_Average.
    7938     4012980 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7939             :                     {
    7940     2957990 :                         iSrcOffset = iSrcXMin +
    7941     2957990 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7942    13509900 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7943             :                              iSrcX++, iSrcOffset++)
    7944             :                         {
    7945    10551900 :                             if (bWrapOverX)
    7946        1890 :                                 iSrcOffset =
    7947        1890 :                                     (iSrcX % nSrcXSize) +
    7948        1890 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7949             : 
    7950    10748500 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7951      196608 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7952             :                                             iSrcOffset))
    7953             :                             {
    7954      195449 :                                 continue;
    7955             :                             }
    7956             : 
    7957             :                             // Returns pixel value if it is not no data.
    7958    10356400 :                             if (GWKGetPixelValue(
    7959             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7960    20712900 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7961    10356400 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7962             :                             {
    7963    10356400 :                                 bFoundValid = true;
    7964    10356400 :                                 dfRealValuesTmp.push_back(dfValueRealTmp);
    7965             :                             }
    7966             :                         }
    7967             :                     }
    7968             : 
    7969     1054990 :                     if (bFoundValid)
    7970             :                     {
    7971     1006150 :                         std::sort(dfRealValuesTmp.begin(),
    7972             :                                   dfRealValuesTmp.end());
    7973             :                         int quantIdx = static_cast<int>(
    7974     1006150 :                             std::ceil(quant * dfRealValuesTmp.size() - 1));
    7975     1006150 :                         dfValueReal = dfRealValuesTmp[quantIdx];
    7976             : 
    7977     1006150 :                         if (poWK->bApplyVerticalShift)
    7978             :                         {
    7979           0 :                             if (!std::isfinite(padfZ[iDstX]))
    7980           0 :                                 continue;
    7981             :                             // Subtract padfZ[] since the coordinate
    7982             :                             // transformation is from target to source
    7983           0 :                             dfValueReal =
    7984           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7985           0 :                                 padfZ[iDstX] *
    7986             :                                     dfMultFactorVerticalShiftPipeline;
    7987             :                         }
    7988             : 
    7989     1006150 :                         dfBandDensity = 1;
    7990     1006150 :                         bHasFoundDensity = true;
    7991     1006150 :                         dfRealValuesTmp.clear();
    7992             :                     }
    7993             :                 }  // Quantile.
    7994             : 
    7995             :                 /* --------------------------------------------------------------------
    7996             :                  */
    7997             :                 /*      We have a computed value from the source.  Now apply it
    7998             :                  * to      */
    7999             :                 /*      the destination pixel. */
    8000             :                 /* --------------------------------------------------------------------
    8001             :                  */
    8002     2822920 :                 if (bHasFoundDensity)
    8003             :                 {
    8004             :                     // TODO: Should we compute dfBandDensity in fct of
    8005             :                     // nCount/nCount2, or use as a threshold to set the dest
    8006             :                     // value?
    8007             :                     // dfBandDensity = (float) nCount / nCount2;
    8008             :                     // if( (float) nCount / nCount2 > 0.1 )
    8009             :                     // or fix gdalwarp crop_to_cutline to crop partially
    8010             :                     // overlapping pixels.
    8011     2774080 :                     GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    8012             :                                      dfValueReal, dfValueImag);
    8013             :                 }
    8014             :             }
    8015             : 
    8016     1906320 :             if (!bHasFoundDensity)
    8017      507587 :                 continue;
    8018             : 
    8019             :             /* --------------------------------------------------------------------
    8020             :              */
    8021             :             /*      Update destination density/validity masks. */
    8022             :             /* --------------------------------------------------------------------
    8023             :              */
    8024     1398740 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    8025             : 
    8026     1398740 :             if (poWK->panDstValid != nullptr)
    8027             :             {
    8028        1184 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    8029             :             }
    8030             :         } /* Next iDstX */
    8031             : 
    8032             :         /* --------------------------------------------------------------------
    8033             :          */
    8034             :         /*      Report progress to the user, and optionally cancel out. */
    8035             :         /* --------------------------------------------------------------------
    8036             :          */
    8037        8078 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    8038           0 :             break;
    8039             :     }
    8040             : 
    8041             :     /* -------------------------------------------------------------------- */
    8042             :     /*      Cleanup and return.                                             */
    8043             :     /* -------------------------------------------------------------------- */
    8044         136 :     CPLFree(padfX);
    8045         136 :     CPLFree(padfY);
    8046         136 :     CPLFree(padfZ);
    8047         136 :     CPLFree(padfX2);
    8048         136 :     CPLFree(padfY2);
    8049         136 :     CPLFree(padfZ2);
    8050         136 :     CPLFree(pabSuccess);
    8051         136 :     CPLFree(pabSuccess2);
    8052         136 :     VSIFree(pafCounts);
    8053             : }
    8054             : 
    8055             : /************************************************************************/
    8056             : /*                         getOrientation()                             */
    8057             : /************************************************************************/
    8058             : 
    8059             : typedef std::pair<double, double> XYPair;
    8060             : 
    8061             : // Returns 1 whether (p1,p2,p3) is clockwise oriented,
    8062             : // -1 if it is counter-clockwise oriented,
    8063             : // or 0 if it is colinear.
    8064     2355910 : static int getOrientation(const XYPair &p1, const XYPair &p2, const XYPair &p3)
    8065             : {
    8066     2355910 :     const double p1x = p1.first;
    8067     2355910 :     const double p1y = p1.second;
    8068     2355910 :     const double p2x = p2.first;
    8069     2355910 :     const double p2y = p2.second;
    8070     2355910 :     const double p3x = p3.first;
    8071     2355910 :     const double p3y = p3.second;
    8072     2355910 :     const double val = (p2y - p1y) * (p3x - p2x) - (p2x - p1x) * (p3y - p2y);
    8073     2355910 :     if (std::abs(val) < 1e-20)
    8074        2690 :         return 0;
    8075     2353220 :     else if (val > 0)
    8076           0 :         return 1;
    8077             :     else
    8078     2353220 :         return -1;
    8079             : }
    8080             : 
    8081             : /************************************************************************/
    8082             : /*                          isConvex()                                  */
    8083             : /************************************************************************/
    8084             : 
    8085             : typedef std::vector<XYPair> XYPoly;
    8086             : 
    8087             : // poly must be closed
    8088      785302 : static bool isConvex(const XYPoly &poly)
    8089             : {
    8090      785302 :     const size_t n = poly.size();
    8091      785302 :     size_t i = 0;
    8092      785302 :     int last_orientation = getOrientation(poly[i], poly[i + 1], poly[i + 2]);
    8093      785302 :     ++i;
    8094     2355910 :     for (; i < n - 2; ++i)
    8095             :     {
    8096             :         const int orientation =
    8097     1570600 :             getOrientation(poly[i], poly[i + 1], poly[i + 2]);
    8098     1570600 :         if (orientation != 0)
    8099             :         {
    8100     1567910 :             if (last_orientation == 0)
    8101           0 :                 last_orientation = orientation;
    8102     1567910 :             else if (orientation != last_orientation)
    8103           0 :                 return false;
    8104             :         }
    8105             :     }
    8106      785302 :     return true;
    8107             : }
    8108             : 
    8109             : /************************************************************************/
    8110             : /*                     pointIntersectsConvexPoly()                      */
    8111             : /************************************************************************/
    8112             : 
    8113             : // Returns whether xy intersects poly, that must be closed and convex.
    8114     6049100 : static bool pointIntersectsConvexPoly(const XYPair &xy, const XYPoly &poly)
    8115             : {
    8116     6049100 :     const size_t n = poly.size();
    8117     6049100 :     double dx1 = xy.first - poly[0].first;
    8118     6049100 :     double dy1 = xy.second - poly[0].second;
    8119     6049100 :     double dx2 = poly[1].first - poly[0].first;
    8120     6049100 :     double dy2 = poly[1].second - poly[0].second;
    8121     6049100 :     double prevCrossProduct = dx1 * dy2 - dx2 * dy1;
    8122             : 
    8123             :     // Check if the point remains on the same side (left/right) of all edges
    8124    14556400 :     for (size_t i = 2; i < n; i++)
    8125             :     {
    8126    12793100 :         dx1 = xy.first - poly[i - 1].first;
    8127    12793100 :         dy1 = xy.second - poly[i - 1].second;
    8128             : 
    8129    12793100 :         dx2 = poly[i].first - poly[i - 1].first;
    8130    12793100 :         dy2 = poly[i].second - poly[i - 1].second;
    8131             : 
    8132    12793100 :         double crossProduct = dx1 * dy2 - dx2 * dy1;
    8133    12793100 :         if (std::abs(prevCrossProduct) < 1e-20)
    8134      725558 :             prevCrossProduct = crossProduct;
    8135    12067500 :         else if (prevCrossProduct * crossProduct < 0)
    8136     4285760 :             return false;
    8137             :     }
    8138             : 
    8139     1763340 :     return true;
    8140             : }
    8141             : 
    8142             : /************************************************************************/
    8143             : /*                     getIntersection()                                */
    8144             : /************************************************************************/
    8145             : 
    8146             : /* Returns intersection of [p1,p2] with [p3,p4], if
    8147             :  * it is a single point, and the 2 segments are not colinear.
    8148             :  */
    8149    11811000 : static bool getIntersection(const XYPair &p1, const XYPair &p2,
    8150             :                             const XYPair &p3, const XYPair &p4, XYPair &xy)
    8151             : {
    8152    11811000 :     const double x1 = p1.first;
    8153    11811000 :     const double y1 = p1.second;
    8154    11811000 :     const double x2 = p2.first;
    8155    11811000 :     const double y2 = p2.second;
    8156    11811000 :     const double x3 = p3.first;
    8157    11811000 :     const double y3 = p3.second;
    8158    11811000 :     const double x4 = p4.first;
    8159    11811000 :     const double y4 = p4.second;
    8160    11811000 :     const double t_num = (x1 - x3) * (y3 - y4) - (y1 - y3) * (x3 - x4);
    8161    11811000 :     const double denom = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4);
    8162    11811000 :     if (t_num * denom < 0 || std::abs(t_num) > std::abs(denom) || denom == 0)
    8163     9260780 :         return false;
    8164             : 
    8165     2550260 :     const double u_num = (x1 - x3) * (y1 - y2) - (y1 - y3) * (x1 - x2);
    8166     2550260 :     if (u_num * denom < 0 || std::abs(u_num) > std::abs(denom))
    8167      973924 :         return false;
    8168             : 
    8169     1576340 :     const double t = t_num / denom;
    8170     1576340 :     xy.first = x1 + t * (x2 - x1);
    8171     1576340 :     xy.second = y1 + t * (y2 - y1);
    8172     1576340 :     return true;
    8173             : }
    8174             : 
    8175             : /************************************************************************/
    8176             : /*                     getConvexPolyIntersection()                      */
    8177             : /************************************************************************/
    8178             : 
    8179             : // poly1 and poly2 must be closed and convex.
    8180             : // The returned intersection will not necessary be closed.
    8181      785302 : static void getConvexPolyIntersection(const XYPoly &poly1, const XYPoly &poly2,
    8182             :                                       XYPoly &intersection)
    8183             : {
    8184      785302 :     intersection.clear();
    8185             : 
    8186             :     // Add all points of poly1 inside poly2
    8187     3926510 :     for (size_t i = 0; i < poly1.size() - 1; ++i)
    8188             :     {
    8189     3141210 :         if (pointIntersectsConvexPoly(poly1[i], poly2))
    8190     1187430 :             intersection.push_back(poly1[i]);
    8191             :     }
    8192      785302 :     if (intersection.size() == poly1.size() - 1)
    8193             :     {
    8194             :         // poly1 is inside poly2
    8195      119100 :         return;
    8196             :     }
    8197             : 
    8198             :     // Add all points of poly2 inside poly1
    8199     3634860 :     for (size_t i = 0; i < poly2.size() - 1; ++i)
    8200             :     {
    8201     2907890 :         if (pointIntersectsConvexPoly(poly2[i], poly1))
    8202      575904 :             intersection.push_back(poly2[i]);
    8203             :     }
    8204             : 
    8205             :     // Compute the intersection of all edges of both polygons
    8206      726972 :     XYPair xy;
    8207     3634860 :     for (size_t i1 = 0; i1 < poly1.size() - 1; ++i1)
    8208             :     {
    8209    14539400 :         for (size_t i2 = 0; i2 < poly2.size() - 1; ++i2)
    8210             :         {
    8211    11631600 :             if (getIntersection(poly1[i1], poly1[i1 + 1], poly2[i2],
    8212    11631600 :                                 poly2[i2 + 1], xy))
    8213             :             {
    8214     1576230 :                 intersection.push_back(xy);
    8215             :             }
    8216             :         }
    8217             :     }
    8218             : 
    8219      726972 :     if (intersection.empty())
    8220       60770 :         return;
    8221             : 
    8222             :     // Find lowest-left point in intersection set
    8223      666202 :     double lowest_x = cpl::NumericLimits<double>::max();
    8224      666202 :     double lowest_y = cpl::NumericLimits<double>::max();
    8225     3772450 :     for (const auto &pair : intersection)
    8226             :     {
    8227     3106240 :         const double x = pair.first;
    8228     3106240 :         const double y = pair.second;
    8229     3106240 :         if (y < lowest_y || (y == lowest_y && x < lowest_x))
    8230             :         {
    8231     1096040 :             lowest_x = x;
    8232     1096040 :             lowest_y = y;
    8233             :         }
    8234             :     }
    8235             : 
    8236     5737980 :     const auto sortFunc = [&](const XYPair &p1, const XYPair &p2)
    8237             :     {
    8238     5737980 :         const double p1x_diff = p1.first - lowest_x;
    8239     5737980 :         const double p1y_diff = p1.second - lowest_y;
    8240     5737980 :         const double p2x_diff = p2.first - lowest_x;
    8241     5737980 :         const double p2y_diff = p2.second - lowest_y;
    8242     5737980 :         if (p2y_diff == 0.0 && p1y_diff == 0.0)
    8243             :         {
    8244     2655420 :             if (p1x_diff >= 0)
    8245             :             {
    8246     2655420 :                 if (p2x_diff >= 0)
    8247     2655420 :                     return p1.first < p2.first;
    8248           0 :                 return true;
    8249             :             }
    8250             :             else
    8251             :             {
    8252           0 :                 if (p2x_diff >= 0)
    8253           0 :                     return false;
    8254           0 :                 return p1.first < p2.first;
    8255             :             }
    8256             :         }
    8257             : 
    8258     3082560 :         if (p2x_diff == 0.0 && p1x_diff == 0.0)
    8259     1046960 :             return p1.second < p2.second;
    8260             : 
    8261             :         double tan_p1;
    8262     2035600 :         if (p1x_diff == 0.0)
    8263      464622 :             tan_p1 = p1y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
    8264             :         else
    8265     1570980 :             tan_p1 = p1y_diff / p1x_diff;
    8266             : 
    8267             :         double tan_p2;
    8268     2035600 :         if (p2x_diff == 0.0)
    8269      839515 :             tan_p2 = p2y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
    8270             :         else
    8271     1196080 :             tan_p2 = p2y_diff / p2x_diff;
    8272             : 
    8273     2035600 :         if (tan_p1 >= 0)
    8274             :         {
    8275     1904790 :             if (tan_p2 >= 0)
    8276     1881590 :                 return tan_p1 < tan_p2;
    8277             :             else
    8278       23199 :                 return true;
    8279             :         }
    8280             :         else
    8281             :         {
    8282      130806 :             if (tan_p2 >= 0)
    8283      103900 :                 return false;
    8284             :             else
    8285       26906 :                 return tan_p1 < tan_p2;
    8286             :         }
    8287      666202 :     };
    8288             : 
    8289             :     // Sort points by increasing atan2(y-lowest_y, x-lowest_x) to form a convex
    8290             :     // hull
    8291      666202 :     std::sort(intersection.begin(), intersection.end(), sortFunc);
    8292             : 
    8293             :     // Remove duplicated points
    8294      666202 :     size_t j = 1;
    8295     3106240 :     for (size_t i = 1; i < intersection.size(); ++i)
    8296             :     {
    8297     2440040 :         if (intersection[i] != intersection[i - 1])
    8298             :         {
    8299     1452560 :             if (j < i)
    8300      545275 :                 intersection[j] = intersection[i];
    8301     1452560 :             ++j;
    8302             :         }
    8303             :     }
    8304      666202 :     intersection.resize(j);
    8305             : }
    8306             : 
    8307             : /************************************************************************/
    8308             : /*                            getArea()                                 */
    8309             : /************************************************************************/
    8310             : 
    8311             : // poly may or may not be closed.
    8312      558521 : static double getArea(const XYPoly &poly)
    8313             : {
    8314             :     // CPLAssert(poly.size() >= 2);
    8315      558521 :     const size_t nPointCount = poly.size();
    8316             :     double dfAreaSum =
    8317      558521 :         poly[0].first * (poly[1].second - poly[nPointCount - 1].second);
    8318             : 
    8319     1765140 :     for (size_t i = 1; i < nPointCount - 1; i++)
    8320             :     {
    8321     1206610 :         dfAreaSum += poly[i].first * (poly[i + 1].second - poly[i - 1].second);
    8322             :     }
    8323             : 
    8324      558521 :     dfAreaSum += poly[nPointCount - 1].first *
    8325      558521 :                  (poly[0].second - poly[nPointCount - 2].second);
    8326             : 
    8327      558521 :     return 0.5 * std::fabs(dfAreaSum);
    8328             : }
    8329             : 
    8330             : /************************************************************************/
    8331             : /*                           GWKSumPreserving()                         */
    8332             : /************************************************************************/
    8333             : 
    8334             : static void GWKSumPreservingThread(void *pData);
    8335             : 
    8336          18 : static CPLErr GWKSumPreserving(GDALWarpKernel *poWK)
    8337             : {
    8338          18 :     return GWKRun(poWK, "GWKSumPreserving", GWKSumPreservingThread);
    8339             : }
    8340             : 
    8341          18 : static void GWKSumPreservingThread(void *pData)
    8342             : {
    8343          18 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    8344          18 :     GDALWarpKernel *poWK = psJob->poWK;
    8345          18 :     const int iYMin = psJob->iYMin;
    8346          18 :     const int iYMax = psJob->iYMax;
    8347             :     const bool bIsAffineNoRotation =
    8348          18 :         GDALTransformIsAffineNoRotation(poWK->pfnTransformer,
    8349          26 :                                         poWK->pTransformerArg) &&
    8350             :         // for debug/testing purposes
    8351           8 :         CPLTestBool(
    8352          18 :             CPLGetConfigOption("GDAL_WARP_USE_AFFINE_OPTIMIZATION", "YES"));
    8353             : 
    8354          18 :     const int nDstXSize = poWK->nDstXSize;
    8355          18 :     const int nSrcXSize = poWK->nSrcXSize;
    8356          18 :     const int nSrcYSize = poWK->nSrcYSize;
    8357             : 
    8358          36 :     std::vector<double> adfX0(nSrcXSize + 1);
    8359          36 :     std::vector<double> adfY0(nSrcXSize + 1);
    8360          36 :     std::vector<double> adfZ0(nSrcXSize + 1);
    8361          36 :     std::vector<double> adfX1(nSrcXSize + 1);
    8362          36 :     std::vector<double> adfY1(nSrcXSize + 1);
    8363          36 :     std::vector<double> adfZ1(nSrcXSize + 1);
    8364          36 :     std::vector<int> abSuccess0(nSrcXSize + 1);
    8365          36 :     std::vector<int> abSuccess1(nSrcXSize + 1);
    8366             : 
    8367             :     CPLRectObj sGlobalBounds;
    8368          18 :     sGlobalBounds.minx = -2 * poWK->dfXScale;
    8369          18 :     sGlobalBounds.miny = iYMin - 2 * poWK->dfYScale;
    8370          18 :     sGlobalBounds.maxx = nDstXSize + 2 * poWK->dfXScale;
    8371          18 :     sGlobalBounds.maxy = iYMax + 2 * poWK->dfYScale;
    8372          18 :     CPLQuadTree *hQuadTree = CPLQuadTreeCreate(&sGlobalBounds, nullptr);
    8373             : 
    8374             :     struct SourcePixel
    8375             :     {
    8376             :         int iSrcX;
    8377             :         int iSrcY;
    8378             : 
    8379             :         // Coordinates of source pixel in target pixel coordinates
    8380             :         double dfDstX0;
    8381             :         double dfDstY0;
    8382             :         double dfDstX1;
    8383             :         double dfDstY1;
    8384             :         double dfDstX2;
    8385             :         double dfDstY2;
    8386             :         double dfDstX3;
    8387             :         double dfDstY3;
    8388             : 
    8389             :         // Source pixel total area (might be larger than the one described
    8390             :         // by above coordinates, if the pixel was crossing the antimeridian
    8391             :         // and split)
    8392             :         double dfArea;
    8393             :     };
    8394             : 
    8395          36 :     std::vector<SourcePixel> sourcePixels;
    8396             : 
    8397          36 :     XYPoly discontinuityLeft(5);
    8398          36 :     XYPoly discontinuityRight(5);
    8399             : 
    8400             :     /* ==================================================================== */
    8401             :     /*      First pass: transform the 4 corners of each potential           */
    8402             :     /*      contributing source pixel to target pixel coordinates.          */
    8403             :     /* ==================================================================== */
    8404             : 
    8405             :     // Special case for top line
    8406             :     {
    8407          18 :         int iY = 0;
    8408        1130 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8409             :         {
    8410        1112 :             adfX1[iX] = iX + poWK->nSrcXOff;
    8411        1112 :             adfY1[iX] = iY + poWK->nSrcYOff;
    8412        1112 :             adfZ1[iX] = 0;
    8413             :         }
    8414             : 
    8415          18 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
    8416             :                              adfX1.data(), adfY1.data(), adfZ1.data(),
    8417             :                              abSuccess1.data());
    8418             : 
    8419        1130 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8420             :         {
    8421        1112 :             if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
    8422           0 :                 abSuccess1[iX] = FALSE;
    8423             :             else
    8424             :             {
    8425        1112 :                 adfX1[iX] -= poWK->nDstXOff;
    8426        1112 :                 adfY1[iX] -= poWK->nDstYOff;
    8427             :             }
    8428             :         }
    8429             :     }
    8430             : 
    8431      413412 :     const auto getInsideXSign = [poWK, nDstXSize](double dfX)
    8432             :     {
    8433      413412 :         return dfX - poWK->nDstXOff >= -2 * poWK->dfXScale &&
    8434      205344 :                        dfX - poWK->nDstXOff <= nDstXSize + 2 * poWK->dfXScale
    8435      413412 :                    ? 1
    8436      208068 :                    : -1;
    8437          18 :     };
    8438             : 
    8439             :     const auto FindDiscontinuity =
    8440          80 :         [poWK, psJob, getInsideXSign](
    8441             :             double dfXLeft, double dfXRight, double dfY,
    8442             :             int XLeftReprojectedInsideSign, double &dfXMidReprojectedLeft,
    8443         800 :             double &dfXMidReprojectedRight, double &dfYMidReprojected)
    8444             :     {
    8445         880 :         for (int i = 0; i < 10 && dfXRight - dfXLeft > 1e-8; ++i)
    8446             :         {
    8447         800 :             double dfXMid = (dfXLeft + dfXRight) / 2;
    8448         800 :             double dfXMidReprojected = dfXMid;
    8449         800 :             dfYMidReprojected = dfY;
    8450         800 :             double dfZ = 0;
    8451         800 :             int nSuccess = 0;
    8452         800 :             poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1,
    8453             :                                  &dfXMidReprojected, &dfYMidReprojected, &dfZ,
    8454             :                                  &nSuccess);
    8455         800 :             if (XLeftReprojectedInsideSign != getInsideXSign(dfXMidReprojected))
    8456             :             {
    8457         456 :                 dfXRight = dfXMid;
    8458         456 :                 dfXMidReprojectedRight = dfXMidReprojected;
    8459             :             }
    8460             :             else
    8461             :             {
    8462         344 :                 dfXLeft = dfXMid;
    8463         344 :                 dfXMidReprojectedLeft = dfXMidReprojected;
    8464             :             }
    8465             :         }
    8466          80 :     };
    8467             : 
    8468         566 :     for (int iY = 0; iY < nSrcYSize; ++iY)
    8469             :     {
    8470         548 :         std::swap(adfX0, adfX1);
    8471         548 :         std::swap(adfY0, adfY1);
    8472         548 :         std::swap(adfZ0, adfZ1);
    8473         548 :         std::swap(abSuccess0, abSuccess1);
    8474             : 
    8475      104512 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8476             :         {
    8477      103964 :             adfX1[iX] = iX + poWK->nSrcXOff;
    8478      103964 :             adfY1[iX] = iY + 1 + poWK->nSrcYOff;
    8479      103964 :             adfZ1[iX] = 0;
    8480             :         }
    8481             : 
    8482         548 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
    8483             :                              adfX1.data(), adfY1.data(), adfZ1.data(),
    8484             :                              abSuccess1.data());
    8485             : 
    8486      104512 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8487             :         {
    8488      103964 :             if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
    8489           0 :                 abSuccess1[iX] = FALSE;
    8490             :             else
    8491             :             {
    8492      103964 :                 adfX1[iX] -= poWK->nDstXOff;
    8493      103964 :                 adfY1[iX] -= poWK->nDstYOff;
    8494             :             }
    8495             :         }
    8496             : 
    8497      103964 :         for (int iX = 0; iX < nSrcXSize; ++iX)
    8498             :         {
    8499      206832 :             if (abSuccess0[iX] && abSuccess0[iX + 1] && abSuccess1[iX] &&
    8500      103416 :                 abSuccess1[iX + 1])
    8501             :             {
    8502             :                 /* --------------------------------------------------------------------
    8503             :                  */
    8504             :                 /*      Do not try to apply transparent source pixels to the
    8505             :                  * destination.*/
    8506             :                 /* --------------------------------------------------------------------
    8507             :                  */
    8508      103416 :                 const auto iSrcOffset =
    8509      103416 :                     iX + static_cast<GPtrDiff_t>(iY) * nSrcXSize;
    8510      105816 :                 if (poWK->panUnifiedSrcValid != nullptr &&
    8511        2400 :                     !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    8512             :                 {
    8513       10971 :                     continue;
    8514             :                 }
    8515             : 
    8516      103410 :                 if (poWK->pafUnifiedSrcDensity != nullptr)
    8517             :                 {
    8518           0 :                     if (poWK->pafUnifiedSrcDensity[iSrcOffset] <
    8519             :                         SRC_DENSITY_THRESHOLD_FLOAT)
    8520           0 :                         continue;
    8521             :                 }
    8522             : 
    8523             :                 SourcePixel sp;
    8524      103410 :                 sp.dfArea = 0;
    8525      103410 :                 sp.dfDstX0 = adfX0[iX];
    8526      103410 :                 sp.dfDstY0 = adfY0[iX];
    8527      103410 :                 sp.dfDstX1 = adfX0[iX + 1];
    8528      103410 :                 sp.dfDstY1 = adfY0[iX + 1];
    8529      103410 :                 sp.dfDstX2 = adfX1[iX + 1];
    8530      103410 :                 sp.dfDstY2 = adfY1[iX + 1];
    8531      103410 :                 sp.dfDstX3 = adfX1[iX];
    8532      103410 :                 sp.dfDstY3 = adfY1[iX];
    8533             : 
    8534             :                 // Detect pixel that likely cross the anti-meridian and
    8535             :                 // introduce a discontinuity when reprojected.
    8536             : 
    8537      103410 :                 if (getInsideXSign(adfX0[iX]) !=
    8538      103506 :                         getInsideXSign(adfX0[iX + 1]) &&
    8539         164 :                     getInsideXSign(adfX0[iX]) == getInsideXSign(adfX1[iX]) &&
    8540          68 :                     getInsideXSign(adfX0[iX + 1]) ==
    8541      103574 :                         getInsideXSign(adfX1[iX + 1]) &&
    8542          40 :                     (adfY1[iX] - adfY0[iX]) * (adfY1[iX + 1] - adfY0[iX + 1]) >
    8543             :                         0)
    8544             :                 {
    8545          40 :                     double dfXMidReprojectedLeftTop = 0;
    8546          40 :                     double dfXMidReprojectedRightTop = 0;
    8547          40 :                     double dfYMidReprojectedTop = 0;
    8548          40 :                     FindDiscontinuity(
    8549          40 :                         iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
    8550          80 :                         iY + poWK->nSrcYOff, getInsideXSign(adfX0[iX]),
    8551             :                         dfXMidReprojectedLeftTop, dfXMidReprojectedRightTop,
    8552             :                         dfYMidReprojectedTop);
    8553          40 :                     double dfXMidReprojectedLeftBottom = 0;
    8554          40 :                     double dfXMidReprojectedRightBottom = 0;
    8555          40 :                     double dfYMidReprojectedBottom = 0;
    8556          40 :                     FindDiscontinuity(
    8557          40 :                         iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
    8558          80 :                         iY + poWK->nSrcYOff + 1, getInsideXSign(adfX1[iX]),
    8559             :                         dfXMidReprojectedLeftBottom,
    8560             :                         dfXMidReprojectedRightBottom, dfYMidReprojectedBottom);
    8561             : 
    8562          40 :                     discontinuityLeft[0] = XYPair(adfX0[iX], adfY0[iX]);
    8563          40 :                     discontinuityLeft[1] =
    8564          80 :                         XYPair(dfXMidReprojectedLeftTop, dfYMidReprojectedTop);
    8565          40 :                     discontinuityLeft[2] = XYPair(dfXMidReprojectedLeftBottom,
    8566          40 :                                                   dfYMidReprojectedBottom);
    8567          40 :                     discontinuityLeft[3] = XYPair(adfX1[iX], adfY1[iX]);
    8568          40 :                     discontinuityLeft[4] = XYPair(adfX0[iX], adfY0[iX]);
    8569             : 
    8570          40 :                     discontinuityRight[0] =
    8571          80 :                         XYPair(adfX0[iX + 1], adfY0[iX + 1]);
    8572          40 :                     discontinuityRight[1] =
    8573          80 :                         XYPair(dfXMidReprojectedRightTop, dfYMidReprojectedTop);
    8574          40 :                     discontinuityRight[2] = XYPair(dfXMidReprojectedRightBottom,
    8575          40 :                                                    dfYMidReprojectedBottom);
    8576          40 :                     discontinuityRight[3] =
    8577          80 :                         XYPair(adfX1[iX + 1], adfY1[iX + 1]);
    8578          40 :                     discontinuityRight[4] =
    8579          80 :                         XYPair(adfX0[iX + 1], adfY0[iX + 1]);
    8580             : 
    8581          40 :                     sp.dfArea = getArea(discontinuityLeft) +
    8582          40 :                                 getArea(discontinuityRight);
    8583          40 :                     if (getInsideXSign(adfX0[iX]) >= 1)
    8584             :                     {
    8585          20 :                         sp.dfDstX1 = dfXMidReprojectedLeftTop;
    8586          20 :                         sp.dfDstY1 = dfYMidReprojectedTop;
    8587          20 :                         sp.dfDstX2 = dfXMidReprojectedLeftBottom;
    8588          20 :                         sp.dfDstY2 = dfYMidReprojectedBottom;
    8589             :                     }
    8590             :                     else
    8591             :                     {
    8592          20 :                         sp.dfDstX0 = dfXMidReprojectedRightTop;
    8593          20 :                         sp.dfDstY0 = dfYMidReprojectedTop;
    8594          20 :                         sp.dfDstX3 = dfXMidReprojectedRightBottom;
    8595          20 :                         sp.dfDstY3 = dfYMidReprojectedBottom;
    8596             :                     }
    8597             :                 }
    8598             : 
    8599             :                 // Bounding box of source pixel (expressed in target pixel
    8600             :                 // coordinates)
    8601             :                 CPLRectObj sRect;
    8602      103410 :                 sRect.minx = std::min(std::min(sp.dfDstX0, sp.dfDstX1),
    8603      103410 :                                       std::min(sp.dfDstX2, sp.dfDstX3));
    8604      103410 :                 sRect.miny = std::min(std::min(sp.dfDstY0, sp.dfDstY1),
    8605      103410 :                                       std::min(sp.dfDstY2, sp.dfDstY3));
    8606      103410 :                 sRect.maxx = std::max(std::max(sp.dfDstX0, sp.dfDstX1),
    8607      103410 :                                       std::max(sp.dfDstX2, sp.dfDstX3));
    8608      103410 :                 sRect.maxy = std::max(std::max(sp.dfDstY0, sp.dfDstY1),
    8609      103410 :                                       std::max(sp.dfDstY2, sp.dfDstY3));
    8610      103410 :                 if (!(sRect.minx < nDstXSize && sRect.maxx > 0 &&
    8611      101350 :                       sRect.miny < iYMax && sRect.maxy > iYMin))
    8612             :                 {
    8613       10852 :                     continue;
    8614             :                 }
    8615             : 
    8616       92558 :                 sp.iSrcX = iX;
    8617       92558 :                 sp.iSrcY = iY;
    8618             : 
    8619       92558 :                 if (!bIsAffineNoRotation)
    8620             :                 {
    8621             :                     // Check polygon validity (no self-crossing)
    8622       89745 :                     XYPair xy;
    8623       89745 :                     if (getIntersection(XYPair(sp.dfDstX0, sp.dfDstY0),
    8624       89745 :                                         XYPair(sp.dfDstX1, sp.dfDstY1),
    8625       89745 :                                         XYPair(sp.dfDstX2, sp.dfDstY2),
    8626      269235 :                                         XYPair(sp.dfDstX3, sp.dfDstY3), xy) ||
    8627       89745 :                         getIntersection(XYPair(sp.dfDstX1, sp.dfDstY1),
    8628       89745 :                                         XYPair(sp.dfDstX2, sp.dfDstY2),
    8629       89745 :                                         XYPair(sp.dfDstX0, sp.dfDstY0),
    8630      179490 :                                         XYPair(sp.dfDstX3, sp.dfDstY3), xy))
    8631             :                     {
    8632         113 :                         continue;
    8633             :                     }
    8634             :                 }
    8635             : 
    8636       92445 :                 CPLQuadTreeInsertWithBounds(
    8637             :                     hQuadTree,
    8638             :                     reinterpret_cast<void *>(
    8639       92445 :                         static_cast<uintptr_t>(sourcePixels.size())),
    8640             :                     &sRect);
    8641             : 
    8642       92445 :                 sourcePixels.push_back(sp);
    8643             :             }
    8644             :         }
    8645             :     }
    8646             : 
    8647          36 :     std::vector<double> adfRealValue(poWK->nBands);
    8648          36 :     std::vector<double> adfImagValue(poWK->nBands);
    8649          36 :     std::vector<double> adfBandDensity(poWK->nBands);
    8650          36 :     std::vector<double> adfWeight(poWK->nBands);
    8651             : 
    8652             : #ifdef CHECK_SUM_WITH_GEOS
    8653             :     auto hGEOSContext = OGRGeometry::createGEOSContext();
    8654             :     auto seq1 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
    8655             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 0, 0.0, 0.0);
    8656             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 1, 1.0, 0.0);
    8657             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 2, 1.0, 1.0);
    8658             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 3, 0.0, 1.0);
    8659             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 4, 0.0, 0.0);
    8660             :     auto hLR1 = GEOSGeom_createLinearRing_r(hGEOSContext, seq1);
    8661             :     auto hP1 = GEOSGeom_createPolygon_r(hGEOSContext, hLR1, nullptr, 0);
    8662             : 
    8663             :     auto seq2 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
    8664             :     auto hLR2 = GEOSGeom_createLinearRing_r(hGEOSContext, seq2);
    8665             :     auto hP2 = GEOSGeom_createPolygon_r(hGEOSContext, hLR2, nullptr, 0);
    8666             : #endif
    8667             : 
    8668             :     const XYPoly xy1{
    8669          36 :         {0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}};
    8670          36 :     XYPoly xy2(5);
    8671          36 :     XYPoly xy2_triangle(4);
    8672          36 :     XYPoly intersection;
    8673             : 
    8674             :     /* ==================================================================== */
    8675             :     /*      Loop over output lines.                                         */
    8676             :     /* ==================================================================== */
    8677         891 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    8678             :     {
    8679             :         CPLRectObj sRect;
    8680         873 :         sRect.miny = iDstY;
    8681         873 :         sRect.maxy = iDstY + 1;
    8682             : 
    8683             :         /* ====================================================================
    8684             :          */
    8685             :         /*      Loop over pixels in output scanline. */
    8686             :         /* ====================================================================
    8687             :          */
    8688      221042 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    8689             :         {
    8690      220169 :             sRect.minx = iDstX;
    8691      220169 :             sRect.maxx = iDstX + 1;
    8692      220169 :             int nSourcePixels = 0;
    8693             :             void **pahSourcePixel =
    8694      220169 :                 CPLQuadTreeSearch(hQuadTree, &sRect, &nSourcePixels);
    8695      220169 :             if (nSourcePixels == 0)
    8696             :             {
    8697        1258 :                 CPLFree(pahSourcePixel);
    8698        1262 :                 continue;
    8699             :             }
    8700             : 
    8701      218911 :             std::fill(adfRealValue.begin(), adfRealValue.end(), 0);
    8702      218911 :             std::fill(adfImagValue.begin(), adfImagValue.end(), 0);
    8703      218911 :             std::fill(adfBandDensity.begin(), adfBandDensity.end(), 0);
    8704      218911 :             std::fill(adfWeight.begin(), adfWeight.end(), 0);
    8705      218911 :             double dfDensity = 0;
    8706             :             // Just above zero to please Coverity Scan
    8707      218911 :             double dfTotalWeight = std::numeric_limits<double>::min();
    8708             : 
    8709             :             /* ====================================================================
    8710             :              */
    8711             :             /*          Iterate over each contributing source pixel to add its
    8712             :              */
    8713             :             /*          value weighed by the ratio of the area of its
    8714             :              * intersection  */
    8715             :             /*          with the target pixel divided by the area of the source
    8716             :              */
    8717             :             /*          pixel. */
    8718             :             /* ====================================================================
    8719             :              */
    8720     1020520 :             for (int i = 0; i < nSourcePixels; ++i)
    8721             :             {
    8722      801614 :                 const int iSourcePixel = static_cast<int>(
    8723      801614 :                     reinterpret_cast<uintptr_t>(pahSourcePixel[i]));
    8724      801614 :                 auto &sp = sourcePixels[iSourcePixel];
    8725             : 
    8726      801614 :                 double dfWeight = 0.0;
    8727      801614 :                 if (bIsAffineNoRotation)
    8728             :                 {
    8729             :                     // Optimization since the source pixel is a rectangle in
    8730             :                     // target pixel coordinates
    8731       16312 :                     double dfSrcMinX = std::min(sp.dfDstX0, sp.dfDstX2);
    8732       16312 :                     double dfSrcMaxX = std::max(sp.dfDstX0, sp.dfDstX2);
    8733       16312 :                     double dfSrcMinY = std::min(sp.dfDstY0, sp.dfDstY2);
    8734       16312 :                     double dfSrcMaxY = std::max(sp.dfDstY0, sp.dfDstY2);
    8735       16312 :                     double dfIntersMinX = std::max<double>(dfSrcMinX, iDstX);
    8736       16312 :                     double dfIntersMaxX = std::min(dfSrcMaxX, iDstX + 1.0);
    8737       16312 :                     double dfIntersMinY = std::max<double>(dfSrcMinY, iDstY);
    8738       16312 :                     double dfIntersMaxY = std::min(dfSrcMaxY, iDstY + 1.0);
    8739       16312 :                     dfWeight =
    8740       16312 :                         ((dfIntersMaxX - dfIntersMinX) *
    8741       16312 :                          (dfIntersMaxY - dfIntersMinY)) /
    8742       16312 :                         ((dfSrcMaxX - dfSrcMinX) * (dfSrcMaxY - dfSrcMinY));
    8743             :                 }
    8744             :                 else
    8745             :                 {
    8746             :                     // Compute the polygon of the source pixel in target pixel
    8747             :                     // coordinates, and shifted to the target pixel (unit square
    8748             :                     // coordinates)
    8749             : 
    8750      785302 :                     xy2[0] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
    8751      785302 :                     xy2[1] = {sp.dfDstX1 - iDstX, sp.dfDstY1 - iDstY};
    8752      785302 :                     xy2[2] = {sp.dfDstX2 - iDstX, sp.dfDstY2 - iDstY};
    8753      785302 :                     xy2[3] = {sp.dfDstX3 - iDstX, sp.dfDstY3 - iDstY};
    8754      785302 :                     xy2[4] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
    8755             : 
    8756      785302 :                     if (isConvex(xy2))
    8757             :                     {
    8758      785302 :                         getConvexPolyIntersection(xy1, xy2, intersection);
    8759      785302 :                         if (intersection.size() >= 3)
    8760             :                         {
    8761      468849 :                             dfWeight = getArea(intersection);
    8762             :                         }
    8763             :                     }
    8764             :                     else
    8765             :                     {
    8766             :                         // Split xy2 into 2 triangles.
    8767           0 :                         xy2_triangle[0] = xy2[0];
    8768           0 :                         xy2_triangle[1] = xy2[1];
    8769           0 :                         xy2_triangle[2] = xy2[2];
    8770           0 :                         xy2_triangle[3] = xy2[0];
    8771           0 :                         getConvexPolyIntersection(xy1, xy2_triangle,
    8772             :                                                   intersection);
    8773           0 :                         if (intersection.size() >= 3)
    8774             :                         {
    8775           0 :                             dfWeight = getArea(intersection);
    8776             :                         }
    8777             : 
    8778           0 :                         xy2_triangle[1] = xy2[2];
    8779           0 :                         xy2_triangle[2] = xy2[3];
    8780           0 :                         getConvexPolyIntersection(xy1, xy2_triangle,
    8781             :                                                   intersection);
    8782           0 :                         if (intersection.size() >= 3)
    8783             :                         {
    8784           0 :                             dfWeight += getArea(intersection);
    8785             :                         }
    8786             :                     }
    8787      785302 :                     if (dfWeight > 0.0)
    8788             :                     {
    8789      468828 :                         if (sp.dfArea == 0)
    8790       89592 :                             sp.dfArea = getArea(xy2);
    8791      468828 :                         dfWeight /= sp.dfArea;
    8792             :                     }
    8793             : 
    8794             : #ifdef CHECK_SUM_WITH_GEOS
    8795             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 0,
    8796             :                                          sp.dfDstX0 - iDstX,
    8797             :                                          sp.dfDstY0 - iDstY);
    8798             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 1,
    8799             :                                          sp.dfDstX1 - iDstX,
    8800             :                                          sp.dfDstY1 - iDstY);
    8801             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 2,
    8802             :                                          sp.dfDstX2 - iDstX,
    8803             :                                          sp.dfDstY2 - iDstY);
    8804             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 3,
    8805             :                                          sp.dfDstX3 - iDstX,
    8806             :                                          sp.dfDstY3 - iDstY);
    8807             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 4,
    8808             :                                          sp.dfDstX0 - iDstX,
    8809             :                                          sp.dfDstY0 - iDstY);
    8810             : 
    8811             :                     double dfWeightGEOS = 0.0;
    8812             :                     auto hIntersection =
    8813             :                         GEOSIntersection_r(hGEOSContext, hP1, hP2);
    8814             :                     if (hIntersection)
    8815             :                     {
    8816             :                         double dfIntersArea = 0.0;
    8817             :                         if (GEOSArea_r(hGEOSContext, hIntersection,
    8818             :                                        &dfIntersArea) &&
    8819             :                             dfIntersArea > 0)
    8820             :                         {
    8821             :                             double dfSourceArea = 0.0;
    8822             :                             if (GEOSArea_r(hGEOSContext, hP2, &dfSourceArea))
    8823             :                             {
    8824             :                                 dfWeightGEOS = dfIntersArea / dfSourceArea;
    8825             :                             }
    8826             :                         }
    8827             :                         GEOSGeom_destroy_r(hGEOSContext, hIntersection);
    8828             :                     }
    8829             :                     if (fabs(dfWeight - dfWeightGEOS) > 1e-5 * dfWeightGEOS)
    8830             :                     {
    8831             :                         /* ok */ printf("dfWeight=%f dfWeightGEOS=%f\n",
    8832             :                                         dfWeight, dfWeightGEOS);
    8833             :                         printf("xy2: ");  // ok
    8834             :                         for (const auto &xy : xy2)
    8835             :                             printf("[%f, %f], ", xy.first, xy.second);  // ok
    8836             :                         printf("\n");                                   // ok
    8837             :                         printf("intersection: ");                       // ok
    8838             :                         for (const auto &xy : intersection)
    8839             :                             printf("[%f, %f], ", xy.first, xy.second);  // ok
    8840             :                         printf("\n");                                   // ok
    8841             :                     }
    8842             : #endif
    8843             :                 }
    8844      801614 :                 if (dfWeight > 0.0)
    8845             :                 {
    8846      474099 :                     const GPtrDiff_t iSrcOffset =
    8847      474099 :                         sp.iSrcX +
    8848      474099 :                         static_cast<GPtrDiff_t>(sp.iSrcY) * nSrcXSize;
    8849      474099 :                     dfTotalWeight += dfWeight;
    8850             : 
    8851      474099 :                     if (poWK->pafUnifiedSrcDensity != nullptr)
    8852             :                     {
    8853           0 :                         dfDensity +=
    8854           0 :                             dfWeight *
    8855           0 :                             double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    8856             :                     }
    8857             :                     else
    8858             :                     {
    8859      474099 :                         dfDensity += dfWeight;
    8860             :                     }
    8861             : 
    8862     1818720 :                     for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    8863             :                     {
    8864             :                         // Returns pixel value if it is not no data.
    8865             :                         double dfBandDensity;
    8866             :                         double dfRealValue;
    8867             :                         double dfImagValue;
    8868     2689240 :                         if (!(GWKGetPixelValue(poWK, iBand, iSrcOffset,
    8869             :                                                &dfBandDensity, &dfRealValue,
    8870             :                                                &dfImagValue) &&
    8871     1344620 :                               dfBandDensity > BAND_DENSITY_THRESHOLD))
    8872             :                         {
    8873           0 :                             continue;
    8874             :                         }
    8875             : 
    8876     1344620 :                         adfRealValue[iBand] += dfRealValue * dfWeight;
    8877     1344620 :                         adfImagValue[iBand] += dfImagValue * dfWeight;
    8878     1344620 :                         adfBandDensity[iBand] += dfBandDensity * dfWeight;
    8879     1344620 :                         adfWeight[iBand] += dfWeight;
    8880             :                     }
    8881             :                 }
    8882             :             }
    8883             : 
    8884      218911 :             CPLFree(pahSourcePixel);
    8885             : 
    8886             :             /* --------------------------------------------------------------------
    8887             :              */
    8888             :             /*          Update destination pixel value. */
    8889             :             /* --------------------------------------------------------------------
    8890             :              */
    8891      218911 :             bool bHasFoundDensity = false;
    8892      218911 :             const GPtrDiff_t iDstOffset =
    8893      218911 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    8894      827822 :             for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    8895             :             {
    8896      608911 :                 if (adfWeight[iBand] > 0)
    8897             :                 {
    8898             :                     const double dfBandDensity =
    8899      608907 :                         adfBandDensity[iBand] / adfWeight[iBand];
    8900      608907 :                     if (dfBandDensity > BAND_DENSITY_THRESHOLD)
    8901             :                     {
    8902      608907 :                         bHasFoundDensity = true;
    8903      608907 :                         GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    8904      608907 :                                          adfRealValue[iBand],
    8905      608907 :                                          adfImagValue[iBand]);
    8906             :                     }
    8907             :                 }
    8908             :             }
    8909             : 
    8910      218911 :             if (!bHasFoundDensity)
    8911           4 :                 continue;
    8912             : 
    8913             :             /* --------------------------------------------------------------------
    8914             :              */
    8915             :             /*          Update destination density/validity masks. */
    8916             :             /* --------------------------------------------------------------------
    8917             :              */
    8918      218907 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity / dfTotalWeight);
    8919             : 
    8920      218907 :             if (poWK->panDstValid != nullptr)
    8921             :             {
    8922       11750 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    8923             :             }
    8924             :         }
    8925             : 
    8926             :         /* --------------------------------------------------------------------
    8927             :          */
    8928             :         /*      Report progress to the user, and optionally cancel out. */
    8929             :         /* --------------------------------------------------------------------
    8930             :          */
    8931         873 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    8932           0 :             break;
    8933             :     }
    8934             : 
    8935             : #ifdef CHECK_SUM_WITH_GEOS
    8936             :     GEOSGeom_destroy_r(hGEOSContext, hP1);
    8937             :     GEOSGeom_destroy_r(hGEOSContext, hP2);
    8938             :     OGRGeometry::freeGEOSContext(hGEOSContext);
    8939             : #endif
    8940          18 :     CPLQuadTreeDestroy(hQuadTree);
    8941          18 : }

Generated by: LCOV version 1.14