LCOV - code coverage report
Current view: top level - alg - gdalwarpkernel.cpp (source / functions) Hit Total Coverage
Test: gdal_filtered.info Lines: 3122 3868 80.7 %
Date: 2025-02-20 10:14:44 Functions: 171 187 91.4 %

          Line data    Source code
       1             : /******************************************************************************
       2             :  *
       3             :  * Project:  High Performance Image Reprojector
       4             :  * Purpose:  Implementation of the GDALWarpKernel class.  Implements the actual
       5             :  *           image warping for a "chunk" of input and output imagery already
       6             :  *           loaded into memory.
       7             :  * Author:   Frank Warmerdam, warmerdam@pobox.com
       8             :  *
       9             :  ******************************************************************************
      10             :  * Copyright (c) 2003, Frank Warmerdam <warmerdam@pobox.com>
      11             :  * Copyright (c) 2008-2013, Even Rouault <even dot rouault at spatialys.com>
      12             :  *
      13             :  * SPDX-License-Identifier: MIT
      14             :  ****************************************************************************/
      15             : 
      16             : #include "cpl_port.h"
      17             : #include "gdalwarper.h"
      18             : 
      19             : #include <cfloat>
      20             : #include <cmath>
      21             : #include <cstddef>
      22             : #include <cstdlib>
      23             : #include <cstring>
      24             : 
      25             : #include <algorithm>
      26             : #include <limits>
      27             : #include <mutex>
      28             : #include <new>
      29             : #include <utility>
      30             : #include <vector>
      31             : 
      32             : #include "cpl_atomic_ops.h"
      33             : #include "cpl_conv.h"
      34             : #include "cpl_error.h"
      35             : #include "cpl_float.h"
      36             : #include "cpl_mask.h"
      37             : #include "cpl_multiproc.h"
      38             : #include "cpl_progress.h"
      39             : #include "cpl_string.h"
      40             : #include "cpl_vsi.h"
      41             : #include "cpl_worker_thread_pool.h"
      42             : #include "cpl_quad_tree.h"
      43             : #include "gdal.h"
      44             : #include "gdal_alg.h"
      45             : #include "gdal_alg_priv.h"
      46             : #include "gdal_thread_pool.h"
      47             : #include "gdalresamplingkernels.h"
      48             : #include "gdalwarpkernel_opencl.h"
      49             : 
      50             : // #define CHECK_SUM_WITH_GEOS
      51             : #ifdef CHECK_SUM_WITH_GEOS
      52             : #include "ogr_geometry.h"
      53             : #include "ogr_geos.h"
      54             : #endif
      55             : 
      56             : #ifdef USE_NEON_OPTIMIZATIONS
      57             : #include "include_sse2neon.h"
      58             : #define USE_SSE2
      59             : 
      60             : #include "gdalsse_priv.h"
      61             : 
      62             : // We restrict to 64bit processors because they are guaranteed to have SSE2.
      63             : // Could possibly be used too on 32bit, but we would need to check at runtime.
      64             : #elif defined(__x86_64) || defined(_M_X64)
      65             : #define USE_SSE2
      66             : 
      67             : #include "gdalsse_priv.h"
      68             : 
      69             : #if __SSE4_1__
      70             : #include <smmintrin.h>
      71             : #endif
      72             : 
      73             : #if __SSE3__
      74             : #include <pmmintrin.h>
      75             : #endif
      76             : 
      77             : #endif
      78             : 
// Density cut-off values for band data (double) and source pixels (float).
// NOTE(review): their uses are outside this part of the file; presumably
// densities below these values are treated as "no contribution" - confirm
// against the kernel loops before changing either value.
constexpr double BAND_DENSITY_THRESHOLD = 0.0000000001;
constexpr float SRC_DENSITY_THRESHOLD = 0.000000001f;
      81             : 
      82             : // #define INSTANTIATE_FLOAT64_SSE2_IMPL
      83             : 
      84             : static const int anGWKFilterRadius[] = {
      85             :     0,  // Nearest neighbour
      86             :     1,  // Bilinear
      87             :     2,  // Cubic Convolution (Catmull-Rom)
      88             :     2,  // Cubic B-Spline
      89             :     3,  // Lanczos windowed sinc
      90             :     0,  // Average
      91             :     0,  // Mode
      92             :     0,  // Reserved GRA_Gauss=7
      93             :     0,  // Max
      94             :     0,  // Min
      95             :     0,  // Med
      96             :     0,  // Q1
      97             :     0,  // Q3
      98             :     0,  // Sum
      99             :     0,  // RMS
     100             : };
     101             : 
     102             : static double GWKBilinear(double dfX);
     103             : static double GWKCubic(double dfX);
     104             : static double GWKBSpline(double dfX);
     105             : static double GWKLanczosSinc(double dfX);
     106             : 
// Per-algorithm single-value kernel functions, indexed by the numeric value
// of GDALResampleAlg (see GWKGetFilterFunc()). Only bilinear, cubic, cubic
// B-spline and Lanczos have an entry; every other slot is nullptr, so callers
// must be prepared to receive a null function pointer.
static const FilterFuncType apfGWKFilter[] = {
    nullptr,         // Nearest neighbour
    GWKBilinear,     // Bilinear
    GWKCubic,        // Cubic Convolution (Catmull-Rom)
    GWKBSpline,      // Cubic B-Spline
    GWKLanczosSinc,  // Lanczos windowed sinc
    nullptr,         // Average
    nullptr,         // Mode
    nullptr,         // Reserved GRA_Gauss=7
    nullptr,         // Max
    nullptr,         // Min
    nullptr,         // Med
    nullptr,         // Q1
    nullptr,         // Q3
    nullptr,         // Sum
    nullptr,         // RMS
};
     124             : 
     125             : // TODO(schwehr): Can we make these functions have a const * const arg?
     126             : static double GWKBilinear4Values(double *padfVals);
     127             : static double GWKCubic4Values(double *padfVals);
     128             : static double GWKBSpline4Values(double *padfVals);
     129             : static double GWKLanczosSinc4Values(double *padfVals);
     130             : 
// Per-algorithm "4 values at a time" kernel functions, indexed by the numeric
// value of GDALResampleAlg (see GWKGetFilterFunc4Values()). Laid out exactly
// like apfGWKFilter: only bilinear, cubic, cubic B-spline and Lanczos have an
// entry, every other slot is nullptr.
static const FilterFunc4ValuesType apfGWKFilter4Values[] = {
    nullptr,                // Nearest neighbour
    GWKBilinear4Values,     // Bilinear
    GWKCubic4Values,        // Cubic Convolution (Catmull-Rom)
    GWKBSpline4Values,      // Cubic B-Spline
    GWKLanczosSinc4Values,  // Lanczos windowed sinc
    nullptr,                // Average
    nullptr,                // Mode
    nullptr,                // Reserved GRA_Gauss=7
    nullptr,                // Max
    nullptr,                // Min
    nullptr,                // Med
    nullptr,                // Q1
    nullptr,                // Q3
    nullptr,                // Sum
    nullptr,                // RMS
};
     148             : 
     149        9631 : int GWKGetFilterRadius(GDALResampleAlg eResampleAlg)
     150             : {
     151             :     static_assert(CPL_ARRAYSIZE(anGWKFilterRadius) == GRA_LAST_VALUE + 1,
     152             :                   "Bad size of anGWKFilterRadius");
     153        9631 :     return anGWKFilterRadius[eResampleAlg];
     154             : }
     155             : 
     156        3700 : FilterFuncType GWKGetFilterFunc(GDALResampleAlg eResampleAlg)
     157             : {
     158             :     static_assert(CPL_ARRAYSIZE(apfGWKFilter) == GRA_LAST_VALUE + 1,
     159             :                   "Bad size of apfGWKFilter");
     160        3700 :     return apfGWKFilter[eResampleAlg];
     161             : }
     162             : 
     163        3699 : FilterFunc4ValuesType GWKGetFilterFunc4Values(GDALResampleAlg eResampleAlg)
     164             : {
     165             :     static_assert(CPL_ARRAYSIZE(apfGWKFilter4Values) == GRA_LAST_VALUE + 1,
     166             :                   "Bad size of apfGWKFilter4Values");
     167        3699 :     return apfGWKFilter4Values[eResampleAlg];
     168             : }
     169             : 
     170             : #ifdef HAVE_OPENCL
     171             : static CPLErr GWKOpenCLCase(GDALWarpKernel *);
     172             : #endif
     173             : 
     174             : static CPLErr GWKGeneralCase(GDALWarpKernel *);
     175             : static CPLErr GWKRealCase(GDALWarpKernel *poWK);
     176             : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     177             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     178             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     179             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
     180             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
     181             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
     182             : #endif
     183             : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     184             : static CPLErr GWKNearestByte(GDALWarpKernel *poWK);
     185             : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     186             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     187             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
     188             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
     189             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
     190             : #endif
     191             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     192             : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     193             : static CPLErr GWKNearestShort(GDALWarpKernel *poWK);
     194             : static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK);
     195             : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
     196             : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK);
     197             : static CPLErr GWKAverageOrMode(GDALWarpKernel *);
     198             : static CPLErr GWKSumPreserving(GDALWarpKernel *);
     199             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
     200             : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
     201             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
     202             : 
     203             : /************************************************************************/
     204             : /*                           GWKJobStruct                               */
     205             : /************************************************************************/
     206             : 
     207             : struct GWKJobStruct
     208             : {
     209             :     std::mutex &mutex;
     210             :     std::condition_variable &cv;
     211             :     int &counter;
     212             :     bool &stopFlag;
     213             :     GDALWarpKernel *poWK;
     214             :     int iYMin;
     215             :     int iYMax;
     216             :     int (*pfnProgress)(GWKJobStruct *psJob);
     217             :     void *pTransformerArg;
     218             :     void (*pfnFunc)(
     219             :         void *);  // used by GWKRun() to assign the proper pTransformerArg
     220             : 
     221        2052 :     GWKJobStruct(std::mutex &mutex_, std::condition_variable &cv_,
     222             :                  int &counter_, bool &stopFlag_)
     223        2052 :         : mutex(mutex_), cv(cv_), counter(counter_), stopFlag(stopFlag_),
     224             :           poWK(nullptr), iYMin(0), iYMax(0), pfnProgress(nullptr),
     225        2052 :           pTransformerArg(nullptr), pfnFunc(nullptr)
     226             :     {
     227        2052 :     }
     228             : };
     229             : 
// State shared between GWKRun(), the worker threads and the progress
// callbacks. One instance is allocated by GWKThreadsCreate() and released by
// GWKThreadsEnd(); GWKGenericMonoThread() also uses a local instance as
// backing storage for its single job.
struct GWKThreadData
{
    // Job queue used to run the workers. Stays null when multithreading is
    // not enabled, in which case GWKRun() falls back to the mono-thread path.
    std::unique_ptr<CPLJobQueue> poJobQueue{};
    // Pre-allocated job descriptors (nMaxThreads of them), all referencing
    // mutex / cv / counter / stopFlag below.
    std::unique_ptr<std::vector<GWKJobStruct>> threadJobs{};
    // Upper bound on the number of jobs GWKRun() may submit.
    int nMaxThreads{0};
    // Progress counter incremented by the progress callbacks.
    int counter{0};
    // Set when the computation must be interrupted.
    bool stopFlag{false};
    // Protects counter, stopFlag, the transformer map and the thread counts.
    std::mutex mutex{};
    // Notified by GWKProgressThread(); waited on by GWKRun().
    std::condition_variable cv{};
    // True while a worker thread is borrowing pTransformerArgInput.
    bool bTransformerArgInputAssignedToThread{false};
    void *pTransformerArgInput{
        nullptr};  // owned by calling layer. Not to be destroyed
    // Transformer argument used by each worker thread, keyed by the value of
    // CPLGetPID() in that thread. Entries left in the map are destroyed by
    // GWKThreadsEnd().
    std::map<GIntBig, void *> mapThreadToTransformerArg{};
    // Number of jobs submitted by the current GWKRun() call.
    int nTotalThreadCountForThisRun = 0;
    // Number of those jobs that have started so far.
    int nCurThreadCountForThisRun = 0;
};
     246             : 
     247             : /************************************************************************/
     248             : /*                        GWKProgressThread()                           */
     249             : /************************************************************************/
     250             : 
     251             : // Return TRUE if the computation must be interrupted.
     252          18 : static int GWKProgressThread(GWKJobStruct *psJob)
     253             : {
     254          18 :     bool stop = false;
     255             :     {
     256          18 :         std::lock_guard<std::mutex> lock(psJob->mutex);
     257          18 :         psJob->counter++;
     258          18 :         stop = psJob->stopFlag;
     259             :     }
     260          18 :     psJob->cv.notify_one();
     261             : 
     262          18 :     return stop;
     263             : }
     264             : 
     265             : /************************************************************************/
     266             : /*                      GWKProgressMonoThread()                         */
     267             : /************************************************************************/
     268             : 
     269             : // Return TRUE if the computation must be interrupted.
     270      198871 : static int GWKProgressMonoThread(GWKJobStruct *psJob)
     271             : {
     272      198871 :     GDALWarpKernel *poWK = psJob->poWK;
     273             :     // coverity[missing_lock]
     274      198871 :     if (!poWK->pfnProgress(
     275      198871 :             poWK->dfProgressBase +
     276      198871 :                 poWK->dfProgressScale *
     277      198871 :                     (++psJob->counter / static_cast<double>(psJob->iYMax)),
     278             :             "", poWK->pProgress))
     279             :     {
     280           1 :         CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
     281           1 :         psJob->stopFlag = true;
     282           1 :         return TRUE;
     283             :     }
     284      198870 :     return FALSE;
     285             : }
     286             : 
     287             : /************************************************************************/
     288             : /*                       GWKGenericMonoThread()                         */
     289             : /************************************************************************/
     290             : 
     291        2047 : static CPLErr GWKGenericMonoThread(GDALWarpKernel *poWK,
     292             :                                    void (*pfnFunc)(void *pUserData))
     293             : {
     294        2047 :     GWKThreadData td;
     295             : 
     296             :     // NOTE: the mutex is not used.
     297        2047 :     GWKJobStruct job(td.mutex, td.cv, td.counter, td.stopFlag);
     298        2047 :     job.poWK = poWK;
     299        2047 :     job.iYMin = 0;
     300        2047 :     job.iYMax = poWK->nDstYSize;
     301        2047 :     job.pfnProgress = GWKProgressMonoThread;
     302        2047 :     job.pTransformerArg = poWK->pTransformerArg;
     303        2047 :     pfnFunc(&job);
     304             : 
     305        4094 :     return td.stopFlag ? CE_Failure : CE_None;
     306             : }
     307             : 
     308             : /************************************************************************/
     309             : /*                          GWKThreadsCreate()                          */
     310             : /************************************************************************/
     311             : 
     312        1421 : void *GWKThreadsCreate(char **papszWarpOptions,
     313             :                        GDALTransformerFunc /* pfnTransformer */,
     314             :                        void *pTransformerArg)
     315             : {
     316             :     const char *pszWarpThreads =
     317        1421 :         CSLFetchNameValue(papszWarpOptions, "NUM_THREADS");
     318        1421 :     if (pszWarpThreads == nullptr)
     319        1421 :         pszWarpThreads = CPLGetConfigOption("GDAL_NUM_THREADS", "1");
     320             : 
     321        1421 :     int nThreads = 0;
     322        1421 :     if (EQUAL(pszWarpThreads, "ALL_CPUS"))
     323           3 :         nThreads = CPLGetNumCPUs();
     324             :     else
     325        1418 :         nThreads = atoi(pszWarpThreads);
     326        1421 :     if (nThreads <= 1)
     327        1416 :         nThreads = 0;
     328        1421 :     if (nThreads > 128)
     329           0 :         nThreads = 128;
     330             : 
     331        1421 :     GWKThreadData *psThreadData = new GWKThreadData();
     332             :     auto poThreadPool =
     333        1421 :         nThreads > 0 ? GDALGetGlobalThreadPool(nThreads) : nullptr;
     334        1421 :     if (nThreads && poThreadPool)
     335             :     {
     336           5 :         psThreadData->nMaxThreads = nThreads;
     337           5 :         psThreadData->threadJobs.reset(new std::vector<GWKJobStruct>(
     338             :             nThreads,
     339           5 :             GWKJobStruct(psThreadData->mutex, psThreadData->cv,
     340          10 :                          psThreadData->counter, psThreadData->stopFlag)));
     341             : 
     342           5 :         psThreadData->poJobQueue = poThreadPool->CreateJobQueue();
     343           5 :         psThreadData->pTransformerArgInput = pTransformerArg;
     344             :     }
     345             : 
     346        1421 :     return psThreadData;
     347             : }
     348             : 
     349             : /************************************************************************/
     350             : /*                             GWKThreadsEnd()                          */
     351             : /************************************************************************/
     352             : 
     353        1421 : void GWKThreadsEnd(void *psThreadDataIn)
     354             : {
     355        1421 :     if (psThreadDataIn == nullptr)
     356           0 :         return;
     357             : 
     358        1421 :     GWKThreadData *psThreadData = static_cast<GWKThreadData *>(psThreadDataIn);
     359        1421 :     if (psThreadData->poJobQueue)
     360             :     {
     361             :         // cppcheck-suppress constVariableReference
     362          15 :         for (auto &pair : psThreadData->mapThreadToTransformerArg)
     363             :         {
     364          10 :             CPLAssert(pair.second != psThreadData->pTransformerArgInput);
     365          10 :             GDALDestroyTransformer(pair.second);
     366             :         }
     367           5 :         psThreadData->poJobQueue.reset();
     368             :     }
     369        1421 :     delete psThreadData;
     370             : }
     371             : 
     372             : /************************************************************************/
     373             : /*                         ThreadFuncAdapter()                          */
     374             : /************************************************************************/
     375             : 
// Job-queue entry point wrapping the real worker function (psJob->pfnFunc).
//
// pData is a GWKJobStruct *. Before calling the worker, this selects the
// transformer argument the current thread must use:
//   - the one already registered for this thread id, if any;
//   - otherwise, if this is the last job of the run to start and nobody
//     holds it, the caller's original transformer argument (borrowed for the
//     duration of the job and given back afterwards);
//   - otherwise a fresh clone, registered in the per-thread map.
// If cloning fails, the stop flag is raised and the worker is not called.
static void ThreadFuncAdapter(void *pData)
{
    GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    GWKThreadData *psThreadData =
        static_cast<GWKThreadData *>(psJob->poWK->psThreadData);

    // Look if we have already a per-thread transformer
    void *pTransformerArg = nullptr;
    const GIntBig nThreadId = CPLGetPID();

    {
        std::lock_guard<std::mutex> lock(psThreadData->mutex);
        // Count this job as started for the current run.
        ++psThreadData->nCurThreadCountForThisRun;

        auto oIter = psThreadData->mapThreadToTransformerArg.find(nThreadId);
        if (oIter != psThreadData->mapThreadToTransformerArg.end())
        {
            pTransformerArg = oIter->second;
        }
        else if (!psThreadData->bTransformerArgInputAssignedToThread &&
                 psThreadData->nCurThreadCountForThisRun ==
                     psThreadData->nTotalThreadCountForThisRun)
        {
            // If we are the last thread to be started, temporarily borrow the
            // original transformer
            psThreadData->bTransformerArgInputAssignedToThread = true;
            pTransformerArg = psThreadData->pTransformerArgInput;
            psThreadData->mapThreadToTransformerArg[nThreadId] =
                pTransformerArg;
        }

        if (pTransformerArg == nullptr)
        {
            // About to clone: there must be something to clone from, and it
            // must not currently be lent to another thread.
            CPLAssert(psThreadData->pTransformerArgInput != nullptr);
            CPLAssert(!psThreadData->bTransformerArgInputAssignedToThread);
        }
    }

    // If no transformer assigned to current thread, instantiate one
    if (pTransformerArg == nullptr)
    {
        // This somehow assumes that GDALCloneTransformer() is thread-safe
        // which should normally be the case.
        // The clone is made outside of the mutex.
        pTransformerArg =
            GDALCloneTransformer(psThreadData->pTransformerArgInput);

        // Lock for the stop flag and the transformer map.
        std::lock_guard<std::mutex> lock(psThreadData->mutex);
        if (!pTransformerArg)
        {
            // NOTE(review): this path raises the stop flag without notifying
            // psThreadData->cv, on which GWKRun() waits while reporting
            // progress - confirm the waiter cannot block because of this.
            psJob->stopFlag = true;
            return;
        }
        psThreadData->mapThreadToTransformerArg[nThreadId] = pTransformerArg;
    }

    psJob->pTransformerArg = pTransformerArg;
    psJob->pfnFunc(pData);

    // Give back original transformer, if borrowed.
    {
        std::lock_guard<std::mutex> lock(psThreadData->mutex);
        if (psThreadData->bTransformerArgInputAssignedToThread &&
            pTransformerArg == psThreadData->pTransformerArgInput)
        {
            // Unregister it so that GWKThreadsEnd() never destroys the
            // caller-owned transformer.
            psThreadData->mapThreadToTransformerArg.erase(
                psThreadData->mapThreadToTransformerArg.find(nThreadId));
            psThreadData->bTransformerArgInputAssignedToThread = false;
        }
    }
}
     447             : 
     448             : /************************************************************************/
     449             : /*                                GWKRun()                              */
     450             : /************************************************************************/
     451             : 
     452        2052 : static CPLErr GWKRun(GDALWarpKernel *poWK, const char *pszFuncName,
     453             :                      void (*pfnFunc)(void *pUserData))
     454             : 
     455             : {
     456        2052 :     const int nDstYSize = poWK->nDstYSize;
     457             : 
     458        2052 :     CPLDebug("GDAL",
     459             :              "GDALWarpKernel()::%s() "
     460             :              "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d",
     461             :              pszFuncName, poWK->nSrcXOff, poWK->nSrcYOff, poWK->nSrcXSize,
     462             :              poWK->nSrcYSize, poWK->nDstXOff, poWK->nDstYOff, poWK->nDstXSize,
     463             :              poWK->nDstYSize);
     464             : 
     465        2052 :     if (!poWK->pfnProgress(poWK->dfProgressBase, "", poWK->pProgress))
     466             :     {
     467           0 :         CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
     468           0 :         return CE_Failure;
     469             :     }
     470             : 
     471        2052 :     GWKThreadData *psThreadData =
     472             :         static_cast<GWKThreadData *>(poWK->psThreadData);
     473        2052 :     if (psThreadData == nullptr || psThreadData->poJobQueue == nullptr)
     474             :     {
     475        2047 :         return GWKGenericMonoThread(poWK, pfnFunc);
     476             :     }
     477             : 
     478           5 :     int nThreads = std::min(psThreadData->nMaxThreads, nDstYSize / 2);
     479             :     // Config option mostly useful for tests to be able to test multithreading
     480             :     // with small rasters
     481             :     const int nWarpChunkSize =
     482           5 :         atoi(CPLGetConfigOption("WARP_THREAD_CHUNK_SIZE", "65536"));
     483           5 :     if (nWarpChunkSize > 0)
     484             :     {
     485           3 :         GIntBig nChunks =
     486           3 :             static_cast<GIntBig>(nDstYSize) * poWK->nDstXSize / nWarpChunkSize;
     487           3 :         if (nThreads > nChunks)
     488           1 :             nThreads = static_cast<int>(nChunks);
     489             :     }
     490           5 :     if (nThreads <= 0)
     491           1 :         nThreads = 1;
     492             : 
     493           5 :     CPLDebug("WARP", "Using %d threads", nThreads);
     494             : 
     495           5 :     auto &jobs = *psThreadData->threadJobs;
     496           5 :     CPLAssert(static_cast<int>(jobs.size()) >= nThreads);
     497             :     // Fill-in job structures.
     498          20 :     for (int i = 0; i < nThreads; ++i)
     499             :     {
     500          15 :         auto &job = jobs[i];
     501          15 :         job.poWK = poWK;
     502          15 :         job.iYMin =
     503          15 :             static_cast<int>(static_cast<int64_t>(i) * nDstYSize / nThreads);
     504          15 :         job.iYMax = static_cast<int>(static_cast<int64_t>(i + 1) * nDstYSize /
     505          15 :                                      nThreads);
     506          15 :         if (poWK->pfnProgress != GDALDummyProgress)
     507           1 :             job.pfnProgress = GWKProgressThread;
     508          15 :         job.pfnFunc = pfnFunc;
     509             :     }
     510             : 
     511             :     bool bStopFlag;
     512             :     {
     513           5 :         std::unique_lock<std::mutex> lock(psThreadData->mutex);
     514             : 
     515           5 :         psThreadData->nTotalThreadCountForThisRun = nThreads;
     516             :         // coverity[missing_lock]
     517           5 :         psThreadData->nCurThreadCountForThisRun = 0;
     518             : 
     519             :         // Start jobs.
     520          20 :         for (int i = 0; i < nThreads; ++i)
     521             :         {
     522          15 :             auto &job = jobs[i];
     523          15 :             psThreadData->poJobQueue->SubmitJob(ThreadFuncAdapter,
     524             :                                                 static_cast<void *>(&job));
     525             :         }
     526             : 
     527             :         /* --------------------------------------------------------------------
     528             :          */
     529             :         /*      Report progress. */
     530             :         /* --------------------------------------------------------------------
     531             :          */
     532           5 :         if (poWK->pfnProgress != GDALDummyProgress)
     533             :         {
     534           1 :             while (psThreadData->counter < nDstYSize)
     535             :             {
     536           1 :                 psThreadData->cv.wait(lock);
     537           1 :                 if (!poWK->pfnProgress(poWK->dfProgressBase +
     538           1 :                                            poWK->dfProgressScale *
     539           1 :                                                (psThreadData->counter /
     540           1 :                                                 static_cast<double>(nDstYSize)),
     541             :                                        "", poWK->pProgress))
     542             :                 {
     543           1 :                     CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
     544           1 :                     psThreadData->stopFlag = true;
     545           1 :                     break;
     546             :                 }
     547             :             }
     548             :         }
     549             : 
     550           5 :         bStopFlag = psThreadData->stopFlag;
     551             :     }
     552             : 
     553             :     /* -------------------------------------------------------------------- */
     554             :     /*      Wait for all jobs to complete.                                  */
     555             :     /* -------------------------------------------------------------------- */
     556           5 :     psThreadData->poJobQueue->WaitCompletion();
     557             : 
     558           5 :     return bStopFlag ? CE_Failure : CE_None;
     559             : }
     560             : 
     561             : /************************************************************************/
     562             : /* ==================================================================== */
     563             : /*                            GDALWarpKernel                            */
     564             : /* ==================================================================== */
     565             : /************************************************************************/
     566             : 
     567             : /**
     568             :  * \class GDALWarpKernel "gdalwarper.h"
     569             :  *
     570             :  * Low level image warping class.
     571             :  *
     572             :  * This class is responsible for low level image warping for one
     573             :  * "chunk" of imagery.  The class is essentially a structure with all
     574             :  * data members public - primarily so that new special-case functions
     575             :  * can be added without changing the class declaration.
     576             :  *
     577             :  * Applications are normally intended to interact with warping facilities
     578             :  * through the GDALWarpOperation class, though the GDALWarpKernel can in
     579             :  * theory be used directly if great care is taken in setting up the
     580             :  * control data.
     581             :  *
     582             :  * <h3>Design Issues</h3>
     583             :  *
     584             :  * The intention is that PerformWarp() would analyze the setup in terms
     585             :  * of the datatype, resampling type, and validity/density mask usage and
     586             :  * pick one of many specific implementations of the warping algorithm over
     587             :  * a continuum of optimization vs. generality.  At one end there will be a
     588             :  * reference general purpose implementation of the algorithm that supports
     589             :  * any data type (working internally in double precision complex), all three
     590             :  * resampling types, and any or all of the validity/density masks.  At the
     591             :  * other end would be highly optimized algorithms for common cases like
     592             :  * nearest neighbour resampling on GDT_Byte data with no masks.
     593             :  *
     594             :  * The full set of optimized versions have not been decided but we should
     595             :  * expect to have at least:
     596             :  *  - One for each resampling algorithm for 8bit data with no masks.
     597             :  *  - One for each resampling algorithm for float data with no masks.
     598             :  *  - One for each resampling algorithm for float data with any/all masks
     599             :  *    (essentially the generic case for just float data).
     600             :  *  - One for each resampling algorithm for 8bit data with support for
     601             :  *    input validity masks (per band or per pixel).  This handles the common
     602             :  *    case of nodata masking.
     603             :  *  - One for each resampling algorithm for float data with support for
     604             :  *    input validity masks (per band or per pixel).  This handles the common
     605             :  *    case of nodata masking.
     606             :  *
     607             :  * Some of the specializations would operate on all bands in one pass
     608             :  * (especially the ones without masking would do this), while others might
     609             :  * process each band individually to reduce code complexity.
     610             :  *
     611             :  * <h3>Masking Semantics</h3>
     612             :  *
     613             :  * A detailed explanation of the semantics of the validity and density masks,
     614             :  * and their effects on resampling kernels is needed here.
     615             :  */
     616             : 
     617             : /************************************************************************/
     618             : /*                     GDALWarpKernel Data Members                      */
     619             : /************************************************************************/
     620             : 
     621             : /**
     622             :  * \var GDALResampleAlg GDALWarpKernel::eResample;
     623             :  *
     624             :  * Resampling algorithm.
     625             :  *
     626             :  * The resampling algorithm to use.  One of GRA_NearestNeighbour, GRA_Bilinear,
     627             :  * GRA_Cubic, GRA_CubicSpline, GRA_Lanczos, GRA_Average, GRA_RMS,
     628             :  * GRA_Mode or GRA_Sum.
     629             :  *
     630             :  * This field is required. GRA_NearestNeighbour may be used as a default
     631             :  * value.
     632             :  */
     633             : 
     634             : /**
     635             :  * \var GDALDataType GDALWarpKernel::eWorkingDataType;
     636             :  *
     637             :  * Working pixel data type.
     638             :  *
     639             :  * The datatype of pixels in the source image (papabySrcImage) and
     640             :  * destination image (papabyDstImage) buffers.  Note that operations on
     641             :  * some data types (such as GDT_Byte) may be much better optimized than other
     642             :  * less common cases.
     643             :  *
     644             :  * This field is required.  It may not be GDT_Unknown.
     645             :  */
     646             : 
     647             : /**
     648             :  * \var int GDALWarpKernel::nBands;
     649             :  *
     650             :  * Number of bands.
     651             :  *
     652             :  * The number of bands (layers) of imagery being warped.  Determines the
     653             :  * number of entries in the papabySrcImage, papanBandSrcValid,
     654             :  * and papabyDstImage arrays.
     655             :  *
     656             :  * This field is required.
     657             :  */
     658             : 
     659             : /**
     660             :  * \var int GDALWarpKernel::nSrcXSize;
     661             :  *
     662             :  * Source image width in pixels.
     663             :  *
     664             :  * This field is required.
     665             :  */
     666             : 
     667             : /**
     668             :  * \var int GDALWarpKernel::nSrcYSize;
     669             :  *
     670             :  * Source image height in pixels.
     671             :  *
     672             :  * This field is required.
     673             :  */
     674             : 
     675             : /**
     676             :  * \var double GDALWarpKernel::dfSrcXExtraSize;
     677             :  *
     678             :  * Number of pixels included in nSrcXSize that are present on the edges of
     679             :  * the area of interest to take into account the width of the kernel.
     680             :  *
     681             :  * This field is required.
     682             :  */
     683             : 
     684             : /**
     685             :  * \var double GDALWarpKernel::dfSrcYExtraSize;
     686             :  *
     687             :  * Number of pixels included in nSrcYSize that are present on the edges of
     688             :  * the area of interest to take into account the height of the kernel.
     689             :  *
     690             :  * This field is required.
     691             :  */
     692             : 
     693             : /**
     694             :  * \var GByte **GDALWarpKernel::papabySrcImage;
     695             :  *
     696             :  * Array of source image band data.
     697             :  *
     698             :  * This is an array (of size GDALWarpKernel::nBands) of pointers
     699             :  * to image data.  Each individual band of image data is organized as a single
     700             :  * block of image data in left to right, then bottom to top order.  The actual
     701             :  * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
     702             :  *
     703             :  * To access the pixel value for the (x=3, y=4) pixel (zero based) of
     704             :  * the second band with eWorkingDataType set to GDT_Float32 use code like
     705             :  * this:
     706             :  *
     707             :  * \code
     708             :  *   float dfPixelValue;
     709             :  *   int   nBand = 2-1;  // Band indexes are zero based.
     710             :  *   int   nPixel = 3; // Zero based.
     711             :  *   int   nLine = 4;  // Zero based.
     712             :  *
     713             :  *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
     714             :  *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
     715             :  *   assert( nBand >= 0 && nBand < poKern->nBands );
     716             :  *   dfPixelValue = ((float *) poKern->papabySrcImage[nBand])
     717             :  *                                  [nPixel + nLine * poKern->nSrcXSize];
     718             :  * \endcode
     719             :  *
     720             :  * This field is required.
     721             :  */
     722             : 
     723             : /**
     724             :  * \var GUInt32 **GDALWarpKernel::papanBandSrcValid;
     725             :  *
     726             :  * Per band validity mask for source pixels.
     727             :  *
     728             :  * Array of pixel validity mask layers for each source band.   Each of
     729             :  * the mask layers is the same size (in pixels) as the source image with
     730             :  * one bit per pixel.  Note that it is legal (and common) for this to be
     731             :  * NULL indicating that none of the pixels are invalidated, or for some
     732             :  * band validity masks to be NULL in which case all pixels of the band are
     733             :  * valid.  The following code can be used to test the validity of a particular
     734             :  * pixel.
     735             :  *
     736             :  * \code
     737             :  *   int   bIsValid = TRUE;
     738             :  *   int   nBand = 2-1;  // Band indexes are zero based.
     739             :  *   int   nPixel = 3; // Zero based.
     740             :  *   int   nLine = 4;  // Zero based.
     741             :  *
     742             :  *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
     743             :  *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
     744             :  *   assert( nBand >= 0 && nBand < poKern->nBands );
     745             :  *
     746             :  *   if( poKern->papanBandSrcValid != NULL
     747             :  *       && poKern->papanBandSrcValid[nBand] != NULL )
     748             :  *   {
     749             :  *       GUInt32 *panBandMask = poKern->papanBandSrcValid[nBand];
     750             :  *       int    iPixelOffset = nPixel + nLine * poKern->nSrcXSize;
     751             :  *
     752             :  *       bIsValid = CPLMaskGet(panBandMask, iPixelOffset);
     753             :  *   }
     754             :  * \endcode
     755             :  */
     756             : 
     757             : /**
     758             :  * \var GUInt32 *GDALWarpKernel::panUnifiedSrcValid;
     759             :  *
     760             :  * Per pixel validity mask for source pixels.
     761             :  *
     762             :  * A single validity mask layer that applies to the pixels of all source
     763             :  * bands.  It is accessed similarly to papanBandSrcValid, but without the
     764             :  * extra level of band indirection.
     765             :  *
     766             :  * This pointer may be NULL indicating that all pixels are valid.
     767             :  *
     768             :  * Note that if both panUnifiedSrcValid, and papanBandSrcValid are available,
     769             :  * the pixel isn't considered to be valid unless both arrays indicate it is
     770             :  * valid.
     771             :  */
     772             : 
     773             : /**
     774             :  * \var float *GDALWarpKernel::pafUnifiedSrcDensity;
     775             :  *
     776             :  * Per pixel density mask for source pixels.
     777             :  *
     778             :  * A single density mask layer that applies to the pixels of all source
     779             :  * bands.  It contains values between 0.0 and 1.0 indicating the degree to
     780             :  * which this pixel should be allowed to contribute to the output result.
     781             :  *
     782             :  * This pointer may be NULL indicating that all pixels have a density of 1.0.
     783             :  *
     784             :  * The density for a pixel may be accessed like this:
     785             :  *
     786             :  * \code
     787             :  *   float fDensity = 1.0;
     788             :  *   int nPixel = 3;  // Zero based.
     789             :  *   int nLine = 4;   // Zero based.
     790             :  *
     791             :  *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
     792             :  *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
     793             :  *   if( poKern->pafUnifiedSrcDensity != NULL )
     794             :  *     fDensity = poKern->pafUnifiedSrcDensity
     795             :  *                                  [nPixel + nLine * poKern->nSrcXSize];
     796             :  * \endcode
     797             :  */
     798             : 
     799             : /**
     800             :  * \var int GDALWarpKernel::nDstXSize;
     801             :  *
     802             :  * Width of destination image in pixels.
     803             :  *
     804             :  * This field is required.
     805             :  */
     806             : 
     807             : /**
     808             :  * \var int GDALWarpKernel::nDstYSize;
     809             :  *
     810             :  * Height of destination image in pixels.
     811             :  *
     812             :  * This field is required.
     813             :  */
     814             : 
     815             : /**
     816             :  * \var GByte **GDALWarpKernel::papabyDstImage;
     817             :  *
     818             :  * Array of destination image band data.
     819             :  *
     820             :  * This is an array (of size GDALWarpKernel::nBands) of pointers
     821             :  * to image data.  Each individual band of image data is organized as a single
     822             :  * block of image data in left to right, then bottom to top order.  The actual
     823             :  * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
     824             :  *
     825             :  * To access the pixel value for the (x=3, y=4) pixel (zero based) of
     826             :  * the second band with eWorkingDataType set to GDT_Float32 use code like
     827             :  * this:
     828             :  *
     829             :  * \code
     830             :  *   float dfPixelValue;
     831             :  *   int   nBand = 2-1;  // Band indexes are zero based.
     832             :  *   int   nPixel = 3; // Zero based.
     833             :  *   int   nLine = 4;  // Zero based.
     834             :  *
     835             :  *   assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
     836             :  *   assert( nLine >= 0 && nLine < poKern->nDstYSize );
     837             :  *   assert( nBand >= 0 && nBand < poKern->nBands );
     838             :  *   dfPixelValue = ((float *) poKern->papabyDstImage[nBand])
     839             :  *                                  [nPixel + nLine * poKern->nDstXSize];
     840             :  * \endcode
     841             :  *
     842             :  * This field is required.
     843             :  */
     844             : 
     845             : /**
     846             :  * \var GUInt32 *GDALWarpKernel::panDstValid;
     847             :  *
     848             :  * Per pixel validity mask for destination pixels.
     849             :  *
     850             :  * A single validity mask layer that applies to the pixels of all destination
     851             :  * bands.  It is accessed similarly to panUnifiedSrcValid, but based
     852             :  * on the size of the destination image.
     853             :  *
     854             :  * This pointer may be NULL indicating that all pixels are valid.
     855             :  */
     856             : 
     857             : /**
     858             :  * \var float *GDALWarpKernel::pafDstDensity;
     859             :  *
     860             :  * Per pixel density mask for destination pixels.
     861             :  *
     862             :  * A single density mask layer that applies to the pixels of all destination
     863             :  * bands.  It contains values between 0.0 and 1.0.
     864             :  *
     865             :  * This pointer may be NULL indicating that all pixels have a density of 1.0.
     866             :  *
     867             :  * The density for a pixel may be accessed like this:
     868             :  *
     869             :  * \code
     870             :  *   float fDensity = 1.0;
     871             :  *   int   nPixel = 3; // Zero based.
     872             :  *   int   nLine = 4;  // Zero based.
     873             :  *
     874             :  *   assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
     875             :  *   assert( nLine >= 0 && nLine < poKern->nDstYSize );
     876             :  *   if( poKern->pafDstDensity != NULL )
     877             :  *     fDensity = poKern->pafDstDensity[nPixel + nLine * poKern->nDstXSize];
     878             :  * \endcode
     879             :  */
     880             : 
     881             : /**
     882             :  * \var int GDALWarpKernel::nSrcXOff;
     883             :  *
     884             :  * X offset to source pixel coordinates for transformation.
     885             :  *
     886             :  * See pfnTransformer.
     887             :  *
     888             :  * This field is required.
     889             :  */
     890             : 
     891             : /**
     892             :  * \var int GDALWarpKernel::nSrcYOff;
     893             :  *
     894             :  * Y offset to source pixel coordinates for transformation.
     895             :  *
     896             :  * See pfnTransformer.
     897             :  *
     898             :  * This field is required.
     899             :  */
     900             : 
     901             : /**
     902             :  * \var int GDALWarpKernel::nDstXOff;
     903             :  *
     904             :  * X offset to destination pixel coordinates for transformation.
     905             :  *
     906             :  * See pfnTransformer.
     907             :  *
     908             :  * This field is required.
     909             :  */
     910             : 
     911             : /**
     912             :  * \var int GDALWarpKernel::nDstYOff;
     913             :  *
     914             :  * Y offset to destination pixel coordinates for transformation.
     915             :  *
     916             :  * See pfnTransformer.
     917             :  *
     918             :  * This field is required.
     919             :  */
     920             : 
     921             : /**
     922             :  * \var GDALTransformerFunc GDALWarpKernel::pfnTransformer;
     923             :  *
     924             :  * Source/destination location transformer.
     925             :  *
     926             :  * The function to call to transform coordinates between source image
     927             :  * pixel/line coordinates and destination image pixel/line coordinates.
     928             :  * See GDALTransformerFunc() for details of the semantics of this function.
     929             :  *
     930             :  * The GDALWarpKernel algorithm will only ever use this transformer in
     931             :  * "destination to source" mode (bDstToSrc=TRUE), and will always pass
     932             :  * partial or complete scanlines of points in the destination image as
     933             :  * input.  This means, among other things, that it is safe to use the
     934             :  * approximating transform GDALApproxTransform() as the transformation
     935             :  * function.
     936             :  *
     937             :  * Source and destination images may be subsets of a larger overall image.
     938             :  * The transformation algorithms will expect and return pixel/line coordinates
     939             :  * in terms of this larger image, so coordinates need to be offset by
     940             :  * the offsets specified in nSrcXOff, nSrcYOff, nDstXOff, and nDstYOff before
     941             :  * passing to pfnTransformer, and after return from it.
     942             :  *
     943             :  * The GDALWarpKernel::pTransformerArg value will be passed as the callback
     944             :  * data to this function when it is called.
     945             :  *
     946             :  * This field is required.
     947             :  */
     948             : 
     949             : /**
     950             :  * \var void *GDALWarpKernel::pTransformerArg;
     951             :  *
     952             :  * Callback data for pfnTransformer.
     953             :  *
     954             :  * This field may be NULL if not required for the pfnTransformer being used.
     955             :  */
     956             : 
     957             : /**
     958             :  * \var GDALProgressFunc GDALWarpKernel::pfnProgress;
     959             :  *
     960             :  * The function to call to report progress of the algorithm, and to check
     961             :  * for a requested termination of the operation.  It operates according to
     962             :  * GDALProgressFunc() semantics.
     963             :  *
     964             :  * Generally speaking the progress function will be invoked for each
     965             :  * scanline of the destination buffer that has been processed.
     966             :  *
     967             :  * This field may be NULL (internally set to GDALDummyProgress()).
     968             :  */
     969             : 
     970             : /**
     971             :  * \var void *GDALWarpKernel::pProgress;
     972             :  *
     973             :  * Callback data for pfnProgress.
     974             :  *
     975             :  * This field may be NULL if not required for the pfnProgress being used.
     976             :  */
     977             : 
     978             : /************************************************************************/
     979             : /*                           GDALWarpKernel()                           */
     980             : /************************************************************************/
     981             : 
     982        2358 : GDALWarpKernel::GDALWarpKernel()
     983             :     : papszWarpOptions(nullptr), eResample(GRA_NearestNeighbour),
     984             :       eWorkingDataType(GDT_Unknown), nBands(0), nSrcXSize(0), nSrcYSize(0),
     985             :       dfSrcXExtraSize(0.0), dfSrcYExtraSize(0.0), papabySrcImage(nullptr),
     986             :       papanBandSrcValid(nullptr), panUnifiedSrcValid(nullptr),
     987             :       pafUnifiedSrcDensity(nullptr), nDstXSize(0), nDstYSize(0),
     988             :       papabyDstImage(nullptr), panDstValid(nullptr), pafDstDensity(nullptr),
     989             :       dfXScale(1.0), dfYScale(1.0), dfXFilter(0.0), dfYFilter(0.0), nXRadius(0),
     990             :       nYRadius(0), nFiltInitX(0), nFiltInitY(0), nSrcXOff(0), nSrcYOff(0),
     991             :       nDstXOff(0), nDstYOff(0), pfnTransformer(nullptr),
     992             :       pTransformerArg(nullptr), pfnProgress(GDALDummyProgress),
     993             :       pProgress(nullptr), dfProgressBase(0.0), dfProgressScale(1.0),
     994             :       padfDstNoDataReal(nullptr), psThreadData(nullptr),
     995        2358 :       eTieStrategy(GWKTS_First)
     996             : {
     997        2358 : }
     998             : 
     999             : /************************************************************************/
    1000             : /*                          ~GDALWarpKernel()                           */
    1001             : /************************************************************************/
    1002             : 
    1003        2358 : GDALWarpKernel::~GDALWarpKernel()
    1004             : {
    1005        2358 : }
    1006             : 
    1007             : /************************************************************************/
    1008             : /*                            PerformWarp()                             */
    1009             : /************************************************************************/
    1010             : 
    1011             : /**
    1012             :  * \fn CPLErr GDALWarpKernel::PerformWarp();
    1013             :  *
    1014             :  * This method performs the warp described in the GDALWarpKernel.
    1015             :  *
    1016             :  * @return CE_None on success or CE_Failure if an error occurs.
    1017             :  */
    1018             : 
    1019        2356 : CPLErr GDALWarpKernel::PerformWarp()
    1020             : 
    1021             : {
    1022        2356 :     const CPLErr eErr = Validate();
    1023             : 
    1024        2356 :     if (eErr != CE_None)
    1025           1 :         return eErr;
    1026             : 
    1027             :     // See #2445 and #3079.
    1028        2355 :     if (nSrcXSize <= 0 || nSrcYSize <= 0)
    1029             :     {
    1030         303 :         if (!pfnProgress(dfProgressBase + dfProgressScale, "", pProgress))
    1031             :         {
    1032           0 :             CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
    1033           0 :             return CE_Failure;
    1034             :         }
    1035         303 :         return CE_None;
    1036             :     }
    1037             : 
    1038             :     /* -------------------------------------------------------------------- */
    1039             :     /*      Pre-calculate resampling scales and window sizes for filtering. */
    1040             :     /* -------------------------------------------------------------------- */
    1041             : 
    1042        2052 :     dfXScale = static_cast<double>(nDstXSize) / (nSrcXSize - dfSrcXExtraSize);
    1043        2052 :     dfYScale = static_cast<double>(nDstYSize) / (nSrcYSize - dfSrcYExtraSize);
    1044        2052 :     if (nSrcXSize >= nDstXSize && nSrcXSize <= nDstXSize + dfSrcXExtraSize)
    1045        1283 :         dfXScale = 1.0;
    1046        2052 :     if (nSrcYSize >= nDstYSize && nSrcYSize <= nDstYSize + dfSrcYExtraSize)
    1047        1036 :         dfYScale = 1.0;
    1048        2052 :     if (dfXScale < 1.0)
    1049             :     {
    1050         548 :         double dfXReciprocalScale = 1.0 / dfXScale;
    1051         548 :         const int nXReciprocalScale =
    1052         548 :             static_cast<int>(dfXReciprocalScale + 0.5);
    1053         548 :         if (fabs(dfXReciprocalScale - nXReciprocalScale) < 0.05)
    1054         431 :             dfXScale = 1.0 / nXReciprocalScale;
    1055             :     }
    1056        2052 :     if (dfYScale < 1.0)
    1057             :     {
    1058         517 :         double dfYReciprocalScale = 1.0 / dfYScale;
    1059         517 :         const int nYReciprocalScale =
    1060         517 :             static_cast<int>(dfYReciprocalScale + 0.5);
    1061         517 :         if (fabs(dfYReciprocalScale - nYReciprocalScale) < 0.05)
    1062         368 :             dfYScale = 1.0 / nYReciprocalScale;
    1063             :     }
    1064             : 
    1065             :     // XSCALE and YSCALE undocumented for now. Can help in some cases.
    1066             :     // Best would probably be a per-pixel scale computation.
    1067        2052 :     const char *pszXScale = CSLFetchNameValue(papszWarpOptions, "XSCALE");
    1068        2052 :     if (pszXScale != nullptr && !EQUAL(pszXScale, "FROM_GRID_SAMPLING"))
    1069           1 :         dfXScale = CPLAtof(pszXScale);
    1070        2052 :     const char *pszYScale = CSLFetchNameValue(papszWarpOptions, "YSCALE");
    1071        2052 :     if (pszYScale != nullptr)
    1072           1 :         dfYScale = CPLAtof(pszYScale);
    1073             : 
    1074             :     // If the xscale is significantly lower than the yscale, this is highly
    1075             :     // suspicious of a situation of wrapping a very large virtual file in
    1076             :     // geographic coordinates with left and right parts being close to the
    1077             :     // antimeridian. In that situation, the xscale computed by the above method
    1078             :     // is completely wrong. Prefer doing an average of a few sample points
    1079             :     // instead
    1080        2052 :     if ((dfYScale / dfXScale > 100 ||
    1081           1 :          (pszXScale != nullptr && EQUAL(pszXScale, "FROM_GRID_SAMPLING"))))
    1082             :     {
    1083             :         // Sample points along a grid
    1084           4 :         const int nPointsX = std::min(10, nDstXSize);
    1085           4 :         const int nPointsY = std::min(10, nDstYSize);
    1086           4 :         const int nPoints = 3 * nPointsX * nPointsY;
    1087           8 :         std::vector<double> padfX;
    1088           8 :         std::vector<double> padfY;
    1089           8 :         std::vector<double> padfZ(nPoints);
    1090           8 :         std::vector<int> pabSuccess(nPoints);
    1091          44 :         for (int iY = 0; iY < nPointsY; iY++)
    1092             :         {
    1093         440 :             for (int iX = 0; iX < nPointsX; iX++)
    1094             :             {
    1095         400 :                 const double dfX =
    1096             :                     nPointsX == 1
    1097         400 :                         ? 0.0
    1098         400 :                         : static_cast<double>(iX) * nDstXSize / (nPointsX - 1);
    1099         400 :                 const double dfY =
    1100             :                     nPointsY == 1
    1101         400 :                         ? 0.0
    1102         400 :                         : static_cast<double>(iY) * nDstYSize / (nPointsY - 1);
    1103             : 
    1104             :                 // Reproject each destination sample point and its neighbours
    1105             :                 // at (x+1,y) and (x,y+1), so as to get the local scale.
    1106         400 :                 padfX.push_back(dfX);
    1107         400 :                 padfY.push_back(dfY);
    1108             : 
    1109         400 :                 padfX.push_back((iX == nPointsX - 1) ? dfX - 1 : dfX + 1);
    1110         400 :                 padfY.push_back(dfY);
    1111             : 
    1112         400 :                 padfX.push_back(dfX);
    1113         400 :                 padfY.push_back((iY == nPointsY - 1) ? dfY - 1 : dfY + 1);
    1114             :             }
    1115             :         }
    1116           4 :         pfnTransformer(pTransformerArg, TRUE, nPoints, &padfX[0], &padfY[0],
    1117           4 :                        &padfZ[0], &pabSuccess[0]);
    1118             : 
    1119             :         // Compute the xscale at each sampling point
    1120           8 :         std::vector<double> adfXScales;
    1121         404 :         for (int i = 0; i < nPoints; i += 3)
    1122             :         {
    1123         400 :             if (pabSuccess[i] && pabSuccess[i + 1] && pabSuccess[i + 2])
    1124             :             {
    1125             :                 const double dfPointXScale =
    1126         400 :                     1.0 / std::max(std::abs(padfX[i + 1] - padfX[i]),
    1127         800 :                                    std::abs(padfX[i + 2] - padfX[i]));
    1128         400 :                 adfXScales.push_back(dfPointXScale);
    1129             :             }
    1130             :         }
    1131             : 
    1132             :         // Sort by increasing xscale
    1133           4 :         std::sort(adfXScales.begin(), adfXScales.end());
    1134             : 
    1135           4 :         if (!adfXScales.empty())
    1136             :         {
    1137             :             // Compute the average of scales, but eliminate outliers small
    1138             :             // scales, if some samples are just along the discontinuity.
    1139           4 :             const double dfMaxPointXScale = adfXScales.back();
    1140           4 :             double dfSumPointXScale = 0;
    1141           4 :             int nCountPointScale = 0;
    1142         404 :             for (double dfPointXScale : adfXScales)
    1143             :             {
    1144         400 :                 if (dfPointXScale > dfMaxPointXScale / 10)
    1145             :                 {
    1146         398 :                     dfSumPointXScale += dfPointXScale;
    1147         398 :                     nCountPointScale++;
    1148             :                 }
    1149             :             }
    1150           4 :             if (nCountPointScale > 0)  // should always be true
    1151             :             {
    1152           4 :                 const double dfXScaleFromSampling =
    1153           4 :                     dfSumPointXScale / nCountPointScale;
    1154             : #if DEBUG_VERBOSE
    1155             :                 CPLDebug("WARP", "Correcting dfXScale from %f to %f", dfXScale,
    1156             :                          dfXScaleFromSampling);
    1157             : #endif
    1158           4 :                 dfXScale = dfXScaleFromSampling;
    1159             :             }
    1160             :         }
    1161             :     }
    1162             : 
    1163             : #if DEBUG_VERBOSE
    1164             :     CPLDebug("WARP", "dfXScale = %f, dfYScale = %f", dfXScale, dfYScale);
    1165             : #endif
    1166             : 
    1167        2052 :     const int bUse4SamplesFormula = dfXScale >= 0.95 && dfYScale >= 0.95;
    1168             : 
    1169             :     // Safety check for callers that would use GDALWarpKernel without using
    1170             :     // GDALWarpOperation.
    1171        1989 :     if ((eResample == GRA_CubicSpline || eResample == GRA_Lanczos ||
    1172        1926 :          ((eResample == GRA_Cubic || eResample == GRA_Bilinear) &&
    1173        4104 :           !bUse4SamplesFormula)) &&
    1174         388 :         atoi(CSLFetchNameValueDef(papszWarpOptions, "EXTRA_ELTS", "0")) !=
    1175             :             WARP_EXTRA_ELTS)
    1176             :     {
    1177           0 :         CPLError(CE_Failure, CPLE_AppDefined,
    1178             :                  "Source arrays must have WARP_EXTRA_ELTS extra elements at "
    1179             :                  "their end. "
    1180             :                  "See GDALWarpKernel class definition. If this condition is "
    1181             :                  "fulfilled, define a EXTRA_ELTS=%d warp options",
    1182             :                  WARP_EXTRA_ELTS);
    1183           0 :         return CE_Failure;
    1184             :     }
    1185             : 
    1186        2052 :     dfXFilter = anGWKFilterRadius[eResample];
    1187        2052 :     dfYFilter = anGWKFilterRadius[eResample];
    1188             : 
    1189        2052 :     nXRadius = dfXScale < 1.0 ? static_cast<int>(ceil(dfXFilter / dfXScale))
    1190        1586 :                               : static_cast<int>(dfXFilter);
    1191        2052 :     nYRadius = dfYScale < 1.0 ? static_cast<int>(ceil(dfYFilter / dfYScale))
    1192        1563 :                               : static_cast<int>(dfYFilter);
    1193             : 
    1194             :     // Filter window offset depends on the parity of the kernel radius.
    1195        2052 :     nFiltInitX = ((anGWKFilterRadius[eResample] + 1) % 2) - nXRadius;
    1196        2052 :     nFiltInitY = ((anGWKFilterRadius[eResample] + 1) % 2) - nYRadius;
    1197             : 
    1198        2052 :     bApplyVerticalShift =
    1199        2052 :         CPLFetchBool(papszWarpOptions, "APPLY_VERTICAL_SHIFT", false);
    1200        2052 :     dfMultFactorVerticalShift = CPLAtof(CSLFetchNameValueDef(
    1201        2052 :         papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT", "1.0"));
    1202             : 
    1203             :     /* -------------------------------------------------------------------- */
    1204             :     /*      Set up resampling functions.                                    */
    1205             :     /* -------------------------------------------------------------------- */
    1206        2052 :     if (CPLFetchBool(papszWarpOptions, "USE_GENERAL_CASE", false))
    1207          12 :         return GWKGeneralCase(this);
    1208             : 
    1209             : #if defined(HAVE_OPENCL)
    1210         589 :     if ((eWorkingDataType == GDT_Byte || eWorkingDataType == GDT_CInt16 ||
    1211         417 :          eWorkingDataType == GDT_UInt16 || eWorkingDataType == GDT_Int16 ||
    1212         277 :          eWorkingDataType == GDT_CFloat32 || eWorkingDataType == GDT_Float32) &&
    1213        1885 :         (eResample == GRA_Bilinear || eResample == GRA_Cubic ||
    1214        1440 :          eResample == GRA_CubicSpline || eResample == GRA_Lanczos) &&
    1215        4608 :         !bApplyVerticalShift &&
    1216             :         // OpenCL warping gives different results than the ones expected by autotest,
    1217             :         // so disable it by default even if found.
    1218        1056 :         CPLTestBool(
    1219         528 :             CSLFetchNameValueDef(papszWarpOptions, "USE_OPENCL",
    1220             :                                  CPLGetConfigOption("GDAL_USE_OPENCL", "NO"))))
    1221             :     {
    1222           0 :         if (pafUnifiedSrcDensity != nullptr)
    1223             :         {
    1224             :             // If pafUnifiedSrcDensity is only set to 1.0, then we can
    1225             :             // discard it.
    1226           0 :             bool bFoundNotOne = false;
    1227           0 :             for (GPtrDiff_t j = 0;
    1228           0 :                  j < static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize; j++)
    1229             :             {
    1230           0 :                 if (pafUnifiedSrcDensity[j] != 1.0)
    1231             :                 {
    1232           0 :                     bFoundNotOne = true;
    1233           0 :                     break;
    1234             :                 }
    1235             :             }
    1236           0 :             if (!bFoundNotOne)
    1237             :             {
    1238           0 :                 CPLFree(pafUnifiedSrcDensity);
    1239           0 :                 pafUnifiedSrcDensity = nullptr;
    1240             :             }
    1241             :         }
    1242             : 
    1243           0 :         if (pafUnifiedSrcDensity != nullptr)
    1244             :         {
    1245             :             // Typically if there's a cutline or an alpha band
    1246           0 :             CPLDebugOnce("WARP", "pafUnifiedSrcDensity is not null, "
    1247             :                                  "hence OpenCL warper cannot be used");
    1248             :         }
    1249             :         else
    1250             :         {
    1251           0 :             const CPLErr eResult = GWKOpenCLCase(this);
    1252             : 
    1253             :             // CE_Warning tells us a suitable OpenCL environment was not available
    1254             :             // so we fall through to other CPU based methods.
    1255           0 :             if (eResult != CE_Warning)
    1256           0 :                 return eResult;
    1257             :         }
    1258             :     }
    1259             : #endif  // defined HAVE_OPENCL
    1260             : 
    1261        2040 :     const bool bNoMasksOrDstDensityOnly =
    1262        2036 :         papanBandSrcValid == nullptr && panUnifiedSrcValid == nullptr &&
    1263        4076 :         pafUnifiedSrcDensity == nullptr && panDstValid == nullptr;
    1264             : 
    1265        2040 :     if (eWorkingDataType == GDT_Byte && eResample == GRA_NearestNeighbour &&
    1266             :         bNoMasksOrDstDensityOnly)
    1267         866 :         return GWKNearestNoMasksOrDstDensityOnlyByte(this);
    1268             : 
    1269        1174 :     if (eWorkingDataType == GDT_Byte && eResample == GRA_Bilinear &&
    1270             :         bNoMasksOrDstDensityOnly)
    1271         126 :         return GWKBilinearNoMasksOrDstDensityOnlyByte(this);
    1272             : 
    1273        1048 :     if (eWorkingDataType == GDT_Byte && eResample == GRA_Cubic &&
    1274             :         bNoMasksOrDstDensityOnly)
    1275          72 :         return GWKCubicNoMasksOrDstDensityOnlyByte(this);
    1276             : 
    1277         976 :     if (eWorkingDataType == GDT_Byte && eResample == GRA_CubicSpline &&
    1278             :         bNoMasksOrDstDensityOnly)
    1279          12 :         return GWKCubicSplineNoMasksOrDstDensityOnlyByte(this);
    1280             : 
    1281         964 :     if (eWorkingDataType == GDT_Byte && eResample == GRA_NearestNeighbour)
    1282         274 :         return GWKNearestByte(this);
    1283             : 
    1284         690 :     if ((eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16) &&
    1285         140 :         eResample == GRA_NearestNeighbour && bNoMasksOrDstDensityOnly)
    1286          18 :         return GWKNearestNoMasksOrDstDensityOnlyShort(this);
    1287             : 
    1288         672 :     if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Cubic &&
    1289             :         bNoMasksOrDstDensityOnly)
    1290           5 :         return GWKCubicNoMasksOrDstDensityOnlyShort(this);
    1291             : 
    1292         667 :     if ((eWorkingDataType == GDT_Int16) && eResample == GRA_CubicSpline &&
    1293             :         bNoMasksOrDstDensityOnly)
    1294           6 :         return GWKCubicSplineNoMasksOrDstDensityOnlyShort(this);
    1295             : 
    1296         661 :     if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Bilinear &&
    1297             :         bNoMasksOrDstDensityOnly)
    1298          18 :         return GWKBilinearNoMasksOrDstDensityOnlyShort(this);
    1299             : 
    1300         643 :     if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Cubic &&
    1301             :         bNoMasksOrDstDensityOnly)
    1302          12 :         return GWKCubicNoMasksOrDstDensityOnlyUShort(this);
    1303             : 
    1304         631 :     if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_CubicSpline &&
    1305             :         bNoMasksOrDstDensityOnly)
    1306           5 :         return GWKCubicSplineNoMasksOrDstDensityOnlyUShort(this);
    1307             : 
    1308         626 :     if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Bilinear &&
    1309             :         bNoMasksOrDstDensityOnly)
    1310           6 :         return GWKBilinearNoMasksOrDstDensityOnlyUShort(this);
    1311             : 
    1312         620 :     if (eWorkingDataType == GDT_Int16 && eResample == GRA_NearestNeighbour)
    1313          24 :         return GWKNearestShort(this);
    1314             : 
    1315         596 :     if (eWorkingDataType == GDT_Int16 && eResample == GRA_NearestNeighbour)
    1316           0 :         return GWKNearestUnsignedShort(this);
    1317             : 
    1318         596 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour &&
    1319             :         bNoMasksOrDstDensityOnly)
    1320          11 :         return GWKNearestNoMasksOrDstDensityOnlyFloat(this);
    1321             : 
    1322         585 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour)
    1323          37 :         return GWKNearestFloat(this);
    1324             : 
    1325         548 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_Bilinear &&
    1326             :         bNoMasksOrDstDensityOnly)
    1327           5 :         return GWKBilinearNoMasksOrDstDensityOnlyFloat(this);
    1328             : 
    1329         543 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_Cubic &&
    1330             :         bNoMasksOrDstDensityOnly)
    1331           9 :         return GWKCubicNoMasksOrDstDensityOnlyFloat(this);
    1332             : 
    1333             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    1334             :     if (eWorkingDataType == GDT_Float64 && eResample == GRA_Bilinear &&
    1335             :         bNoMasksOrDstDensityOnly)
    1336             :         return GWKBilinearNoMasksOrDstDensityOnlyDouble(this);
    1337             : 
    1338             :     if (eWorkingDataType == GDT_Float64 && eResample == GRA_Cubic &&
    1339             :         bNoMasksOrDstDensityOnly)
    1340             :         return GWKCubicNoMasksOrDstDensityOnlyDouble(this);
    1341             : #endif
    1342             : 
    1343         534 :     if (eResample == GRA_Average)
    1344          71 :         return GWKAverageOrMode(this);
    1345             : 
    1346         463 :     if (eResample == GRA_RMS)
    1347           9 :         return GWKAverageOrMode(this);
    1348             : 
    1349         454 :     if (eResample == GRA_Mode)
    1350          23 :         return GWKAverageOrMode(this);
    1351             : 
    1352         431 :     if (eResample == GRA_Max)
    1353           6 :         return GWKAverageOrMode(this);
    1354             : 
    1355         425 :     if (eResample == GRA_Min)
    1356           5 :         return GWKAverageOrMode(this);
    1357             : 
    1358         420 :     if (eResample == GRA_Med)
    1359           6 :         return GWKAverageOrMode(this);
    1360             : 
    1361         414 :     if (eResample == GRA_Q1)
    1362           5 :         return GWKAverageOrMode(this);
    1363             : 
    1364         409 :     if (eResample == GRA_Q3)
    1365           5 :         return GWKAverageOrMode(this);
    1366             : 
    1367         404 :     if (eResample == GRA_Sum)
    1368          18 :         return GWKSumPreserving(this);
    1369             : 
    1370         386 :     if (!GDALDataTypeIsComplex(eWorkingDataType))
    1371             :     {
    1372         155 :         return GWKRealCase(this);
    1373             :     }
    1374             : 
    1375         231 :     return GWKGeneralCase(this);
    1376             : }
    1377             : 
    1378             : /************************************************************************/
    1379             : /*                              Validate()                              */
    1380             : /************************************************************************/
    1381             : 
    1382             : /**
    1383             :  * \fn CPLErr GDALWarpKernel::Validate()
    1384             :  *
    1385             :  * Check the settings in the GDALWarpKernel, and issue a CPLError()
    1386             :  * (and return CE_Failure) if the configuration is considered to be
    1387             :  * invalid for some reason.
    1388             :  *
    1389             :  * This method will also do some standard defaulting such as setting
    1390             :  * pfnProgress to GDALDummyProgress() if it is NULL.
    1391             :  *
    1392             :  * @return CE_None on success or CE_Failure if an error is detected.
    1393             :  */
    1394             : 
    1395        2356 : CPLErr GDALWarpKernel::Validate()
    1396             : 
    1397             : {
    1398        2356 :     if (static_cast<size_t>(eResample) >=
    1399             :         (sizeof(anGWKFilterRadius) / sizeof(anGWKFilterRadius[0])))
    1400             :     {
    1401           0 :         CPLError(CE_Failure, CPLE_AppDefined,
    1402             :                  "Unsupported resampling method %d.",
    1403           0 :                  static_cast<int>(eResample));
    1404           0 :         return CE_Failure;
    1405             :     }
    1406             : 
    1407             :     // Tuples of values (e.g. "<R>,<G>,<B>" or "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>)") that must
    1408             :     // be ignored as contributing source pixels during resampling. Only taken into account by
    1409             :     // Average currently
    1410             :     const char *pszExcludedValues =
    1411        2356 :         CSLFetchNameValue(papszWarpOptions, "EXCLUDED_VALUES");
    1412        2356 :     if (pszExcludedValues)
    1413             :     {
    1414             :         const CPLStringList aosTokens(
    1415           8 :             CSLTokenizeString2(pszExcludedValues, "(,)", 0));
    1416           8 :         if ((aosTokens.size() % nBands) != 0)
    1417             :         {
    1418           1 :             CPLError(CE_Failure, CPLE_AppDefined,
    1419             :                      "EXCLUDED_VALUES should contain one or several tuples of "
    1420             :                      "%d values formatted like <R>,<G>,<B> or "
    1421             :                      "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>) if there are multiple "
    1422             :                      "tuples",
    1423             :                      nBands);
    1424           1 :             return CE_Failure;
    1425             :         }
    1426          14 :         std::vector<double> adfTuple;
    1427          28 :         for (int i = 0; i < aosTokens.size(); ++i)
    1428             :         {
    1429          21 :             adfTuple.push_back(CPLAtof(aosTokens[i]));
    1430          21 :             if (((i + 1) % nBands) == 0)
    1431             :             {
    1432           7 :                 m_aadfExcludedValues.push_back(adfTuple);
    1433           7 :                 adfTuple.clear();
    1434             :             }
    1435             :         }
    1436             :     }
    1437             : 
    1438        2355 :     return CE_None;
    1439             : }
    1440             : 
    1441             : /************************************************************************/
    1442             : /*                         GWKOverlayDensity()                          */
    1443             : /*                                                                      */
    1444             : /*      Compute the final density for the destination pixel.  This      */
    1445             : /*      is a function of the overlay density (passed in) and the        */
    1446             : /*      original density.                                               */
    1447             : /************************************************************************/
    1448             : 
    1449     7934110 : static void GWKOverlayDensity(const GDALWarpKernel *poWK, GPtrDiff_t iDstOffset,
    1450             :                               double dfDensity)
    1451             : {
    1452     7934110 :     if (dfDensity < 0.0001 || poWK->pafDstDensity == nullptr)
    1453     6743240 :         return;
    1454             : 
    1455     1190880 :     poWK->pafDstDensity[iDstOffset] = static_cast<float>(
    1456     1190880 :         1.0 - (1.0 - dfDensity) * (1.0 - poWK->pafDstDensity[iDstOffset]));
    1457             : }
    1458             : 
    1459             : /************************************************************************/
    1460             : /*                          GWKRoundValueT()                            */
    1461             : /************************************************************************/
    1462             : 
    1463             : template <class T, bool is_signed> struct sGWKRoundValueT
    1464             : {
    1465             :     static T eval(double);
    1466             : };
    1467             : 
    1468             : template <class T> struct sGWKRoundValueT<T, true> /* signed */
    1469             : {
    1470     2235130 :     static T eval(double dfValue)
    1471             :     {
    1472     2235130 :         return static_cast<T>(floor(dfValue + 0.5));
    1473             :     }
    1474             : };
    1475             : 
    1476             : template <class T> struct sGWKRoundValueT<T, false> /* unsigned */
    1477             : {
    1478    12928381 :     static T eval(double dfValue)
    1479             :     {
    1480    12928381 :         return static_cast<T>(dfValue + 0.5);
    1481             :     }
    1482             : };
    1483             : 
    1484    15154811 : template <class T> static T GWKRoundValueT(double dfValue)
    1485             : {
    1486    15154811 :     return sGWKRoundValueT<T, cpl::NumericLimits<T>::is_signed>::eval(dfValue);
    1487             : }
    1488             : 
    1489      269074 : template <> float GWKRoundValueT<float>(double dfValue)
    1490             : {
    1491      269074 :     return static_cast<float>(dfValue);
    1492             : }
    1493             : 
    1494             : #ifdef notused
    1495             : template <> double GWKRoundValueT<double>(double dfValue)
    1496             : {
    1497             :     return dfValue;
    1498             : }
    1499             : #endif
    1500             : 
    1501             : /************************************************************************/
    1502             : /*                            GWKClampValueT()                          */
    1503             : /************************************************************************/
    1504             : 
    1505    10366934 : template <class T> static CPL_INLINE T GWKClampValueT(double dfValue)
    1506             : {
    1507    10366934 :     if (dfValue < cpl::NumericLimits<T>::min())
    1508        3969 :         return cpl::NumericLimits<T>::min();
    1509    10375466 :     else if (dfValue > cpl::NumericLimits<T>::max())
    1510       18463 :         return cpl::NumericLimits<T>::max();
    1511             :     else
    1512    10345056 :         return GWKRoundValueT<T>(dfValue);
    1513             : }
    1514             : 
    1515      718914 : template <> float GWKClampValueT<float>(double dfValue)
    1516             : {
    1517      718914 :     return static_cast<float>(dfValue);
    1518             : }
    1519             : 
    1520             : #ifdef notused
    1521             : template <> double GWKClampValueT<double>(double dfValue)
    1522             : {
    1523             :     return dfValue;
    1524             : }
    1525             : #endif
    1526             : 
    1527             : /************************************************************************/
    1528             : /*                             AvoidNoData()                            */
    1529             : /************************************************************************/
    1530             : 
    1531             : template <class T>
    1532    11865687 : inline void AvoidNoData(const GDALWarpKernel *poWK, int iBand,
    1533             :                         GPtrDiff_t iDstOffset)
    1534             : {
    1535    11865687 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    1536    11865687 :     T *pDst = reinterpret_cast<T *>(pabyDst);
    1537             : 
    1538    11865687 :     if (poWK->padfDstNoDataReal != nullptr &&
    1539     5729937 :         poWK->padfDstNoDataReal[iBand] == static_cast<double>(pDst[iDstOffset]))
    1540             :     {
    1541             :         if constexpr (cpl::NumericLimits<T>::is_integer)
    1542             :         {
    1543        2637 :             if (pDst[iDstOffset] ==
    1544        2637 :                 static_cast<T>(cpl::NumericLimits<T>::lowest()))
    1545             :             {
    1546        2509 :                 pDst[iDstOffset] =
    1547        2509 :                     static_cast<T>(cpl::NumericLimits<T>::lowest() + 1);
    1548             :             }
    1549             :             else
    1550         128 :                 pDst[iDstOffset]--;
    1551             :         }
    1552             :         else
    1553             :         {
    1554          64 :             if (pDst[iDstOffset] == cpl::NumericLimits<T>::max())
    1555             :             {
    1556             :                 using std::nextafter;
    1557           0 :                 pDst[iDstOffset] =
    1558           0 :                     nextafter(pDst[iDstOffset], static_cast<T>(0));
    1559             :             }
    1560             :             else
    1561             :             {
    1562             :                 using std::nextafter;
    1563          64 :                 pDst[iDstOffset] =
    1564          64 :                     nextafter(pDst[iDstOffset], cpl::NumericLimits<T>::max());
    1565             :             }
    1566             :         }
    1567             : 
    1568        2701 :         if (!poWK->bWarnedAboutDstNoDataReplacement)
    1569             :         {
    1570          25 :             const_cast<GDALWarpKernel *>(poWK)
    1571             :                 ->bWarnedAboutDstNoDataReplacement = true;
    1572          25 :             CPLError(CE_Warning, CPLE_AppDefined,
    1573             :                      "Value %g in the source dataset has been changed to %g "
    1574             :                      "in the destination dataset to avoid being treated as "
    1575             :                      "NoData. To avoid this, select a different NoData value "
    1576             :                      "for the destination dataset.",
    1577          25 :                      poWK->padfDstNoDataReal[iBand],
    1578          25 :                      static_cast<double>(pDst[iDstOffset]));
    1579             :         }
    1580             :     }
    1581    11865687 : }
    1582             : 
    1583             : /************************************************************************/
    1584             : /*                         GWKSetPixelValueRealT()                      */
    1585             : /************************************************************************/
    1586             : 
    1587             : template <class T>
    1588     7159332 : static bool GWKSetPixelValueRealT(const GDALWarpKernel *poWK, int iBand,
    1589             :                                   GPtrDiff_t iDstOffset, double dfDensity,
    1590             :                                   T value)
    1591             : {
    1592     7159332 :     T *pDst = reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
    1593             : 
    1594             :     /* -------------------------------------------------------------------- */
    1595             :     /*      If the source density is less than 100% we need to fetch the    */
    1596             :     /*      existing destination value, and mix it with the source to       */
    1597             :     /*      get the new "to apply" value.  Also compute composite           */
    1598             :     /*      density.                                                        */
    1599             :     /*                                                                      */
    1600             :     /*      We avoid mixing if density is very near one or risk mixing      */
    1601             :     /*      in very extreme nodata values and causing odd results (#1610)   */
    1602             :     /* -------------------------------------------------------------------- */
    1603     7159332 :     if (dfDensity < 0.9999)
    1604             :     {
    1605       81504 :         if (dfDensity < 0.0001)
    1606           0 :             return true;
    1607             : 
    1608       81504 :         double dfDstDensity = 1.0;
    1609             : 
    1610       81504 :         if (poWK->pafDstDensity != nullptr)
    1611       80032 :             dfDstDensity = poWK->pafDstDensity[iDstOffset];
    1612        1472 :         else if (poWK->panDstValid != nullptr &&
    1613           0 :                  !CPLMaskGet(poWK->panDstValid, iDstOffset))
    1614           0 :             dfDstDensity = 0.0;
    1615             : 
    1616             :         // It seems like we also ought to be testing panDstValid[] here!
    1617             : 
    1618       81504 :         const double dfDstReal = pDst[iDstOffset];
    1619             : 
    1620             :         // The destination density is really only relative to the portion
    1621             :         // not occluded by the overlay.
    1622       81504 :         const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
    1623             : 
    1624       81504 :         const double dfReal = (value * dfDensity + dfDstReal * dfDstInfluence) /
    1625       81504 :                               (dfDensity + dfDstInfluence);
    1626             : 
    1627             :         /* --------------------------------------------------------------------
    1628             :          */
    1629             :         /*      Actually apply the destination value. */
    1630             :         /*                                                                      */
    1631             :         /*      Avoid using the destination nodata value for integer datatypes
    1632             :          */
    1633             :         /*      if by chance it is equal to the computed pixel value. */
    1634             :         /* --------------------------------------------------------------------
    1635             :          */
    1636       81504 :         pDst[iDstOffset] = GWKClampValueT<T>(dfReal);
    1637             :     }
    1638             :     else
    1639             :     {
    1640     7077823 :         pDst[iDstOffset] = value;
    1641             :     }
    1642             : 
    1643     7159332 :     AvoidNoData<T>(poWK, iBand, iDstOffset);
    1644             : 
    1645     7159332 :     return true;
    1646             : }
    1647             : 
    1648             : /************************************************************************/
    1649             : /*                       ClampRoundAndAvoidNoData()                     */
    1650             : /************************************************************************/
    1651             : 
    1652             : template <class T>
    1653     4706375 : inline void ClampRoundAndAvoidNoData(const GDALWarpKernel *poWK, int iBand,
    1654             :                                      GPtrDiff_t iDstOffset, double dfReal)
    1655             : {
    1656     4706375 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    1657     4706375 :     T *pDst = reinterpret_cast<T *>(pabyDst);
    1658             : 
    1659             :     if constexpr (cpl::NumericLimits<T>::is_integer)
    1660             :     {
    1661             :         using std::floor;
    1662     4223379 :         if (dfReal < static_cast<double>(cpl::NumericLimits<T>::lowest()))
    1663        1638 :             pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::lowest());
    1664     4221739 :         else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
    1665       13640 :             pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::max());
    1666             :         else if constexpr (cpl::NumericLimits<T>::is_signed)
    1667       13539 :             pDst[iDstOffset] = static_cast<T>(floor(dfReal + 0.5));
    1668             :         else
    1669     4194560 :             pDst[iDstOffset] = static_cast<T>(dfReal + 0.5);
    1670             :     }
    1671             :     else
    1672             :     {
    1673      482996 :         pDst[iDstOffset] = static_cast<T>(dfReal);
    1674             :     }
    1675             : 
    1676     4706375 :     AvoidNoData<T>(poWK, iBand, iDstOffset);
    1677     4706375 : }
    1678             : 
    1679             : /************************************************************************/
    1680             : /*                          GWKSetPixelValue()                          */
    1681             : /************************************************************************/
    1682             : 
    1683     3867640 : static bool GWKSetPixelValue(const GDALWarpKernel *poWK, int iBand,
    1684             :                              GPtrDiff_t iDstOffset, double dfDensity,
    1685             :                              double dfReal, double dfImag)
    1686             : 
    1687             : {
    1688     3867640 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    1689             : 
    1690             :     /* -------------------------------------------------------------------- */
    1691             :     /*      If the source density is less than 100% we need to fetch the    */
    1692             :     /*      existing destination value, and mix it with the source to       */
    1693             :     /*      get the new "to apply" value.  Also compute composite           */
    1694             :     /*      density.                                                        */
    1695             :     /*                                                                      */
    1696             :     /*      We avoid mixing if density is very near one or risk mixing      */
    1697             :     /*      in very extreme nodata values and causing odd results (#1610)   */
    1698             :     /* -------------------------------------------------------------------- */
    1699     3867640 :     if (dfDensity < 0.9999)
    1700             :     {
    1701         800 :         if (dfDensity < 0.0001)
    1702           0 :             return true;
    1703             : 
    1704         800 :         double dfDstDensity = 1.0;
    1705         800 :         if (poWK->pafDstDensity != nullptr)
    1706         800 :             dfDstDensity = poWK->pafDstDensity[iDstOffset];
    1707           0 :         else if (poWK->panDstValid != nullptr &&
    1708           0 :                  !CPLMaskGet(poWK->panDstValid, iDstOffset))
    1709           0 :             dfDstDensity = 0.0;
    1710             : 
    1711         800 :         double dfDstReal = 0.0;
    1712         800 :         double dfDstImag = 0.0;
    1713             :         // It seems like we also ought to be testing panDstValid[] here!
    1714             : 
    1715             :         // TODO(schwehr): Factor out this repreated type of set.
    1716         800 :         switch (poWK->eWorkingDataType)
    1717             :         {
    1718           0 :             case GDT_Byte:
    1719           0 :                 dfDstReal = pabyDst[iDstOffset];
    1720           0 :                 dfDstImag = 0.0;
    1721           0 :                 break;
    1722             : 
    1723           0 :             case GDT_Int8:
    1724           0 :                 dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
    1725           0 :                 dfDstImag = 0.0;
    1726           0 :                 break;
    1727             : 
    1728         400 :             case GDT_Int16:
    1729         400 :                 dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
    1730         400 :                 dfDstImag = 0.0;
    1731         400 :                 break;
    1732             : 
    1733         400 :             case GDT_UInt16:
    1734         400 :                 dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
    1735         400 :                 dfDstImag = 0.0;
    1736         400 :                 break;
    1737             : 
    1738           0 :             case GDT_Int32:
    1739           0 :                 dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
    1740           0 :                 dfDstImag = 0.0;
    1741           0 :                 break;
    1742             : 
    1743           0 :             case GDT_UInt32:
    1744           0 :                 dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
    1745           0 :                 dfDstImag = 0.0;
    1746           0 :                 break;
    1747             : 
    1748           0 :             case GDT_Int64:
    1749           0 :                 dfDstReal = static_cast<double>(
    1750           0 :                     reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
    1751           0 :                 dfDstImag = 0.0;
    1752           0 :                 break;
    1753             : 
    1754           0 :             case GDT_UInt64:
    1755           0 :                 dfDstReal = static_cast<double>(
    1756           0 :                     reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
    1757           0 :                 dfDstImag = 0.0;
    1758           0 :                 break;
    1759             : 
    1760           0 :             case GDT_Float16:
    1761           0 :                 dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
    1762           0 :                 dfDstImag = 0.0;
    1763           0 :                 break;
    1764             : 
    1765           0 :             case GDT_Float32:
    1766           0 :                 dfDstReal = reinterpret_cast<float *>(pabyDst)[iDstOffset];
    1767           0 :                 dfDstImag = 0.0;
    1768           0 :                 break;
    1769             : 
    1770           0 :             case GDT_Float64:
    1771           0 :                 dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
    1772           0 :                 dfDstImag = 0.0;
    1773           0 :                 break;
    1774             : 
    1775           0 :             case GDT_CInt16:
    1776           0 :                 dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2];
    1777           0 :                 dfDstImag =
    1778           0 :                     reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2 + 1];
    1779           0 :                 break;
    1780             : 
    1781           0 :             case GDT_CInt32:
    1782           0 :                 dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2];
    1783           0 :                 dfDstImag =
    1784           0 :                     reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2 + 1];
    1785           0 :                 break;
    1786             : 
    1787           0 :             case GDT_CFloat16:
    1788             :                 dfDstReal =
    1789           0 :                     reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2];
    1790             :                 dfDstImag =
    1791           0 :                     reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1];
    1792           0 :                 break;
    1793             : 
    1794           0 :             case GDT_CFloat32:
    1795           0 :                 dfDstReal = reinterpret_cast<float *>(pabyDst)[iDstOffset * 2];
    1796           0 :                 dfDstImag =
    1797           0 :                     reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1];
    1798           0 :                 break;
    1799             : 
    1800           0 :             case GDT_CFloat64:
    1801           0 :                 dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset * 2];
    1802           0 :                 dfDstImag =
    1803           0 :                     reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1];
    1804           0 :                 break;
    1805             : 
    1806           0 :             case GDT_Unknown:
    1807             :             case GDT_TypeCount:
    1808           0 :                 CPLAssert(false);
    1809             :                 return false;
    1810             :         }
    1811             : 
    1812             :         // The destination density is really only relative to the portion
    1813             :         // not occluded by the overlay.
    1814         800 :         const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
    1815             : 
    1816         800 :         dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
    1817         800 :                  (dfDensity + dfDstInfluence);
    1818             : 
    1819         800 :         dfImag = (dfImag * dfDensity + dfDstImag * dfDstInfluence) /
    1820         800 :                  (dfDensity + dfDstInfluence);
    1821             :     }
    1822             : 
    1823             :     /* -------------------------------------------------------------------- */
    1824             :     /*      Actually apply the destination value.                           */
    1825             :     /*                                                                      */
    1826             :     /*      Avoid using the destination nodata value for integer datatypes  */
    1827             :     /*      if by chance it is equal to the computed pixel value.           */
    1828             :     /* -------------------------------------------------------------------- */
    1829             : 
    1830     3867640 :     switch (poWK->eWorkingDataType)
    1831             :     {
    1832     3141450 :         case GDT_Byte:
    1833     3141450 :             ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal);
    1834     3141450 :             break;
    1835             : 
    1836           0 :         case GDT_Int8:
    1837           0 :             ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal);
    1838           0 :             break;
    1839             : 
    1840        7470 :         case GDT_Int16:
    1841        7470 :             ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal);
    1842        7470 :             break;
    1843             : 
    1844         463 :         case GDT_UInt16:
    1845         463 :             ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal);
    1846         463 :             break;
    1847             : 
    1848          63 :         case GDT_UInt32:
    1849          63 :             ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal);
    1850          63 :             break;
    1851             : 
    1852        3470 :         case GDT_Int32:
    1853        3470 :             ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal);
    1854        3470 :             break;
    1855             : 
    1856           0 :         case GDT_UInt64:
    1857           0 :             ClampRoundAndAvoidNoData<std::uint64_t>(poWK, iBand, iDstOffset,
    1858             :                                                     dfReal);
    1859           0 :             break;
    1860             : 
    1861           0 :         case GDT_Int64:
    1862           0 :             ClampRoundAndAvoidNoData<std::int64_t>(poWK, iBand, iDstOffset,
    1863             :                                                    dfReal);
    1864           0 :             break;
    1865             : 
    1866           0 :         case GDT_Float16:
    1867           0 :             ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal);
    1868           0 :             break;
    1869             : 
    1870      478957 :         case GDT_Float32:
    1871      478957 :             ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal);
    1872      478957 :             break;
    1873             : 
    1874         147 :         case GDT_Float64:
    1875         147 :             ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal);
    1876         147 :             break;
    1877             : 
    1878      234178 :         case GDT_CInt16:
    1879             :         {
    1880             :             typedef GInt16 T;
    1881      234178 :             if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
    1882           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1883           0 :                     cpl::NumericLimits<T>::min();
    1884      234178 :             else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
    1885           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1886           0 :                     cpl::NumericLimits<T>::max();
    1887             :             else
    1888      234178 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1889      234178 :                     static_cast<T>(floor(dfReal + 0.5));
    1890      234178 :             if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
    1891           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1892           0 :                     cpl::NumericLimits<T>::min();
    1893      234178 :             else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
    1894           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1895           0 :                     cpl::NumericLimits<T>::max();
    1896             :             else
    1897      234178 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1898      234178 :                     static_cast<T>(floor(dfImag + 0.5));
    1899      234178 :             break;
    1900             :         }
    1901             : 
    1902         478 :         case GDT_CInt32:
    1903             :         {
    1904             :             typedef GInt32 T;
    1905         478 :             if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
    1906           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1907           0 :                     cpl::NumericLimits<T>::min();
    1908         478 :             else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
    1909           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1910           0 :                     cpl::NumericLimits<T>::max();
    1911             :             else
    1912         478 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1913         478 :                     static_cast<T>(floor(dfReal + 0.5));
    1914         478 :             if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
    1915           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1916           0 :                     cpl::NumericLimits<T>::min();
    1917         478 :             else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
    1918           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1919           0 :                     cpl::NumericLimits<T>::max();
    1920             :             else
    1921         478 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1922         478 :                     static_cast<T>(floor(dfImag + 0.5));
    1923         478 :             break;
    1924             :         }
    1925             : 
    1926           0 :         case GDT_CFloat16:
    1927           0 :             reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2] =
    1928           0 :                 static_cast<GFloat16>(dfReal);
    1929           0 :             reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1] =
    1930           0 :                 static_cast<GFloat16>(dfImag);
    1931           0 :             break;
    1932             : 
    1933         490 :         case GDT_CFloat32:
    1934         490 :             reinterpret_cast<float *>(pabyDst)[iDstOffset * 2] =
    1935         490 :                 static_cast<float>(dfReal);
    1936         490 :             reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1] =
    1937         490 :                 static_cast<float>(dfImag);
    1938         490 :             break;
    1939             : 
    1940         478 :         case GDT_CFloat64:
    1941         478 :             reinterpret_cast<double *>(pabyDst)[iDstOffset * 2] = dfReal;
    1942         478 :             reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1] = dfImag;
    1943         478 :             break;
    1944             : 
    1945           0 :         case GDT_Unknown:
    1946             :         case GDT_TypeCount:
    1947           0 :             return false;
    1948             :     }
    1949             : 
    1950     3867640 :     return true;
    1951             : }
    1952             : 
    1953             : /************************************************************************/
    1954             : /*                       GWKSetPixelValueReal()                         */
    1955             : /************************************************************************/
    1956             : 
    1957     1074360 : static bool GWKSetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
    1958             :                                  GPtrDiff_t iDstOffset, double dfDensity,
    1959             :                                  double dfReal)
    1960             : 
    1961             : {
    1962     1074360 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    1963             : 
    1964             :     /* -------------------------------------------------------------------- */
    1965             :     /*      If the source density is less than 100% we need to fetch the    */
    1966             :     /*      existing destination value, and mix it with the source to       */
    1967             :     /*      get the new "to apply" value.  Also compute composite           */
    1968             :     /*      density.                                                        */
    1969             :     /*                                                                      */
    1970             :     /*      We avoid mixing if density is very near one or risk mixing      */
    1971             :     /*      in very extreme nodata values and causing odd results (#1610)   */
    1972             :     /* -------------------------------------------------------------------- */
    1973     1074360 :     if (dfDensity < 0.9999)
    1974             :     {
    1975       78172 :         if (dfDensity < 0.0001)
    1976           0 :             return true;
    1977             : 
    1978       78172 :         double dfDstReal = 0.0;
    1979       78172 :         double dfDstDensity = 1.0;
    1980             : 
    1981       78172 :         if (poWK->pafDstDensity != nullptr)
    1982       78172 :             dfDstDensity = poWK->pafDstDensity[iDstOffset];
    1983           0 :         else if (poWK->panDstValid != nullptr &&
    1984           0 :                  !CPLMaskGet(poWK->panDstValid, iDstOffset))
    1985           0 :             dfDstDensity = 0.0;
    1986             : 
    1987             :         // It seems like we also ought to be testing panDstValid[] here!
    1988             : 
    1989       78172 :         switch (poWK->eWorkingDataType)
    1990             :         {
    1991           0 :             case GDT_Byte:
    1992           0 :                 dfDstReal = pabyDst[iDstOffset];
    1993           0 :                 break;
    1994             : 
    1995           0 :             case GDT_Int8:
    1996           0 :                 dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
    1997           0 :                 break;
    1998             : 
    1999         300 :             case GDT_Int16:
    2000         300 :                 dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
    2001         300 :                 break;
    2002             : 
    2003       77872 :             case GDT_UInt16:
    2004       77872 :                 dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
    2005       77872 :                 break;
    2006             : 
    2007           0 :             case GDT_Int32:
    2008           0 :                 dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
    2009           0 :                 break;
    2010             : 
    2011           0 :             case GDT_UInt32:
    2012           0 :                 dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
    2013           0 :                 break;
    2014             : 
    2015           0 :             case GDT_Int64:
    2016           0 :                 dfDstReal = static_cast<double>(
    2017           0 :                     reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
    2018           0 :                 break;
    2019             : 
    2020           0 :             case GDT_UInt64:
    2021           0 :                 dfDstReal = static_cast<double>(
    2022           0 :                     reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
    2023           0 :                 break;
    2024             : 
    2025           0 :             case GDT_Float16:
    2026           0 :                 dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
    2027           0 :                 break;
    2028             : 
    2029           0 :             case GDT_Float32:
    2030           0 :                 dfDstReal = reinterpret_cast<float *>(pabyDst)[iDstOffset];
    2031           0 :                 break;
    2032             : 
    2033           0 :             case GDT_Float64:
    2034           0 :                 dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
    2035           0 :                 break;
    2036             : 
    2037           0 :             case GDT_CInt16:
    2038             :             case GDT_CInt32:
    2039             :             case GDT_CFloat16:
    2040             :             case GDT_CFloat32:
    2041             :             case GDT_CFloat64:
    2042             :             case GDT_Unknown:
    2043             :             case GDT_TypeCount:
    2044           0 :                 CPLAssert(false);
    2045             :                 return false;
    2046             :         }
    2047             : 
    2048             :         // The destination density is really only relative to the portion
    2049             :         // not occluded by the overlay.
    2050       78172 :         const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
    2051             : 
    2052       78172 :         dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
    2053       78172 :                  (dfDensity + dfDstInfluence);
    2054             :     }
    2055             : 
    2056             :     /* -------------------------------------------------------------------- */
    2057             :     /*      Actually apply the destination value.                           */
    2058             :     /*                                                                      */
    2059             :     /*      Avoid using the destination nodata value for integer datatypes  */
    2060             :     /*      if by chance it is equal to the computed pixel value.           */
    2061             :     /* -------------------------------------------------------------------- */
    2062             : 
    2063     1074360 :     switch (poWK->eWorkingDataType)
    2064             :     {
    2065      916752 :         case GDT_Byte:
    2066      916752 :             ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal);
    2067      916752 :             break;
    2068             : 
    2069           0 :         case GDT_Int8:
    2070           0 :             ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal);
    2071           0 :             break;
    2072             : 
    2073        1117 :         case GDT_Int16:
    2074        1117 :             ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal);
    2075        1117 :             break;
    2076             : 
    2077      150735 :         case GDT_UInt16:
    2078      150735 :             ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal);
    2079      150735 :             break;
    2080             : 
    2081         347 :         case GDT_UInt32:
    2082         347 :             ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal);
    2083         347 :             break;
    2084             : 
    2085        1350 :         case GDT_Int32:
    2086        1350 :             ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal);
    2087        1350 :             break;
    2088             : 
    2089          32 :         case GDT_UInt64:
    2090          32 :             ClampRoundAndAvoidNoData<std::uint64_t>(poWK, iBand, iDstOffset,
    2091             :                                                     dfReal);
    2092          32 :             break;
    2093             : 
    2094         132 :         case GDT_Int64:
    2095         132 :             ClampRoundAndAvoidNoData<std::int64_t>(poWK, iBand, iDstOffset,
    2096             :                                                    dfReal);
    2097         132 :             break;
    2098             : 
    2099           0 :         case GDT_Float16:
    2100           0 :             ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal);
    2101           0 :             break;
    2102             : 
    2103        3442 :         case GDT_Float32:
    2104        3442 :             ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal);
    2105        3442 :             break;
    2106             : 
    2107         450 :         case GDT_Float64:
    2108         450 :             ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal);
    2109         450 :             break;
    2110             : 
    2111           0 :         case GDT_CInt16:
    2112             :         case GDT_CInt32:
    2113             :         case GDT_CFloat16:
    2114             :         case GDT_CFloat32:
    2115             :         case GDT_CFloat64:
    2116           0 :             return false;
    2117             : 
    2118           0 :         case GDT_Unknown:
    2119             :         case GDT_TypeCount:
    2120           0 :             CPLAssert(false);
    2121             :             return false;
    2122             :     }
    2123             : 
    2124     1074360 :     return true;
    2125             : }
    2126             : 
    2127             : /************************************************************************/
    2128             : /*                          GWKGetPixelValue()                          */
    2129             : /************************************************************************/
    2130             : 
    2131             : /* It is assumed that panUnifiedSrcValid has been checked before */
    2132             : 
    2133    29336100 : static bool GWKGetPixelValue(const GDALWarpKernel *poWK, int iBand,
    2134             :                              GPtrDiff_t iSrcOffset, double *pdfDensity,
    2135             :                              double *pdfReal, double *pdfImag)
    2136             : 
    2137             : {
    2138    29336100 :     GByte *pabySrc = poWK->papabySrcImage[iBand];
    2139             : 
    2140    58672300 :     if (poWK->papanBandSrcValid != nullptr &&
    2141    29336100 :         poWK->papanBandSrcValid[iBand] != nullptr &&
    2142           0 :         !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
    2143             :     {
    2144           0 :         *pdfDensity = 0.0;
    2145           0 :         return false;
    2146             :     }
    2147             : 
    2148    29336100 :     *pdfReal = 0.0;
    2149    29336100 :     *pdfImag = 0.0;
    2150             : 
    2151             :     // TODO(schwehr): Fix casting.
    2152    29336100 :     switch (poWK->eWorkingDataType)
    2153             :     {
    2154    28245600 :         case GDT_Byte:
    2155    28245600 :             *pdfReal = pabySrc[iSrcOffset];
    2156    28245600 :             *pdfImag = 0.0;
    2157    28245600 :             break;
    2158             : 
    2159           0 :         case GDT_Int8:
    2160           0 :             *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
    2161           0 :             *pdfImag = 0.0;
    2162           0 :             break;
    2163             : 
    2164       28226 :         case GDT_Int16:
    2165       28226 :             *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
    2166       28226 :             *pdfImag = 0.0;
    2167       28226 :             break;
    2168             : 
    2169         163 :         case GDT_UInt16:
    2170         163 :             *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
    2171         163 :             *pdfImag = 0.0;
    2172         163 :             break;
    2173             : 
    2174       13726 :         case GDT_Int32:
    2175       13726 :             *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
    2176       13726 :             *pdfImag = 0.0;
    2177       13726 :             break;
    2178             : 
    2179          63 :         case GDT_UInt32:
    2180          63 :             *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
    2181          63 :             *pdfImag = 0.0;
    2182          63 :             break;
    2183             : 
    2184           0 :         case GDT_Int64:
    2185           0 :             *pdfReal = static_cast<double>(
    2186           0 :                 reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
    2187           0 :             *pdfImag = 0.0;
    2188           0 :             break;
    2189             : 
    2190           0 :         case GDT_UInt64:
    2191           0 :             *pdfReal = static_cast<double>(
    2192           0 :                 reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
    2193           0 :             *pdfImag = 0.0;
    2194           0 :             break;
    2195             : 
    2196           0 :         case GDT_Float16:
    2197           0 :             *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
    2198           0 :             *pdfImag = 0.0;
    2199           0 :             break;
    2200             : 
    2201     1047220 :         case GDT_Float32:
    2202     1047220 :             *pdfReal = reinterpret_cast<float *>(pabySrc)[iSrcOffset];
    2203     1047220 :             *pdfImag = 0.0;
    2204     1047220 :             break;
    2205             : 
    2206         582 :         case GDT_Float64:
    2207         582 :             *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
    2208         582 :             *pdfImag = 0.0;
    2209         582 :             break;
    2210             : 
    2211         130 :         case GDT_CInt16:
    2212         130 :             *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2];
    2213         130 :             *pdfImag = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2 + 1];
    2214         130 :             break;
    2215             : 
    2216         130 :         case GDT_CInt32:
    2217         130 :             *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2];
    2218         130 :             *pdfImag = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2 + 1];
    2219         130 :             break;
    2220             : 
    2221           0 :         case GDT_CFloat16:
    2222           0 :             *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2];
    2223           0 :             *pdfImag =
    2224           0 :                 reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2 + 1];
    2225           0 :             break;
    2226             : 
    2227         178 :         case GDT_CFloat32:
    2228         178 :             *pdfReal = reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2];
    2229         178 :             *pdfImag = reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2 + 1];
    2230         178 :             break;
    2231             : 
    2232         130 :         case GDT_CFloat64:
    2233         130 :             *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2];
    2234         130 :             *pdfImag = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2 + 1];
    2235         130 :             break;
    2236             : 
    2237           0 :         case GDT_Unknown:
    2238             :         case GDT_TypeCount:
    2239           0 :             CPLAssert(false);
    2240             :             *pdfDensity = 0.0;
    2241             :             return false;
    2242             :     }
    2243             : 
    2244    29336100 :     if (poWK->pafUnifiedSrcDensity != nullptr)
    2245     3015160 :         *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    2246             :     else
    2247    26321000 :         *pdfDensity = 1.0;
    2248             : 
    2249    29336100 :     return *pdfDensity != 0.0;
    2250             : }
    2251             : 
    2252             : /************************************************************************/
    2253             : /*                       GWKGetPixelValueReal()                         */
    2254             : /************************************************************************/
    2255             : 
    2256      151448 : static bool GWKGetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
    2257             :                                  GPtrDiff_t iSrcOffset, double *pdfDensity,
    2258             :                                  double *pdfReal)
    2259             : 
    2260             : {
    2261      151448 :     GByte *pabySrc = poWK->papabySrcImage[iBand];
    2262             : 
    2263      302898 :     if (poWK->papanBandSrcValid != nullptr &&
    2264      151450 :         poWK->papanBandSrcValid[iBand] != nullptr &&
    2265           2 :         !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
    2266             :     {
    2267           0 :         *pdfDensity = 0.0;
    2268           0 :         return false;
    2269             :     }
    2270             : 
    2271      151448 :     switch (poWK->eWorkingDataType)
    2272             :     {
    2273           1 :         case GDT_Byte:
    2274           1 :             *pdfReal = pabySrc[iSrcOffset];
    2275           1 :             break;
    2276             : 
    2277           0 :         case GDT_Int8:
    2278           0 :             *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
    2279           0 :             break;
    2280             : 
    2281           1 :         case GDT_Int16:
    2282           1 :             *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
    2283           1 :             break;
    2284             : 
    2285      150357 :         case GDT_UInt16:
    2286      150357 :             *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
    2287      150357 :             break;
    2288             : 
    2289         886 :         case GDT_Int32:
    2290         886 :             *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
    2291         886 :             break;
    2292             : 
    2293          83 :         case GDT_UInt32:
    2294          83 :             *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
    2295          83 :             break;
    2296             : 
    2297          16 :         case GDT_Int64:
    2298          16 :             *pdfReal = static_cast<double>(
    2299          16 :                 reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
    2300          16 :             break;
    2301             : 
    2302          16 :         case GDT_UInt64:
    2303          16 :             *pdfReal = static_cast<double>(
    2304          16 :                 reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
    2305          16 :             break;
    2306             : 
    2307           0 :         case GDT_Float16:
    2308           0 :             *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
    2309           0 :             break;
    2310             : 
    2311           2 :         case GDT_Float32:
    2312           2 :             *pdfReal = reinterpret_cast<float *>(pabySrc)[iSrcOffset];
    2313           2 :             break;
    2314             : 
    2315          86 :         case GDT_Float64:
    2316          86 :             *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
    2317          86 :             break;
    2318             : 
    2319           0 :         case GDT_CInt16:
    2320             :         case GDT_CInt32:
    2321             :         case GDT_CFloat16:
    2322             :         case GDT_CFloat32:
    2323             :         case GDT_CFloat64:
    2324             :         case GDT_Unknown:
    2325             :         case GDT_TypeCount:
    2326           0 :             CPLAssert(false);
    2327             :             return false;
    2328             :     }
    2329             : 
    2330      151448 :     if (poWK->pafUnifiedSrcDensity != nullptr)
    2331      150340 :         *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    2332             :     else
    2333        1108 :         *pdfDensity = 1.0;
    2334             : 
    2335      151448 :     return *pdfDensity != 0.0;
    2336             : }
    2337             : 
    2338             : /************************************************************************/
    2339             : /*                          GWKGetPixelRow()                            */
    2340             : /************************************************************************/
    2341             : 
    2342             : /* It is assumed that adfImag[] is set to 0 by caller code for non-complex */
    2343             : /* data-types. */
    2344             : 
    2345     2354130 : static bool GWKGetPixelRow(const GDALWarpKernel *poWK, int iBand,
    2346             :                            GPtrDiff_t iSrcOffset, int nHalfSrcLen,
    2347             :                            double *padfDensity, double adfReal[],
    2348             :                            double *padfImag)
    2349             : {
    2350             :     // We know that nSrcLen is even, so we can *always* unroll loops 2x.
    2351     2354130 :     const int nSrcLen = nHalfSrcLen * 2;
    2352     2354130 :     bool bHasValid = false;
    2353             : 
    2354     2354130 :     if (padfDensity != nullptr)
    2355             :     {
    2356             :         // Init the density.
    2357     3346330 :         for (int i = 0; i < nSrcLen; i += 2)
    2358             :         {
    2359     2189790 :             padfDensity[i] = 1.0;
    2360     2189790 :             padfDensity[i + 1] = 1.0;
    2361             :         }
    2362             : 
    2363     1156540 :         if (poWK->panUnifiedSrcValid != nullptr)
    2364             :         {
    2365     3281460 :             for (int i = 0; i < nSrcLen; i += 2)
    2366             :             {
    2367     2142070 :                 if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i))
    2368     2067740 :                     bHasValid = true;
    2369             :                 else
    2370       74323 :                     padfDensity[i] = 0.0;
    2371             : 
    2372     2142070 :                 if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i + 1))
    2373     2068400 :                     bHasValid = true;
    2374             :                 else
    2375       73668 :                     padfDensity[i + 1] = 0.0;
    2376             :             }
    2377             : 
    2378             :             // Reset or fail as needed.
    2379     1139400 :             if (bHasValid)
    2380     1116590 :                 bHasValid = false;
    2381             :             else
    2382       22806 :                 return false;
    2383             :         }
    2384             : 
    2385     1133730 :         if (poWK->papanBandSrcValid != nullptr &&
    2386           0 :             poWK->papanBandSrcValid[iBand] != nullptr)
    2387             :         {
    2388           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2389             :             {
    2390           0 :                 if (CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset + i))
    2391           0 :                     bHasValid = true;
    2392             :                 else
    2393           0 :                     padfDensity[i] = 0.0;
    2394             : 
    2395           0 :                 if (CPLMaskGet(poWK->papanBandSrcValid[iBand],
    2396           0 :                                iSrcOffset + i + 1))
    2397           0 :                     bHasValid = true;
    2398             :                 else
    2399           0 :                     padfDensity[i + 1] = 0.0;
    2400             :             }
    2401             : 
    2402             :             // Reset or fail as needed.
    2403           0 :             if (bHasValid)
    2404           0 :                 bHasValid = false;
    2405             :             else
    2406           0 :                 return false;
    2407             :         }
    2408             :     }
    2409             : 
    2410             :     // TODO(schwehr): Fix casting.
    2411             :     // Fetch data.
    2412     2331320 :     switch (poWK->eWorkingDataType)
    2413             :     {
    2414     1121080 :         case GDT_Byte:
    2415             :         {
    2416     1121080 :             GByte *pSrc =
    2417     1121080 :                 reinterpret_cast<GByte *>(poWK->papabySrcImage[iBand]);
    2418     1121080 :             pSrc += iSrcOffset;
    2419     3243850 :             for (int i = 0; i < nSrcLen; i += 2)
    2420             :             {
    2421     2122770 :                 adfReal[i] = pSrc[i];
    2422     2122770 :                 adfReal[i + 1] = pSrc[i + 1];
    2423             :             }
    2424     1121080 :             break;
    2425             :         }
    2426             : 
    2427           0 :         case GDT_Int8:
    2428             :         {
    2429           0 :             GInt8 *pSrc =
    2430           0 :                 reinterpret_cast<GInt8 *>(poWK->papabySrcImage[iBand]);
    2431           0 :             pSrc += iSrcOffset;
    2432           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2433             :             {
    2434           0 :                 adfReal[i] = pSrc[i];
    2435           0 :                 adfReal[i + 1] = pSrc[i + 1];
    2436             :             }
    2437           0 :             break;
    2438             :         }
    2439             : 
    2440        5614 :         case GDT_Int16:
    2441             :         {
    2442        5614 :             GInt16 *pSrc =
    2443        5614 :                 reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
    2444        5614 :             pSrc += iSrcOffset;
    2445       21492 :             for (int i = 0; i < nSrcLen; i += 2)
    2446             :             {
    2447       15878 :                 adfReal[i] = pSrc[i];
    2448       15878 :                 adfReal[i + 1] = pSrc[i + 1];
    2449             :             }
    2450        5614 :             break;
    2451             :         }
    2452             : 
    2453        4142 :         case GDT_UInt16:
    2454             :         {
    2455        4142 :             GUInt16 *pSrc =
    2456        4142 :                 reinterpret_cast<GUInt16 *>(poWK->papabySrcImage[iBand]);
    2457        4142 :             pSrc += iSrcOffset;
    2458       18548 :             for (int i = 0; i < nSrcLen; i += 2)
    2459             :             {
    2460       14406 :                 adfReal[i] = pSrc[i];
    2461       14406 :                 adfReal[i + 1] = pSrc[i + 1];
    2462             :             }
    2463        4142 :             break;
    2464             :         }
    2465             : 
    2466        1158 :         case GDT_Int32:
    2467             :         {
    2468        1158 :             GInt32 *pSrc =
    2469        1158 :                 reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
    2470        1158 :             pSrc += iSrcOffset;
    2471        3048 :             for (int i = 0; i < nSrcLen; i += 2)
    2472             :             {
    2473        1890 :                 adfReal[i] = pSrc[i];
    2474        1890 :                 adfReal[i + 1] = pSrc[i + 1];
    2475             :             }
    2476        1158 :             break;
    2477             :         }
    2478             : 
    2479         778 :         case GDT_UInt32:
    2480             :         {
    2481         778 :             GUInt32 *pSrc =
    2482         778 :                 reinterpret_cast<GUInt32 *>(poWK->papabySrcImage[iBand]);
    2483         778 :             pSrc += iSrcOffset;
    2484        2288 :             for (int i = 0; i < nSrcLen; i += 2)
    2485             :             {
    2486        1510 :                 adfReal[i] = pSrc[i];
    2487        1510 :                 adfReal[i + 1] = pSrc[i + 1];
    2488             :             }
    2489         778 :             break;
    2490             :         }
    2491             : 
    2492         218 :         case GDT_Int64:
    2493             :         {
    2494         218 :             auto pSrc =
    2495         218 :                 reinterpret_cast<std::int64_t *>(poWK->papabySrcImage[iBand]);
    2496         218 :             pSrc += iSrcOffset;
    2497         436 :             for (int i = 0; i < nSrcLen; i += 2)
    2498             :             {
    2499         218 :                 adfReal[i] = static_cast<double>(pSrc[i]);
    2500         218 :                 adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
    2501             :             }
    2502         218 :             break;
    2503             :         }
    2504             : 
    2505          28 :         case GDT_UInt64:
    2506             :         {
    2507          28 :             auto pSrc =
    2508          28 :                 reinterpret_cast<std::uint64_t *>(poWK->papabySrcImage[iBand]);
    2509          28 :             pSrc += iSrcOffset;
    2510          56 :             for (int i = 0; i < nSrcLen; i += 2)
    2511             :             {
    2512          28 :                 adfReal[i] = static_cast<double>(pSrc[i]);
    2513          28 :                 adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
    2514             :             }
    2515          28 :             break;
    2516             :         }
    2517             : 
    2518           0 :         case GDT_Float16:
    2519             :         {
    2520           0 :             GFloat16 *pSrc =
    2521           0 :                 reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
    2522           0 :             pSrc += iSrcOffset;
    2523           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2524             :             {
    2525           0 :                 adfReal[i] = pSrc[i];
    2526           0 :                 adfReal[i + 1] = pSrc[i + 1];
    2527             :             }
    2528           0 :             break;
    2529             :         }
    2530             : 
    2531       25102 :         case GDT_Float32:
    2532             :         {
    2533       25102 :             float *pSrc =
    2534       25102 :                 reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
    2535       25102 :             pSrc += iSrcOffset;
    2536      121403 :             for (int i = 0; i < nSrcLen; i += 2)
    2537             :             {
    2538       96301 :                 adfReal[i] = pSrc[i];
    2539       96301 :                 adfReal[i + 1] = pSrc[i + 1];
    2540             :             }
    2541       25102 :             break;
    2542             :         }
    2543             : 
    2544         968 :         case GDT_Float64:
    2545             :         {
    2546         968 :             double *pSrc =
    2547         968 :                 reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
    2548         968 :             pSrc += iSrcOffset;
    2549        2668 :             for (int i = 0; i < nSrcLen; i += 2)
    2550             :             {
    2551        1700 :                 adfReal[i] = pSrc[i];
    2552        1700 :                 adfReal[i + 1] = pSrc[i + 1];
    2553             :             }
    2554         968 :             break;
    2555             :         }
    2556             : 
    2557     1169410 :         case GDT_CInt16:
    2558             :         {
    2559     1169410 :             GInt16 *pSrc =
    2560     1169410 :                 reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
    2561     1169410 :             pSrc += 2 * iSrcOffset;
    2562     4676400 :             for (int i = 0; i < nSrcLen; i += 2)
    2563             :             {
    2564     3506990 :                 adfReal[i] = pSrc[2 * i];
    2565     3506990 :                 padfImag[i] = pSrc[2 * i + 1];
    2566             : 
    2567     3506990 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2568     3506990 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2569             :             }
    2570     1169410 :             break;
    2571             :         }
    2572             : 
    2573         940 :         case GDT_CInt32:
    2574             :         {
    2575         940 :             GInt32 *pSrc =
    2576         940 :                 reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
    2577         940 :             pSrc += 2 * iSrcOffset;
    2578        2612 :             for (int i = 0; i < nSrcLen; i += 2)
    2579             :             {
    2580        1672 :                 adfReal[i] = pSrc[2 * i];
    2581        1672 :                 padfImag[i] = pSrc[2 * i + 1];
    2582             : 
    2583        1672 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2584        1672 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2585             :             }
    2586         940 :             break;
    2587             :         }
    2588             : 
    2589           0 :         case GDT_CFloat16:
    2590             :         {
    2591           0 :             GFloat16 *pSrc =
    2592           0 :                 reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
    2593           0 :             pSrc += 2 * iSrcOffset;
    2594           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2595             :             {
    2596           0 :                 adfReal[i] = pSrc[2 * i];
    2597           0 :                 padfImag[i] = pSrc[2 * i + 1];
    2598             : 
    2599           0 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2600           0 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2601             :             }
    2602           0 :             break;
    2603             :         }
    2604             : 
    2605         940 :         case GDT_CFloat32:
    2606             :         {
    2607         940 :             float *pSrc =
    2608         940 :                 reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
    2609         940 :             pSrc += 2 * iSrcOffset;
    2610        2612 :             for (int i = 0; i < nSrcLen; i += 2)
    2611             :             {
    2612        1672 :                 adfReal[i] = pSrc[2 * i];
    2613        1672 :                 padfImag[i] = pSrc[2 * i + 1];
    2614             : 
    2615        1672 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2616        1672 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2617             :             }
    2618         940 :             break;
    2619             :         }
    2620             : 
    2621         940 :         case GDT_CFloat64:
    2622             :         {
    2623         940 :             double *pSrc =
    2624         940 :                 reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
    2625         940 :             pSrc += 2 * iSrcOffset;
    2626        2612 :             for (int i = 0; i < nSrcLen; i += 2)
    2627             :             {
    2628        1672 :                 adfReal[i] = pSrc[2 * i];
    2629        1672 :                 padfImag[i] = pSrc[2 * i + 1];
    2630             : 
    2631        1672 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2632        1672 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2633             :             }
    2634         940 :             break;
    2635             :         }
    2636             : 
    2637           0 :         case GDT_Unknown:
    2638             :         case GDT_TypeCount:
    2639           0 :             CPLAssert(false);
    2640             :             if (padfDensity)
    2641             :                 memset(padfDensity, 0, nSrcLen * sizeof(double));
    2642             :             return false;
    2643             :     }
    2644             : 
    2645     2331320 :     if (padfDensity == nullptr)
    2646     1197590 :         return true;
    2647             : 
    2648     1133730 :     if (poWK->pafUnifiedSrcDensity == nullptr)
    2649             :     {
    2650     3234760 :         for (int i = 0; i < nSrcLen; i += 2)
    2651             :         {
    2652             :             // Take into account earlier calcs.
    2653     2113130 :             if (padfDensity[i] > SRC_DENSITY_THRESHOLD)
    2654             :             {
    2655     2073230 :                 padfDensity[i] = 1.0;
    2656     2073230 :                 bHasValid = true;
    2657             :             }
    2658             : 
    2659     2113130 :             if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD)
    2660             :             {
    2661     2073880 :                 padfDensity[i + 1] = 1.0;
    2662     2073880 :                 bHasValid = true;
    2663             :             }
    2664             :         }
    2665             :     }
    2666             :     else
    2667             :     {
    2668       54348 :         for (int i = 0; i < nSrcLen; i += 2)
    2669             :         {
    2670       42243 :             if (padfDensity[i] > SRC_DENSITY_THRESHOLD)
    2671       42243 :                 padfDensity[i] = poWK->pafUnifiedSrcDensity[iSrcOffset + i];
    2672       42243 :             if (padfDensity[i] > SRC_DENSITY_THRESHOLD)
    2673       41704 :                 bHasValid = true;
    2674             : 
    2675       42243 :             if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD)
    2676       42243 :                 padfDensity[i + 1] =
    2677       42243 :                     poWK->pafUnifiedSrcDensity[iSrcOffset + i + 1];
    2678       42243 :             if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD)
    2679       41598 :                 bHasValid = true;
    2680             :         }
    2681             :     }
    2682             : 
    2683     1133730 :     return bHasValid;
    2684             : }
    2685             : 
    2686             : /************************************************************************/
    2687             : /*                          GWKGetPixelT()                              */
    2688             : /************************************************************************/
    2689             : 
    2690             : template <class T>
    2691     7159332 : static bool GWKGetPixelT(const GDALWarpKernel *poWK, int iBand,
    2692             :                          GPtrDiff_t iSrcOffset, double *pdfDensity, T *pValue)
    2693             : 
    2694             : {
    2695     7159332 :     T *pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
    2696             : 
    2697    16456570 :     if ((poWK->panUnifiedSrcValid != nullptr &&
    2698    14318624 :          !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset)) ||
    2699     7159332 :         (poWK->papanBandSrcValid != nullptr &&
    2700          21 :          poWK->papanBandSrcValid[iBand] != nullptr &&
    2701          21 :          !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset)))
    2702             :     {
    2703           9 :         *pdfDensity = 0.0;
    2704           9 :         return false;
    2705             :     }
    2706             : 
    2707     7159332 :     *pValue = pSrc[iSrcOffset];
    2708             : 
    2709     7159332 :     if (poWK->pafUnifiedSrcDensity == nullptr)
    2710     6974919 :         *pdfDensity = 1.0;
    2711             :     else
    2712      184414 :         *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    2713             : 
    2714     7159332 :     return *pdfDensity != 0.0;
    2715             : }
    2716             : 
    2717             : /************************************************************************/
    2718             : /*                        GWKBilinearResample()                         */
    2719             : /*     Set of bilinear interpolators                                    */
    2720             : /************************************************************************/
    2721             : 
    2722       72824 : static bool GWKBilinearResample4Sample(const GDALWarpKernel *poWK, int iBand,
    2723             :                                        double dfSrcX, double dfSrcY,
    2724             :                                        double *pdfDensity, double *pdfReal,
    2725             :                                        double *pdfImag)
    2726             : 
    2727             : {
    2728             :     // Save as local variables to avoid following pointers.
    2729       72824 :     const int nSrcXSize = poWK->nSrcXSize;
    2730       72824 :     const int nSrcYSize = poWK->nSrcYSize;
    2731             : 
    2732       72824 :     int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    2733       72824 :     int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    2734       72824 :     double dfRatioX = 1.5 - (dfSrcX - iSrcX);
    2735       72824 :     double dfRatioY = 1.5 - (dfSrcY - iSrcY);
    2736       72824 :     bool bShifted = false;
    2737             : 
    2738       72824 :     if (iSrcX == -1)
    2739             :     {
    2740         292 :         iSrcX = 0;
    2741         292 :         dfRatioX = 1;
    2742             :     }
    2743       72824 :     if (iSrcY == -1)
    2744             :     {
    2745        7686 :         iSrcY = 0;
    2746        7686 :         dfRatioY = 1;
    2747             :     }
    2748       72824 :     GPtrDiff_t iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    2749             : 
    2750             :     // Shift so we don't overrun the array.
    2751       72824 :     if (static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize == iSrcOffset + 1 ||
    2752       72764 :         static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize ==
    2753       72764 :             iSrcOffset + nSrcXSize + 1)
    2754             :     {
    2755         120 :         bShifted = true;
    2756         120 :         --iSrcOffset;
    2757             :     }
    2758             : 
    2759       72824 :     double adfDensity[2] = {0.0, 0.0};
    2760       72824 :     double adfReal[2] = {0.0, 0.0};
    2761       72824 :     double adfImag[2] = {0.0, 0.0};
    2762       72824 :     double dfAccumulatorReal = 0.0;
    2763       72824 :     double dfAccumulatorImag = 0.0;
    2764       72824 :     double dfAccumulatorDensity = 0.0;
    2765       72824 :     double dfAccumulatorDivisor = 0.0;
    2766             : 
    2767       72824 :     const GPtrDiff_t nSrcPixels =
    2768       72824 :         static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize;
    2769             :     // Get pixel row.
    2770       72824 :     if (iSrcY >= 0 && iSrcY < nSrcYSize && iSrcOffset >= 0 &&
    2771      145648 :         iSrcOffset < nSrcPixels &&
    2772       72824 :         GWKGetPixelRow(poWK, iBand, iSrcOffset, 1, adfDensity, adfReal,
    2773             :                        adfImag))
    2774             :     {
    2775       67168 :         double dfMult1 = dfRatioX * dfRatioY;
    2776       67168 :         double dfMult2 = (1.0 - dfRatioX) * dfRatioY;
    2777             : 
    2778             :         // Shifting corrected.
    2779       67168 :         if (bShifted)
    2780             :         {
    2781         120 :             adfReal[0] = adfReal[1];
    2782         120 :             adfImag[0] = adfImag[1];
    2783         120 :             adfDensity[0] = adfDensity[1];
    2784             :         }
    2785             : 
    2786             :         // Upper Left Pixel.
    2787       67168 :         if (iSrcX >= 0 && iSrcX < nSrcXSize &&
    2788       67168 :             adfDensity[0] > SRC_DENSITY_THRESHOLD)
    2789             :         {
    2790       61738 :             dfAccumulatorDivisor += dfMult1;
    2791             : 
    2792       61738 :             dfAccumulatorReal += adfReal[0] * dfMult1;
    2793       61738 :             dfAccumulatorImag += adfImag[0] * dfMult1;
    2794       61738 :             dfAccumulatorDensity += adfDensity[0] * dfMult1;
    2795             :         }
    2796             : 
    2797             :         // Upper Right Pixel.
    2798       67168 :         if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
    2799       66547 :             adfDensity[1] > SRC_DENSITY_THRESHOLD)
    2800             :         {
    2801       61273 :             dfAccumulatorDivisor += dfMult2;
    2802             : 
    2803       61273 :             dfAccumulatorReal += adfReal[1] * dfMult2;
    2804       61273 :             dfAccumulatorImag += adfImag[1] * dfMult2;
    2805       61273 :             dfAccumulatorDensity += adfDensity[1] * dfMult2;
    2806             :         }
    2807             :     }
    2808             : 
    2809             :     // Get pixel row.
    2810       72824 :     if (iSrcY + 1 >= 0 && iSrcY + 1 < nSrcYSize &&
    2811      214350 :         iSrcOffset + nSrcXSize >= 0 && iSrcOffset + nSrcXSize < nSrcPixels &&
    2812       68702 :         GWKGetPixelRow(poWK, iBand, iSrcOffset + nSrcXSize, 1, adfDensity,
    2813             :                        adfReal, adfImag))
    2814             :     {
    2815       63143 :         double dfMult1 = dfRatioX * (1.0 - dfRatioY);
    2816       63143 :         double dfMult2 = (1.0 - dfRatioX) * (1.0 - dfRatioY);
    2817             : 
    2818             :         // Shifting corrected
    2819       63143 :         if (bShifted)
    2820             :         {
    2821          60 :             adfReal[0] = adfReal[1];
    2822          60 :             adfImag[0] = adfImag[1];
    2823          60 :             adfDensity[0] = adfDensity[1];
    2824             :         }
    2825             : 
    2826             :         // Lower Left Pixel
    2827       63143 :         if (iSrcX >= 0 && iSrcX < nSrcXSize &&
    2828       63143 :             adfDensity[0] > SRC_DENSITY_THRESHOLD)
    2829             :         {
    2830       57864 :             dfAccumulatorDivisor += dfMult1;
    2831             : 
    2832       57864 :             dfAccumulatorReal += adfReal[0] * dfMult1;
    2833       57864 :             dfAccumulatorImag += adfImag[0] * dfMult1;
    2834       57864 :             dfAccumulatorDensity += adfDensity[0] * dfMult1;
    2835             :         }
    2836             : 
    2837             :         // Lower Right Pixel.
    2838       63143 :         if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
    2839       62582 :             adfDensity[1] > SRC_DENSITY_THRESHOLD)
    2840             :         {
    2841       57605 :             dfAccumulatorDivisor += dfMult2;
    2842             : 
    2843       57605 :             dfAccumulatorReal += adfReal[1] * dfMult2;
    2844       57605 :             dfAccumulatorImag += adfImag[1] * dfMult2;
    2845       57605 :             dfAccumulatorDensity += adfDensity[1] * dfMult2;
    2846             :         }
    2847             :     }
    2848             : 
    2849             :     /* -------------------------------------------------------------------- */
    2850             :     /*      Return result.                                                  */
    2851             :     /* -------------------------------------------------------------------- */
    2852       72824 :     if (dfAccumulatorDivisor == 1.0)
    2853             :     {
    2854       41767 :         *pdfReal = dfAccumulatorReal;
    2855       41767 :         *pdfImag = dfAccumulatorImag;
    2856       41767 :         *pdfDensity = dfAccumulatorDensity;
    2857       41767 :         return false;
    2858             :     }
    2859       31057 :     else if (dfAccumulatorDivisor < 0.00001)
    2860             :     {
    2861           0 :         *pdfReal = 0.0;
    2862           0 :         *pdfImag = 0.0;
    2863           0 :         *pdfDensity = 0.0;
    2864           0 :         return false;
    2865             :     }
    2866             :     else
    2867             :     {
    2868       31057 :         *pdfReal = dfAccumulatorReal / dfAccumulatorDivisor;
    2869       31057 :         *pdfImag = dfAccumulatorImag / dfAccumulatorDivisor;
    2870       31057 :         *pdfDensity = dfAccumulatorDensity / dfAccumulatorDivisor;
    2871       31057 :         return true;
    2872             :     }
    2873             : }
    2874             : 
    2875             : template <class T>
    2876     5116014 : static bool GWKBilinearResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
    2877             :                                                int iBand, double dfSrcX,
    2878             :                                                double dfSrcY, T *pValue)
    2879             : // Bilinearly interpolates band iBand at source position (dfSrcX, dfSrcY) straight from poWK->papabySrcImage; no validity or density mask is consulted.
    2880             : {
    2881             :     // Stores GWKRoundValueT<T>() of the result in *pValue and returns true; stores 0 and returns false when the in-bounds weights sum to less than 0.00001.
    2882     5116014 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));  // Column of the upper-left neighbour; floor() also handles negative coordinates.
    2883     5116014 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));  // Row of the upper-left neighbour.
    2884     5116014 :     GPtrDiff_t iSrcOffset =
    2885     5116014 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;  // May fall outside the buffer; only dereferenced after the bounds tests below.
    2886     5116014 :     const double dfRatioX = 1.5 - (dfSrcX - iSrcX);  // Weight of the left column (1 - fractional X offset).
    2887     5116014 :     const double dfRatioY = 1.5 - (dfSrcY - iSrcY);  // Weight of the upper row (1 - fractional Y offset).
    2888             : 
    2889     5116014 :     const T *const pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
    2890             :     // Fast path: all four neighbours are inside the image, so the four weights already sum to 1.
    2891     5116014 :     if (iSrcX >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
    2892     5012847 :         iSrcY + 1 < poWK->nSrcYSize)
    2893             :     {
    2894     4988678 :         const double dfAccumulator =
    2895     4988678 :             (pSrc[iSrcOffset] * dfRatioX +
    2896     4988678 :              pSrc[iSrcOffset + 1] * (1.0 - dfRatioX)) *
    2897             :                 dfRatioY +
    2898     4988678 :             (pSrc[iSrcOffset + poWK->nSrcXSize] * dfRatioX +
    2899     4988678 :              pSrc[iSrcOffset + 1 + poWK->nSrcXSize] * (1.0 - dfRatioX)) *
    2900     4988678 :                 (1.0 - dfRatioY);
    2901             : 
    2902     4988678 :         *pValue = GWKRoundValueT<T>(dfAccumulator);
    2903             : 
    2904     4988678 :         return true;
    2905             :     }
    2906             :     // Border path: accumulate only the neighbours that are in bounds and remember the total weight used.
    2907      127349 :     double dfAccumulatorDivisor = 0.0;
    2908      127349 :     double dfAccumulator = 0.0;
    2909             : 
    2910             :     // Upper Left Pixel: (iSrcX, iSrcY), weight dfRatioX * dfRatioY.
    2911      127349 :     if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY >= 0 &&
    2912       53440 :         iSrcY < poWK->nSrcYSize)
    2913             :     {
    2914       53440 :         const double dfMult = dfRatioX * dfRatioY;
    2915             : 
    2916       53440 :         dfAccumulatorDivisor += dfMult;
    2917             : 
    2918       53440 :         dfAccumulator += pSrc[iSrcOffset] * dfMult;
    2919             :     }
    2920             : 
    2921             :     // Upper Right Pixel: (iSrcX + 1, iSrcY), weight (1 - dfRatioX) * dfRatioY.
    2922      127349 :     if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
    2923       61354 :         iSrcY < poWK->nSrcYSize)
    2924             :     {
    2925       61354 :         const double dfMult = (1.0 - dfRatioX) * dfRatioY;
    2926             : 
    2927       61354 :         dfAccumulatorDivisor += dfMult;
    2928             : 
    2929       61354 :         dfAccumulator += pSrc[iSrcOffset + 1] * dfMult;
    2930             :     }
    2931             : 
    2932             :     // Lower Right Pixel: (iSrcX + 1, iSrcY + 1), weight (1 - dfRatioX) * (1 - dfRatioY).
    2933      127349 :     if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
    2934       97471 :         iSrcY + 1 < poWK->nSrcYSize)
    2935             :     {
    2936       72902 :         const double dfMult = (1.0 - dfRatioX) * (1.0 - dfRatioY);
    2937             : 
    2938       72902 :         dfAccumulatorDivisor += dfMult;
    2939             : 
    2940       72902 :         dfAccumulator += pSrc[iSrcOffset + 1 + poWK->nSrcXSize] * dfMult;
    2941             :     }
    2942             : 
    2943             :     // Lower Left Pixel: (iSrcX, iSrcY + 1), weight dfRatioX * (1 - dfRatioY).
    2944      127349 :     if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
    2945       89535 :         iSrcY + 1 < poWK->nSrcYSize)
    2946             :     {
    2947       64758 :         const double dfMult = dfRatioX * (1.0 - dfRatioY);
    2948             : 
    2949       64758 :         dfAccumulatorDivisor += dfMult;
    2950             : 
    2951       64758 :         dfAccumulator += pSrc[iSrcOffset + poWK->nSrcXSize] * dfMult;
    2952             :     }
    2953             : 
    2954             :     /* -------------------------------------------------------------------- */
    2955             :     /*      Return result.                                                  */
    2956             :     /* -------------------------------------------------------------------- */
    2957      127349 :     double dfValue = 0.0;
    2958             : 
    2959      127349 :     if (dfAccumulatorDivisor < 0.00001)  // Essentially no in-bounds neighbour contributed.
    2960             :     {
    2961           0 :         *pValue = 0;
    2962           0 :         return false;
    2963             :     }
    2964      127349 :     else if (dfAccumulatorDivisor == 1.0)  // Weights already normalised; skip the division.
    2965             :     {
    2966        8767 :         dfValue = dfAccumulator;
    2967             :     }
    2968             :     else
    2969             :     {
    2970      118582 :         dfValue = dfAccumulator / dfAccumulatorDivisor;  // Renormalise by the weight actually accumulated.
    2971             :     }
    2972             : 
    2973      127349 :     *pValue = GWKRoundValueT<T>(dfValue);
    2974             : 
    2975      127349 :     return true;
    2976             : }
    2977             : 
    2978             : /************************************************************************/
    2979             : /*                        GWKCubicResample()                            */
    2980             : /*     Set of bicubic interpolators using cubic convolution.            */
    2981             : /************************************************************************/
    2982             : 
    2983             : // http://verona.fi-p.unam.mx/boris/practicas/CubConvInterp.pdf Formula 18
    2984             : // or http://en.wikipedia.org/wiki/Cubic_Hermite_spline : CINTx(p_1,p0,p1,p2)
    2985             : // http://en.wikipedia.org/wiki/Bicubic_interpolation: matrix notation
    2986             : // CubicConvolution(): 1-D cubic through four consecutive samples f0..f3; distance1..3 scale the three bracketed terms below.
    2987             : template <typename T>
    2988     1602850 : static inline T CubicConvolution(T distance1, T distance2, T distance3, T f0,
    2989             :                                  T f1, T f2, T f3)
    2990             : {  // Returns f1 when all distances are 0 and f2 when all are 1. Matches the CINT form cited above if distance2 = distance1^2 and distance3 = distance1^3 (callers not visible here - presumably they pass those powers).
    2991     1602850 :     return (f1 + T(0.5) * (distance1 * (f2 - f0) +
    2992     1602850 :                            distance2 * (2 * f0 - 5 * f1 + 4 * f2 - f3) +
    2993     1602850 :                            distance3 * (3 * (f1 - f2) + f3 - f0)));
    2994             : }
    2995             : 
    2996             : /************************************************************************/
    2997             : /*                       GWKCubicComputeWeights()                       */
    2998             : /************************************************************************/
    2999             : // Fills coeffs[0..3] with the cubic-convolution weights for offset x, to be applied to four consecutive samples (e.g. with CONVOL4).
    3000             : // The four weights sum to 1, i.e. coeffs[2] = 1.0 - (coeffs[0] + coeffs[1] + coeffs[3]).
    3001             : // Expanded: c0 = (-x + 2x^2 - x^3)/2, c1 = (2 - 5x^2 + 3x^3)/2, c2 = (x + 4x^2 - 3x^3)/2, c3 = (-x^2 + x^3)/2.
    3002             : template <typename T>
    3003     2267674 : static inline void GWKCubicComputeWeights(T x, T coeffs[4])
    3004             : {
    3005     2267674 :     const T halfX = T(0.5) * x;
    3006     2267674 :     const T threeX = T(3.0) * x;
    3007     2267674 :     const T halfX2 = halfX * x;  // 0.5 * x^2, shared by coeffs[1] and coeffs[3].
    3008             : 
    3009     2267674 :     coeffs[0] = halfX * (-1 + x * (2 - x));
    3010     2267674 :     coeffs[1] = 1 + halfX2 * (-5 + threeX);
    3011     2267674 :     coeffs[2] = halfX * (1 + x * (4 - threeX));
    3012     2267674 :     coeffs[3] = halfX2 * (-1 + x);
    3013     2267674 : }
    3014             : 
    3015             : // TODO(schwehr): Use an inline function.  CONVOL4(v1, v2) is the dot product of elements 0..3 of v1 and v2; each argument is expanded four times, so pass side-effect-free expressions.
    3016             : #define CONVOL4(v1, v2)                                                        \
    3017             :     ((v1)[0] * (v2)[0] + (v1)[1] * (v2)[1] + (v1)[2] * (v2)[2] +               \
    3018             :      (v1)[3] * (v2)[3])
    3019             : 
    3020             : #if 0
    3021             : // Compiled out by the #if 0 above.  Optimal (in theory...) for max 2 convolutions: 14 multiplications
    3022             : // instead of 17.  adfCoeffs[] hold the GWKCubicComputeWeights() weights with the common 0.5 * x factor (and the constant 1 of weight 1) left out.
    3023             : // TODO(schwehr): Use an inline function.
    3024             : #define GWKCubicComputeWeights_Optim2MAX(dfX_, adfCoeffs, dfHalfX)             \
    3025             :     {                                                                          \
    3026             :         const double dfX = dfX_;                                               \
    3027             :         dfHalfX = 0.5 * dfX;                                                   \
    3028             :         const double dfThreeX = 3.0 * dfX;                                     \
    3029             :         const double dfXMinus1 = dfX - 1;                                      \
    3030             :                                                                                \
    3031             :         adfCoeffs[0] = -1 + dfX * (2 - dfX);                                   \
    3032             :         adfCoeffs[1] = dfX * (-5 + dfThreeX);                                  \
    3033             :         /*adfCoeffs[2] = 1 + dfX * (4 - dfThreeX);*/                           \
    3034             :         adfCoeffs[2] = -dfXMinus1 - adfCoeffs[1];                              \
    3035             :         /*adfCoeffs[3] = dfX * (-1 + dfX); */                                  \
    3036             :         adfCoeffs[3] = dfXMinus1 - adfCoeffs[0];                               \
    3037             :     }
    3038             : // CONVOL4_Optim2MAX() re-applies what was left out: v[1] + dfHalfX * (dot product of adfCoeffs and v over elements 0..3).
    3039             : // TODO(schwehr): Use an inline function.
    3040             : #define CONVOL4_Optim2MAX(adfCoeffs, v, dfHalfX)                               \
    3041             :     ((v)[1] + (dfHalfX) * ((adfCoeffs)[0] * (v)[0] + (adfCoeffs)[1] * (v)[1] + \
    3042             :                            (adfCoeffs)[2] * (v)[2] + (adfCoeffs)[3] * (v)[3]))
    3043             : #endif
    3044             : 
    3045      299879 : static bool GWKCubicResample4Sample(const GDALWarpKernel *poWK, int iBand,
    3046             :                                     double dfSrcX, double dfSrcY,
    3047             :                                     double *pdfDensity, double *pdfReal,
    3048             :                                     double *pdfImag)
    3049             : // Bicubic (cubic convolution) sample of band iBand at (dfSrcX, dfSrcY) over a 4x4 source window; writes *pdfDensity, *pdfReal and *pdfImag.
    3050             : {
    3051      299879 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);  // Truncating cast (not floor); every case where that differs from floor is caught by the border test below.
    3052      299879 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3053      299879 :     GPtrDiff_t iSrcOffset =
    3054      299879 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3055      299879 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;  // Fractional offset fed to the horizontal weights.
    3056      299879 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;  // Fractional offset fed to the vertical weights.
    3057      299879 :     double adfDensity[4] = {};
    3058      299879 :     double adfReal[4] = {};
    3059      299879 :     double adfImag[4] = {};
    3060             :     // Returns true on the cubic path; at the border or on a low-density sample it returns whatever GWKBilinearResample4Sample() returns.
    3061             :     // Get the bilinear interpolation at the image borders, i.e. unless columns iSrcX-1..iSrcX+2 and rows iSrcY-1..iSrcY+2 are all inside the source.
    3062      299879 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3063      284412 :         iSrcY + 2 >= poWK->nSrcYSize)
    3064       24136 :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3065       24136 :                                           pdfDensity, pdfReal, pdfImag);
    3066             : 
    3067      275743 :     double adfValueDens[4] = {};  // Horizontally convolved density, one entry per kernel row.
    3068      275743 :     double adfValueReal[4] = {};
    3069      275743 :     double adfValueImag[4] = {};
    3070             : 
    3071      275743 :     double adfCoeffsX[4] = {};
    3072      275743 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
    3073             :     // Horizontal pass: kernel row i (relative to iSrcY) starts at column iSrcX - 1.
    3074     1232410 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3075             :     {
    3076     1003120 :         if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
    3077      991507 :                             2, adfDensity, adfReal, adfImag) ||  // NOTE(review): the "2" presumably requests 2 pixel pairs (4 values), since indices 0..3 are read below - confirm against GWKGetPixelRow().
    3078      991507 :             adfDensity[0] < SRC_DENSITY_THRESHOLD ||
    3079      973867 :             adfDensity[1] < SRC_DENSITY_THRESHOLD ||
    3080     2960190 :             adfDensity[2] < SRC_DENSITY_THRESHOLD ||
    3081      965566 :             adfDensity[3] < SRC_DENSITY_THRESHOLD)
    3082             :         {
    3083       46449 :             return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3084       46449 :                                               pdfDensity, pdfReal, pdfImag);
    3085             :         }
    3086             : 
    3087      956668 :         adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
    3088      956668 :         adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
    3089      956668 :         adfValueImag[i + 1] = CONVOL4(adfCoeffsX, adfImag);
    3090             :     }
    3091             : 
    3092             :     /* -------------------------------------------------------------------- */
    3093             :     /*      For now, if we have any pixels missing in the kernel area,      */
    3094             :     /*      we fallback on using bilinear interpolation.  Ideally we        */
    3095             :     /*      should do "weight adjustment" of our results similarly to       */
    3096             :     /*      what is done for the cubic spline and lanc. interpolators.      */
    3097             :     /* -------------------------------------------------------------------- */
    3098             :     // (The fallback described above is the early return inside the row loop.)  Vertical pass over the four row results follows.
    3099      229294 :     double adfCoeffsY[4] = {};
    3100      229294 :     GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
    3101             : 
    3102      229294 :     *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
    3103      229294 :     *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
    3104      229294 :     *pdfImag = CONVOL4(adfCoeffsY, adfValueImag);
    3105             : 
    3106      229294 :     return true;
    3107             : }
    3108             : 
    3109             : #ifdef USE_SSE2
    3110             : 
    3111             : /************************************************************************/
    3112             : /*                           XMMLoad4Values()                           */
    3113             : /*                                                                      */
    3114             : /*  Load 4 packed byte or uint16, cast them to float and put them in a  */
    3115             : /*  m128 register.                                                      */
    3116             : /************************************************************************/
    3117             : // GByte overload: reads exactly 4 bytes starting at ptr.
    3118      949092 : static CPL_INLINE __m128 XMMLoad4Values(const GByte *ptr)
    3119             : {
    3120             :     unsigned int i;
    3121      949092 :     memcpy(&i, ptr, 4);  // memcpy rather than a pointer cast, so ptr needs no particular alignment.
    3122     1898180 :     __m128i xmm_i = _mm_cvtsi32_si128(i);
    3123             :     // Zero extend 4 packed unsigned 8-bit integers in a to packed
    3124             :     // 32-bit integers.
    3125             : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
    3126             :     xmm_i = _mm_cvtepu8_epi32(xmm_i);
    3127             : #else
    3128     1898180 :     xmm_i = _mm_unpacklo_epi8(xmm_i, _mm_setzero_si128());  // 8 -> 16 bit by interleaving with zero bytes.
    3129     1898180 :     xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());  // 16 -> 32 bit by interleaving with zero words.
    3130             : #endif
    3131     1898180 :     return _mm_cvtepi32_ps(xmm_i);  // Four int32 lanes converted to float.
    3132             : }
    3133             : 
    3134        5292 : static CPL_INLINE __m128 XMMLoad4Values(const GUInt16 *ptr)  // GUInt16 overload: reads exactly 8 bytes (4 values) starting at ptr.
    3135             : {
    3136             :     GUInt64 i;
    3137        5292 :     memcpy(&i, ptr, 8);  // memcpy rather than a pointer cast, so ptr needs no particular alignment.
    3138       10584 :     __m128i xmm_i = _mm_cvtsi64_si128(i);
    3139             :     // Zero extend 4 packed unsigned 16-bit integers in a to packed
    3140             :     // 32-bit integers.
    3141             : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
    3142             :     xmm_i = _mm_cvtepu16_epi32(xmm_i);
    3143             : #else
    3144       10584 :     xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());  // 16 -> 32 bit by interleaving with zero words.
    3145             : #endif
    3146       10584 :     return _mm_cvtepi32_ps(xmm_i);  // Four int32 lanes converted to float.
    3147             : }
    3148             : 
    3149             : /************************************************************************/
    3150             : /*                           XMMHorizontalAdd()                         */
    3151             : /*                                                                      */
    3152             : /*  Return the sum of the 4 floating points of the register.            */
    3153             : /************************************************************************/
    3154             : // Two variants with the same result: one built on _mm_movehdup_ps, one on _mm_movehl_ps/_mm_shuffle_ps.  Lane comments list lane 3 first, lane 0 last; only lane 0 of the final sum is returned.
    3155             : #if defined(__SSE3__) || defined(USE_NEON_OPTIMIZATIONS)
    3156             : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
    3157             : {
    3158             :     __m128 shuf = _mm_movehdup_ps(v);   // (v3   , v3   , v1   , v1)
    3159             :     __m128 sums = _mm_add_ps(v, shuf);  // (v3+v3, v3+v2, v1+v1, v1+v0)
    3160             :     shuf = _mm_movehl_ps(shuf, sums);   // (v3   , v3   , v3+v3, v3+v2)
    3161             :     sums = _mm_add_ss(sums, shuf);      // (v1+v0)+(v3+v2)
    3162             :     return _mm_cvtss_f32(sums);
    3163             : }
    3164             : #else
    3165      238596 : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
    3166             : {
    3167      238596 :     __m128 shuf = _mm_movehl_ps(v, v);     // (v3   , v2   , v3   , v2)
    3168      238596 :     __m128 sums = _mm_add_ps(v, shuf);     // (v3+v3, v2+v2, v3+v1, v2+v0)
    3169      238596 :     shuf = _mm_shuffle_ps(sums, sums, 1);  // (v2+v0, v2+v0, v2+v0, v3+v1)
    3170      238596 :     sums = _mm_add_ss(sums, shuf);         // (v2+v0)+(v3+v1)
    3171      238596 :     return _mm_cvtss_f32(sums);
    3172             : }
    3173             : #endif
    3174             : 
    3175             : #endif  // define USE_SSE2
    3176             : 
    3177             : /************************************************************************/
    3178             : /*            GWKCubicResampleSrcMaskIsDensity4SampleRealT()            */
    3179             : /************************************************************************/
    3180             : 
    3181             : // Note: if USE_SSE_CUBIC_IMPL, only instantiate that for Byte and UInt16,
    3182             : // because there are a few assumptions above those types.
    3183             : // We do not define USE_SSE_CUBIC_IMPL since in practice, it gives zero
    3184             : // perf benefit.
    3185             : // Bicubic sample of band iBand at (dfSrcX, dfSrcY) whose only validity input is poWK->pafUnifiedSrcDensity; writes *pdfDensity and *pdfReal.
    3186             : template <class T>
    3187         361 : static CPL_INLINE bool GWKCubicResampleSrcMaskIsDensity4SampleRealT(
    3188             :     const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
    3189             :     double *pdfDensity, double *pdfReal)
    3190             : {
    3191         361 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);  // Truncating cast (not floor); every case where that differs from floor is caught by the border test below.
    3192         361 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3193         361 :     const GPtrDiff_t iSrcOffset =
    3194         361 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3195             :     // Returns true on the cubic path; at the border or on a low-density kernel sample it returns whatever GWKBilinearResample4Sample() returns.
    3196             :     // Get the bilinear interpolation at the image borders.
    3197         361 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3198         361 :         iSrcY + 2 >= poWK->nSrcYSize)
    3199             :     {
    3200           0 :         double adfImagIgnored[4] = {};  // Sink for the imaginary output, which this function does not report.
    3201           0 :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3202           0 :                                           pdfDensity, pdfReal, adfImagIgnored);
    3203             :     }
    3204             :     // Optional float32 SSE fast path; per the note above the function, USE_SSE_CUBIC_IMPL is not defined, so it is normally compiled out.
    3205             : #if defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
    3206             :     const float fDeltaX = static_cast<float>(dfSrcX) - 0.5f - iSrcX;
    3207             :     const float fDeltaY = static_cast<float>(dfSrcY) - 0.5f - iSrcY;
    3208             : 
    3209             :     // TODO(schwehr): Explain the magic numbers.
    3210             :     float afTemp[4 + 4 + 4 + 1];  // Three groups of 4 floats (coeffs, density, value) plus 1 spare.
    3211             :     float *pafAligned =
    3212             :         reinterpret_cast<float *>(afTemp + ((size_t)afTemp & 0xf));  // NOTE(review): this advances by (address & 0xf) floats, not bytes, so it does not produce the 16-byte alignment _mm_load_ps requires and can step past afTemp - confirm before enabling USE_SSE_CUBIC_IMPL.
    3213             :     float *pafCoeffs = pafAligned;
    3214             :     float *pafDensity = pafAligned + 4;
    3215             :     float *pafValue = pafAligned + 8;
    3216             : 
    3217             :     const float fHalfDeltaX = 0.5f * fDeltaX;
    3218             :     const float fThreeDeltaX = 3.0f * fDeltaX;
    3219             :     const float fHalfDeltaX2 = fHalfDeltaX * fDeltaX;
    3220             :     // Horizontal weights: same formulas as GWKCubicComputeWeights(), in float.
    3221             :     pafCoeffs[0] = fHalfDeltaX * (-1 + fDeltaX * (2 - fDeltaX));
    3222             :     pafCoeffs[1] = 1 + fHalfDeltaX2 * (-5 + fThreeDeltaX);
    3223             :     pafCoeffs[2] = fHalfDeltaX * (1 + fDeltaX * (4 - fThreeDeltaX));
    3224             :     pafCoeffs[3] = fHalfDeltaX2 * (-1 + fDeltaX);
    3225             :     __m128 xmmCoeffs = _mm_load_ps(pafCoeffs);
    3226             :     const __m128 xmmThreshold = _mm_load1_ps(&SRC_DENSITY_THRESHOLD);
    3227             :     // Per kernel row: flag densities below the threshold, then convolve density and pixel values horizontally.
    3228             :     __m128 xmmMaskLowDensity = _mm_setzero_ps();
    3229             :     for (GPtrDiff_t i = -1, iOffset = iSrcOffset - poWK->nSrcXSize - 1; i < 3;
    3230             :          i++, iOffset += poWK->nSrcXSize)
    3231             :     {
    3232             :         const __m128 xmmDensity =
    3233             :             _mm_loadu_ps(poWK->pafUnifiedSrcDensity + iOffset);
    3234             :         xmmMaskLowDensity = _mm_or_ps(xmmMaskLowDensity,
    3235             :                                       _mm_cmplt_ps(xmmDensity, xmmThreshold));
    3236             :         pafDensity[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
    3237             : 
    3238             :         const __m128 xmmValues =
    3239             :             XMMLoad4Values(((T *)poWK->papabySrcImage[iBand]) + iOffset);
    3240             :         pafValue[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValues));
    3241             :     }
    3242             :     if (_mm_movemask_ps(xmmMaskLowDensity))  // At least one of the 16 densities was below the threshold.
    3243             :     {
    3244             :         double adfImagIgnored[4] = {};
    3245             :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3246             :                                           pdfDensity, pdfReal, adfImagIgnored);
    3247             :     }
    3248             :     // Vertical weights, reusing pafCoeffs.
    3249             :     const float fHalfDeltaY = 0.5f * fDeltaY;
    3250             :     const float fThreeDeltaY = 3.0f * fDeltaY;
    3251             :     const float fHalfDeltaY2 = fHalfDeltaY * fDeltaY;
    3252             : 
    3253             :     pafCoeffs[0] = fHalfDeltaY * (-1 + fDeltaY * (2 - fDeltaY));
    3254             :     pafCoeffs[1] = 1 + fHalfDeltaY2 * (-5 + fThreeDeltaY);
    3255             :     pafCoeffs[2] = fHalfDeltaY * (1 + fDeltaY * (4 - fThreeDeltaY));
    3256             :     pafCoeffs[3] = fHalfDeltaY2 * (-1 + fDeltaY);
    3257             : 
    3258             :     xmmCoeffs = _mm_load_ps(pafCoeffs);
    3259             : 
    3260             :     const __m128 xmmDensity = _mm_load_ps(pafDensity);
    3261             :     const __m128 xmmValue = _mm_load_ps(pafValue);
    3262             :     *pdfDensity = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
    3263             :     *pdfReal = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValue));
    3264             : 
    3265             :     // We did all above computations on float32 whereas the general case is
    3266             :     // float64. Not sure if one is fundamentally more correct than the other
    3267             :     // one, but we want our optimization to give the same result as the
    3268             :     // general case as much as possible, so if the resulting value is
    3269             :     // close to some_int_value + 0.5, redo the computation with the general
    3270             :     // case.
    3271             :     // Note: If other types than Byte or UInt16, will need changes.
    3272             :     if (fabs(*pdfReal - static_cast<int>(*pdfReal) - 0.5) > .007)  // Far enough from a .5 boundary: keep the float32 result.
    3273             :         return true;
    3274             : 
    3275             : #endif  // defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
    3276             :     // General float64 path; in SSE builds it is reached only to recompute results close to some_int_value + 0.5.
    3277         361 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3278         361 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3279             : 
    3280         361 :     double adfValueDens[4] = {};  // Horizontally convolved density, one entry per kernel row.
    3281         361 :     double adfValueReal[4] = {};  // Horizontally convolved pixel value, one entry per kernel row.
    3282             : 
    3283         361 :     double adfCoeffsX[4] = {};
    3284         361 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
    3285             : 
    3286         361 :     double adfCoeffsY[4] = {};
    3287         361 :     GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
    3288             : 
    3289        1433 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3290             :     {  // The density test below is skipped in SSE builds, where the SSE loop above has already performed it.
    3291        1177 :         const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;  // First of the 4 samples of kernel row i (column iSrcX - 1).
    3292             : #if !(defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2))
    3293        1177 :         if (poWK->pafUnifiedSrcDensity[iOffset + 0] < SRC_DENSITY_THRESHOLD ||
    3294        1089 :             poWK->pafUnifiedSrcDensity[iOffset + 1] < SRC_DENSITY_THRESHOLD ||
    3295        1089 :             poWK->pafUnifiedSrcDensity[iOffset + 2] < SRC_DENSITY_THRESHOLD ||
    3296        1089 :             poWK->pafUnifiedSrcDensity[iOffset + 3] < SRC_DENSITY_THRESHOLD)
    3297             :         {
    3298         105 :             double adfImagIgnored[4] = {};
    3299         105 :             return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3300             :                                               pdfDensity, pdfReal,
    3301         105 :                                               adfImagIgnored);
    3302             :         }
    3303             : #endif
    3304             : 
    3305        1072 :         adfValueDens[i + 1] =
    3306        1072 :             CONVOL4(adfCoeffsX, poWK->pafUnifiedSrcDensity + iOffset);
    3307             : 
    3308        1072 :         adfValueReal[i + 1] = CONVOL4(
    3309             :             adfCoeffsX,
    3310             :             reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
    3311             :     }
    3312             :     // Vertical pass over the four row results.
    3313         256 :     *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
    3314         256 :     *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
    3315             : 
    3316         256 :     return true;
    3317             : }
    3318             : 
    3319             : /************************************************************************/
    3320             : /*              GWKCubicResampleSrcMaskIsDensity4SampleReal()             */
    3321             : /*     Bi-cubic when source has and only has pafUnifiedSrcDensity.      */
    3322             : /************************************************************************/
    3323             : 
    3324           0 : static bool GWKCubicResampleSrcMaskIsDensity4SampleReal(
    3325             :     const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
    3326             :     double *pdfDensity, double *pdfReal)
    3327             : 
    3328             : {
    3329           0 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3330           0 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3331           0 :     const GPtrDiff_t iSrcOffset =
    3332           0 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3333           0 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3334           0 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3335             : 
    3336             :     // Get the bilinear interpolation at the image borders.
    3337           0 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3338           0 :         iSrcY + 2 >= poWK->nSrcYSize)
    3339             :     {
    3340           0 :         double adfImagIgnored[4] = {};
    3341           0 :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3342           0 :                                           pdfDensity, pdfReal, adfImagIgnored);
    3343             :     }
    3344             : 
    3345           0 :     double adfCoeffsX[4] = {};
    3346           0 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
    3347             : 
    3348           0 :     double adfCoeffsY[4] = {};
    3349           0 :     GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
    3350             : 
    3351           0 :     double adfValueDens[4] = {};
    3352           0 :     double adfValueReal[4] = {};
    3353           0 :     double adfDensity[4] = {};
    3354           0 :     double adfReal[4] = {};
    3355           0 :     double adfImagIgnored[4] = {};
    3356             : 
    3357           0 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3358             :     {
    3359           0 :         if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
    3360           0 :                             2, adfDensity, adfReal, adfImagIgnored) ||
    3361           0 :             adfDensity[0] < SRC_DENSITY_THRESHOLD ||
    3362           0 :             adfDensity[1] < SRC_DENSITY_THRESHOLD ||
    3363           0 :             adfDensity[2] < SRC_DENSITY_THRESHOLD ||
    3364           0 :             adfDensity[3] < SRC_DENSITY_THRESHOLD)
    3365             :         {
    3366           0 :             return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3367             :                                               pdfDensity, pdfReal,
    3368           0 :                                               adfImagIgnored);
    3369             :         }
    3370             : 
    3371           0 :         adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
    3372           0 :         adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
    3373             :     }
    3374             : 
    3375           0 :     *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
    3376           0 :     *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
    3377             : 
    3378           0 :     return true;
    3379             : }
    3380             : 
    3381             : template <class T>
    3382     1906603 : static bool GWKCubicResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
    3383             :                                             int iBand, double dfSrcX,
    3384             :                                             double dfSrcY, T *pValue)
    3385             : 
    3386             : {
    3387     1906603 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3388     1906603 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3389     1906603 :     const GPtrDiff_t iSrcOffset =
    3390     1906603 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3391     1906603 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3392     1906603 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3393     1906603 :     const double dfDeltaY2 = dfDeltaY * dfDeltaY;
    3394     1906603 :     const double dfDeltaY3 = dfDeltaY2 * dfDeltaY;
    3395             : 
    3396             :     // Get the bilinear interpolation at the image borders.
    3397     1906603 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3398     1662527 :         iSrcY + 2 >= poWK->nSrcYSize)
    3399      303751 :         return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    3400      303751 :                                                   pValue);
    3401             : 
    3402     1602852 :     double adfCoeffs[4] = {};
    3403     1602852 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffs);
    3404             : 
    3405     1602852 :     double adfValue[4] = {};
    3406             : 
    3407     8014250 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3408             :     {
    3409     6411406 :         const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
    3410             : 
    3411     6411406 :         adfValue[i + 1] = CONVOL4(
    3412             :             adfCoeffs,
    3413             :             reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
    3414             :     }
    3415             : 
    3416             :     const double dfValue =
    3417     1602852 :         CubicConvolution(dfDeltaY, dfDeltaY2, dfDeltaY3, adfValue[0],
    3418             :                          adfValue[1], adfValue[2], adfValue[3]);
    3419             : 
    3420     1602852 :     *pValue = GWKClampValueT<T>(dfValue);
    3421             : 
    3422     1602852 :     return true;
    3423             : }
    3424             : 
    3425             : /************************************************************************/
    3426             : /*                          GWKLanczosSinc()                            */
    3427             : /************************************************************************/
    3428             : 
    3429             : /*
    3430             :  * Lanczos windowed sinc interpolation kernel with radius r.
    3431             :  *        /
    3432             :  *        | sinc(x) * sinc(x/r), if |x| < r
    3433             :  * L(x) = | 1, if x = 0                     ,
    3434             :  *        | 0, otherwise
    3435             :  *        \
    3436             :  *
    3437             :  * where sinc(x) = sin(PI * x) / (PI * x).
    3438             :  */
    3439             : 
    3440        1056 : static double GWKLanczosSinc(double dfX)
    3441             : {
    3442        1056 :     if (dfX == 0.0)
    3443           0 :         return 1.0;
    3444             : 
    3445        1056 :     const double dfPIX = M_PI * dfX;
    3446        1056 :     const double dfPIXoverR = dfPIX / 3;
    3447        1056 :     const double dfPIX2overR = dfPIX * dfPIXoverR;
    3448             :     // Given that sin(3x) = 3 sin(x) - 4 sin^3 (x)
    3449             :     // we can compute sin(dfSinPIX) from sin(dfPIXoverR)
    3450        1056 :     const double dfSinPIXoverR = sin(dfPIXoverR);
    3451        1056 :     const double dfSinPIXoverRSquared = dfSinPIXoverR * dfSinPIXoverR;
    3452        1056 :     const double dfSinPIXMulSinPIXoverR =
    3453        1056 :         (3 - 4 * dfSinPIXoverRSquared) * dfSinPIXoverRSquared;
    3454        1056 :     return dfSinPIXMulSinPIXoverR / dfPIX2overR;
    3455             : }
    3456             : 
    3457      106242 : static double GWKLanczosSinc4Values(double *padfValues)
    3458             : {
    3459      531210 :     for (int i = 0; i < 4; i++)
    3460             :     {
    3461      424968 :         if (padfValues[i] == 0.0)
    3462             :         {
    3463           0 :             padfValues[i] = 1.0;
    3464             :         }
    3465             :         else
    3466             :         {
    3467      424968 :             const double dfPIX = M_PI * padfValues[i];
    3468      424968 :             const double dfPIXoverR = dfPIX / 3;
    3469      424968 :             const double dfPIX2overR = dfPIX * dfPIXoverR;
    3470             :             // Given that sin(3x) = 3 sin(x) - 4 sin^3 (x)
    3471             :             // we can compute sin(dfSinPIX) from sin(dfPIXoverR)
    3472      424968 :             const double dfSinPIXoverR = sin(dfPIXoverR);
    3473      424968 :             const double dfSinPIXoverRSquared = dfSinPIXoverR * dfSinPIXoverR;
    3474      424968 :             const double dfSinPIXMulSinPIXoverR =
    3475      424968 :                 (3 - 4 * dfSinPIXoverRSquared) * dfSinPIXoverRSquared;
    3476      424968 :             padfValues[i] = dfSinPIXMulSinPIXoverR / dfPIX2overR;
    3477             :         }
    3478             :     }
    3479      106242 :     return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
    3480             : }
    3481             : 
    3482             : /************************************************************************/
    3483             : /*                           GWKBilinear()                              */
    3484             : /************************************************************************/
    3485             : 
    3486     6669050 : static double GWKBilinear(double dfX)
    3487             : {
    3488     6669050 :     double dfAbsX = fabs(dfX);
    3489     6669050 :     if (dfAbsX <= 1.0)
    3490     6197920 :         return 1 - dfAbsX;
    3491             :     else
    3492      471127 :         return 0.0;
    3493             : }
    3494             : 
    3495      396360 : static double GWKBilinear4Values(double *padfValues)
    3496             : {
    3497      396360 :     double dfAbsX0 = fabs(padfValues[0]);
    3498      396360 :     double dfAbsX1 = fabs(padfValues[1]);
    3499      396360 :     double dfAbsX2 = fabs(padfValues[2]);
    3500      396360 :     double dfAbsX3 = fabs(padfValues[3]);
    3501      396360 :     if (dfAbsX0 <= 1.0)
    3502      290431 :         padfValues[0] = 1 - dfAbsX0;
    3503             :     else
    3504      105929 :         padfValues[0] = 0.0;
    3505      396360 :     if (dfAbsX1 <= 1.0)
    3506      396360 :         padfValues[1] = 1 - dfAbsX1;
    3507             :     else
    3508           0 :         padfValues[1] = 0.0;
    3509      396360 :     if (dfAbsX2 <= 1.0)
    3510      396360 :         padfValues[2] = 1 - dfAbsX2;
    3511             :     else
    3512           0 :         padfValues[2] = 0.0;
    3513      396360 :     if (dfAbsX3 <= 1.0)
    3514      290324 :         padfValues[3] = 1 - dfAbsX3;
    3515             :     else
    3516      106036 :         padfValues[3] = 0.0;
    3517      396360 :     return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
    3518             : }
    3519             : 
    3520             : /************************************************************************/
    3521             : /*                            GWKCubic()                                */
    3522             : /************************************************************************/
    3523             : 
    3524     4337930 : static double GWKCubic(double dfX)
    3525             : {
    3526     4337930 :     return CubicKernel(dfX);
    3527             : }
    3528             : 
    3529     7078940 : static double GWKCubic4Values(double *padfValues)
    3530             : {
    3531     7078940 :     const double dfAbsX_0 = fabs(padfValues[0]);
    3532     7078940 :     const double dfAbsX_1 = fabs(padfValues[1]);
    3533     7078940 :     const double dfAbsX_2 = fabs(padfValues[2]);
    3534     7078940 :     const double dfAbsX_3 = fabs(padfValues[3]);
    3535     7078940 :     const double dfX2_0 = padfValues[0] * padfValues[0];
    3536     7078940 :     const double dfX2_1 = padfValues[1] * padfValues[1];
    3537     7078940 :     const double dfX2_2 = padfValues[2] * padfValues[2];
    3538     7078940 :     const double dfX2_3 = padfValues[3] * padfValues[3];
    3539             : 
    3540     7078940 :     double dfVal0 = 0.0;
    3541     7078940 :     if (dfAbsX_0 <= 1.0)
    3542     1030550 :         dfVal0 = dfX2_0 * (1.5 * dfAbsX_0 - 2.5) + 1.0;
    3543     6048390 :     else if (dfAbsX_0 <= 2.0)
    3544     4290000 :         dfVal0 = dfX2_0 * (-0.5 * dfAbsX_0 + 2.5) - 4.0 * dfAbsX_0 + 2.0;
    3545             : 
    3546     7078940 :     double dfVal1 = 0.0;
    3547     7078940 :     if (dfAbsX_1 <= 1.0)
    3548     4108100 :         dfVal1 = dfX2_1 * (1.5 * dfAbsX_1 - 2.5) + 1.0;
    3549     2970840 :     else if (dfAbsX_1 <= 2.0)
    3550     2966620 :         dfVal1 = dfX2_1 * (-0.5 * dfAbsX_1 + 2.5) - 4.0 * dfAbsX_1 + 2.0;
    3551             : 
    3552     7078940 :     double dfVal2 = 0.0;
    3553     7078940 :     if (dfAbsX_2 <= 1.0)
    3554     5922680 :         dfVal2 = dfX2_2 * (1.5 * dfAbsX_2 - 2.5) + 1.0;
    3555     1156260 :     else if (dfAbsX_2 <= 2.0)
    3556     1151440 :         dfVal2 = dfX2_2 * (-0.5 * dfAbsX_2 + 2.5) - 4.0 * dfAbsX_2 + 2.0;
    3557             : 
    3558     7078940 :     double dfVal3 = 0.0;
    3559     7078940 :     if (dfAbsX_3 <= 1.0)
    3560     3163030 :         dfVal3 = dfX2_3 * (1.5 * dfAbsX_3 - 2.5) + 1.0;
    3561     3915910 :     else if (dfAbsX_3 <= 2.0)
    3562     3646870 :         dfVal3 = dfX2_3 * (-0.5 * dfAbsX_3 + 2.5) - 4.0 * dfAbsX_3 + 2.0;
    3563             : 
    3564     7078940 :     padfValues[0] = dfVal0;
    3565     7078940 :     padfValues[1] = dfVal1;
    3566     7078940 :     padfValues[2] = dfVal2;
    3567     7078940 :     padfValues[3] = dfVal3;
    3568     7078940 :     return dfVal0 + dfVal1 + dfVal2 + dfVal3;
    3569             : }
    3570             : 
    3571             : /************************************************************************/
    3572             : /*                           GWKBSpline()                               */
    3573             : /************************************************************************/
    3574             : 
    3575             : // https://www.cs.utexas.edu/~fussell/courses/cs384g-fall2013/lectures/mitchell/Mitchell.pdf
    3576             : // Equation 8 with (B,C)=(1,0)
    3577             : // 1/6 * ( 3 * |x|^3 -  6 * |x|^2 + 4) |x| < 1
    3578             : // 1/6 * ( -|x|^3 + 6 |x|^2  - 12|x| + 8) |x| >= 1 and |x| < 2
    3579             : 
    3580      138696 : static double GWKBSpline(double x)
    3581             : {
    3582      138696 :     const double xp2 = x + 2.0;
    3583      138696 :     const double xp1 = x + 1.0;
    3584      138696 :     const double xm1 = x - 1.0;
    3585             : 
    3586             :     // This will most likely be used, so we'll compute it ahead of time to
    3587             :     // avoid stalling the processor.
    3588      138696 :     const double xp2c = xp2 * xp2 * xp2;
    3589             : 
    3590             :     // Note that the test is computed only if it is needed.
    3591             :     // TODO(schwehr): Make this easier to follow.
    3592             :     return xp2 > 0.0
    3593      277392 :                ? ((xp1 > 0.0)
    3594      138696 :                       ? ((x > 0.0)
    3595      124338 :                              ? ((xm1 > 0.0) ? -4.0 * xm1 * xm1 * xm1 : 0.0) +
    3596       89912 :                                    6.0 * x * x * x
    3597             :                              : 0.0) +
    3598      124338 :                             -4.0 * xp1 * xp1 * xp1
    3599             :                       : 0.0) +
    3600             :                      xp2c
    3601      138696 :                : 0.0;  // * 0.166666666666666666666
    3602             : }
    3603             : 
    3604     2220360 : static double GWKBSpline4Values(double *padfValues)
    3605             : {
    3606    11101800 :     for (int i = 0; i < 4; i++)
    3607             :     {
    3608     8881440 :         const double x = padfValues[i];
    3609     8881440 :         const double xp2 = x + 2.0;
    3610     8881440 :         const double xp1 = x + 1.0;
    3611     8881440 :         const double xm1 = x - 1.0;
    3612             : 
    3613             :         // This will most likely be used, so we'll compute it ahead of time to
    3614             :         // avoid stalling the processor.
    3615     8881440 :         const double xp2c = xp2 * xp2 * xp2;
    3616             : 
    3617             :         // Note that the test is computed only if it is needed.
    3618             :         // TODO(schwehr): Make this easier to follow.
    3619     8881440 :         padfValues[i] =
    3620             :             (xp2 > 0.0)
    3621    17762900 :                 ? ((xp1 > 0.0)
    3622     8881440 :                        ? ((x > 0.0)
    3623     6660880 :                               ? ((xm1 > 0.0) ? -4.0 * xm1 * xm1 * xm1 : 0.0) +
    3624     4437750 :                                     6.0 * x * x * x
    3625             :                               : 0.0) +
    3626     6660880 :                              -4.0 * xp1 * xp1 * xp1
    3627             :                        : 0.0) +
    3628             :                       xp2c
    3629             :                 : 0.0;  // * 0.166666666666666666666
    3630             :     }
    3631     2220360 :     return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
    3632             : }
    3633             : /************************************************************************/
    3634             : /*                       GWKResampleWrkStruct                           */
    3635             : /************************************************************************/
    3636             : 
    3637             : typedef struct _GWKResampleWrkStruct GWKResampleWrkStruct;
    3638             : 
    3639             : typedef bool (*pfnGWKResampleType)(const GDALWarpKernel *poWK, int iBand,
    3640             :                                    double dfSrcX, double dfSrcY,
    3641             :                                    double *pdfDensity, double *pdfReal,
    3642             :                                    double *pdfImag,
    3643             :                                    GWKResampleWrkStruct *psWrkStruct);
    3644             : 
    3645             : struct _GWKResampleWrkStruct
    3646             : {
    3647             :     pfnGWKResampleType pfnGWKResample;
    3648             : 
    3649             :     // Space for saved X weights.
    3650             :     double *padfWeightsX;
    3651             :     bool *pabCalcX;
    3652             : 
    3653             :     double *padfWeightsY;       // Only used by GWKResampleOptimizedLanczos.
    3654             :     int iLastSrcX;              // Only used by GWKResampleOptimizedLanczos.
    3655             :     int iLastSrcY;              // Only used by GWKResampleOptimizedLanczos.
    3656             :     double dfLastDeltaX;        // Only used by GWKResampleOptimizedLanczos.
    3657             :     double dfLastDeltaY;        // Only used by GWKResampleOptimizedLanczos.
    3658             :     double dfCosPiXScale;       // Only used by GWKResampleOptimizedLanczos.
    3659             :     double dfSinPiXScale;       // Only used by GWKResampleOptimizedLanczos.
    3660             :     double dfCosPiXScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3661             :     double dfSinPiXScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3662             :     double dfCosPiYScale;       // Only used by GWKResampleOptimizedLanczos.
    3663             :     double dfSinPiYScale;       // Only used by GWKResampleOptimizedLanczos.
    3664             :     double dfCosPiYScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3665             :     double dfSinPiYScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3666             : 
    3667             :     // Space for saving a row of pixels.
    3668             :     double *padfRowDensity;
    3669             :     double *padfRowReal;
    3670             :     double *padfRowImag;
    3671             : };
    3672             : 
    3673             : /************************************************************************/
    3674             : /*                    GWKResampleCreateWrkStruct()                      */
    3675             : /************************************************************************/
    3676             : 
    3677             : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
    3678             :                         double dfSrcY, double *pdfDensity, double *pdfReal,
    3679             :                         double *pdfImag, GWKResampleWrkStruct *psWrkStruct);
    3680             : 
    3681             : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
    3682             :                                         double dfSrcX, double dfSrcY,
    3683             :                                         double *pdfDensity, double *pdfReal,
    3684             :                                         double *pdfImag,
    3685             :                                         GWKResampleWrkStruct *psWrkStruct);
    3686             : 
    3687         352 : static GWKResampleWrkStruct *GWKResampleCreateWrkStruct(GDALWarpKernel *poWK)
    3688             : {
    3689         352 :     const int nXDist = (poWK->nXRadius + 1) * 2;
    3690         352 :     const int nYDist = (poWK->nYRadius + 1) * 2;
    3691             : 
    3692             :     GWKResampleWrkStruct *psWrkStruct = static_cast<GWKResampleWrkStruct *>(
    3693         352 :         CPLCalloc(1, sizeof(GWKResampleWrkStruct)));
    3694             : 
    3695             :     // Alloc space for saved X weights.
    3696         352 :     psWrkStruct->padfWeightsX =
    3697         352 :         static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3698         352 :     psWrkStruct->pabCalcX =
    3699         352 :         static_cast<bool *>(CPLMalloc(nXDist * sizeof(bool)));
    3700             : 
    3701         352 :     psWrkStruct->padfWeightsY =
    3702         352 :         static_cast<double *>(CPLCalloc(nYDist, sizeof(double)));
    3703         352 :     psWrkStruct->iLastSrcX = -10;
    3704         352 :     psWrkStruct->iLastSrcY = -10;
    3705         352 :     psWrkStruct->dfLastDeltaX = -10;
    3706         352 :     psWrkStruct->dfLastDeltaY = -10;
    3707             : 
    3708             :     // Alloc space for saving a row of pixels.
    3709         352 :     if (poWK->pafUnifiedSrcDensity == nullptr &&
    3710         324 :         poWK->panUnifiedSrcValid == nullptr &&
    3711         312 :         poWK->papanBandSrcValid == nullptr)
    3712             :     {
    3713         312 :         psWrkStruct->padfRowDensity = nullptr;
    3714             :     }
    3715             :     else
    3716             :     {
    3717          40 :         psWrkStruct->padfRowDensity =
    3718          40 :             static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3719             :     }
    3720         352 :     psWrkStruct->padfRowReal =
    3721         352 :         static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3722         352 :     psWrkStruct->padfRowImag =
    3723         352 :         static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3724             : 
    3725         352 :     if (poWK->eResample == GRA_Lanczos)
    3726             :     {
    3727          63 :         psWrkStruct->pfnGWKResample = GWKResampleOptimizedLanczos;
    3728             : 
    3729          63 :         if (poWK->dfXScale < 1)
    3730             :         {
    3731           4 :             psWrkStruct->dfCosPiXScaleOver3 = cos(M_PI / 3 * poWK->dfXScale);
    3732           4 :             psWrkStruct->dfSinPiXScaleOver3 =
    3733           4 :                 sqrt(1 - psWrkStruct->dfCosPiXScaleOver3 *
    3734           4 :                              psWrkStruct->dfCosPiXScaleOver3);
    3735             :             // "Naive":
    3736             :             // const double dfCosPiXScale = cos(  M_PI * dfXScale );
    3737             :             // const double dfSinPiXScale = sin(  M_PI * dfXScale );
    3738             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
    3739           4 :             psWrkStruct->dfCosPiXScale = (4 * psWrkStruct->dfCosPiXScaleOver3 *
    3740           4 :                                               psWrkStruct->dfCosPiXScaleOver3 -
    3741           4 :                                           3) *
    3742           4 :                                          psWrkStruct->dfCosPiXScaleOver3;
    3743           4 :             psWrkStruct->dfSinPiXScale = sqrt(
    3744           4 :                 1 - psWrkStruct->dfCosPiXScale * psWrkStruct->dfCosPiXScale);
    3745             :         }
    3746             : 
    3747          63 :         if (poWK->dfYScale < 1)
    3748             :         {
    3749          11 :             psWrkStruct->dfCosPiYScaleOver3 = cos(M_PI / 3 * poWK->dfYScale);
    3750          11 :             psWrkStruct->dfSinPiYScaleOver3 =
    3751          11 :                 sqrt(1 - psWrkStruct->dfCosPiYScaleOver3 *
    3752          11 :                              psWrkStruct->dfCosPiYScaleOver3);
    3753             :             // "Naive":
    3754             :             // const double dfCosPiYScale = cos(  M_PI * dfYScale );
    3755             :             // const double dfSinPiYScale = sin(  M_PI * dfYScale );
    3756             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
    3757          11 :             psWrkStruct->dfCosPiYScale = (4 * psWrkStruct->dfCosPiYScaleOver3 *
    3758          11 :                                               psWrkStruct->dfCosPiYScaleOver3 -
    3759          11 :                                           3) *
    3760          11 :                                          psWrkStruct->dfCosPiYScaleOver3;
    3761          11 :             psWrkStruct->dfSinPiYScale = sqrt(
    3762          11 :                 1 - psWrkStruct->dfCosPiYScale * psWrkStruct->dfCosPiYScale);
    3763             :         }
    3764             :     }
    3765             :     else
    3766         289 :         psWrkStruct->pfnGWKResample = GWKResample;
    3767             : 
    3768         352 :     return psWrkStruct;
    3769             : }
    3770             : 
    3771             : /************************************************************************/
    3772             : /*                    GWKResampleDeleteWrkStruct()                      */
    3773             : /************************************************************************/
    3774             : 
    3775         352 : static void GWKResampleDeleteWrkStruct(GWKResampleWrkStruct *psWrkStruct)
    3776             : {
    3777         352 :     CPLFree(psWrkStruct->padfWeightsX);
    3778         352 :     CPLFree(psWrkStruct->padfWeightsY);
    3779         352 :     CPLFree(psWrkStruct->pabCalcX);
    3780         352 :     CPLFree(psWrkStruct->padfRowDensity);
    3781         352 :     CPLFree(psWrkStruct->padfRowReal);
    3782         352 :     CPLFree(psWrkStruct->padfRowImag);
    3783         352 :     CPLFree(psWrkStruct);
    3784         352 : }
    3785             : 
    3786             : /************************************************************************/
    3787             : /*                           GWKResample()                              */
    3788             : /************************************************************************/
    3789             : 
    3790      239383 : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
    3791             :                         double dfSrcY, double *pdfDensity, double *pdfReal,
    3792             :                         double *pdfImag, GWKResampleWrkStruct *psWrkStruct)
    3793             : // Filtered resample of band iBand at source position (dfSrcX, dfSrcY); false means no usable result.
    3794             : {
    3795             :     // Save as local variables to avoid following pointers in loops.
    3796      239383 :     const int nSrcXSize = poWK->nSrcXSize;
    3797      239383 :     const int nSrcYSize = poWK->nSrcYSize;
    3798             :     // Kernel-wide running totals, then floor(dfSrc - 0.5) and its fractional remainder.
    3799      239383 :     double dfAccumulatorReal = 0.0;
    3800      239383 :     double dfAccumulatorImag = 0.0;
    3801      239383 :     double dfAccumulatorDensity = 0.0;
    3802      239383 :     double dfAccumulatorWeight = 0.0;
    3803      239383 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    3804      239383 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    3805      239383 :     const GPtrDiff_t iSrcOffset =
    3806      239383 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    3807      239383 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3808      239383 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3809             :     // Scale factors; when below 1.0 they are multiplied into the filter argument below.
    3810      239383 :     const double dfXScale = poWK->dfXScale;
    3811      239383 :     const double dfYScale = poWK->dfYScale;
    3812             :     // Number of pabCalcX flags cleared before the X weights are lazily recomputed.
    3813      239383 :     const int nXDist = (poWK->nXRadius + 1) * 2;
    3814             : 
    3815             :     // Space for saved X weights, and one "already computed" flag per weight.
    3816      239383 :     double *padfWeightsX = psWrkStruct->padfWeightsX;
    3817      239383 :     bool *pabCalcX = psWrkStruct->pabCalcX;
    3818             : 
    3819             :     // Space for saving a row of pixels (padfRowDensity may be null, see checks below).
    3820      239383 :     double *padfRowDensity = psWrkStruct->padfRowDensity;
    3821      239383 :     double *padfRowReal = psWrkStruct->padfRowReal;
    3822      239383 :     double *padfRowImag = psWrkStruct->padfRowImag;
    3823             : 
    3824             :     // Mark as needing calculation (don't calculate the weights yet,
    3825             :     // because a mask may render it unnecessary).
    3826      239383 :     memset(pabCalcX, false, nXDist * sizeof(bool));
    3827             :     // Filter weight function selected by poWK->eResample.
    3828      239383 :     FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample];
    3829      239383 :     CPLAssert(pfnGetWeight);
    3830             : 
    3831             :     // Skip sampling over edge of image: clamp the row range [j, jMax] to the source.
    3832      239383 :     int j = poWK->nFiltInitY;
    3833      239383 :     int jMax = poWK->nYRadius;
    3834      239383 :     if (iSrcY + j < 0)
    3835         566 :         j = -iSrcY;
    3836      239383 :     if (iSrcY + jMax >= nSrcYSize)
    3837         662 :         jMax = nSrcYSize - iSrcY - 1;
    3838             :     // Same clamping for the column range [iMin, iMax].
    3839      239383 :     int iMin = poWK->nFiltInitX;
    3840      239383 :     int iMax = poWK->nXRadius;
    3841      239383 :     if (iSrcX + iMin < 0)
    3842         566 :         iMin = -iSrcX;
    3843      239383 :     if (iSrcX + iMax >= nSrcXSize)
    3844         659 :         iMax = nSrcXSize - iSrcX - 1;
    3845             : 
    3846      239383 :     const int bXScaleBelow1 = (dfXScale < 1.0);
    3847      239383 :     const int bYScaleBelow1 = (dfYScale < 1.0);
    3848             :     // Start one row above the first kernel row: the loop advances the offset before use.
    3849      239383 :     GPtrDiff_t iRowOffset =
    3850      239383 :         iSrcOffset + static_cast<GPtrDiff_t>(j - 1) * nSrcXSize + iMin;
    3851             : 
    3852             :     // Loop over pixel rows in the kernel.
    3853     1445930 :     for (; j <= jMax; ++j)
    3854             :     {
    3855     1206540 :         iRowOffset += nSrcXSize;
    3856             :         // NOTE(review): the 4th argument looks like half the column count, rounded up - confirm.
    3857             :         // Get pixel values.
    3858             :         // We can potentially read extra elements after the "normal" end of the
    3859             :         // source arrays, but the contract of papabySrcImage[iBand],
    3860             :         // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
    3861             :         // is to have WARP_EXTRA_ELTS reserved at their end.
    3862     1206540 :         if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
    3863             :                             padfRowDensity, padfRowReal, padfRowImag))
    3864          72 :             continue;
    3865             :         // (A false return above skips the row: it adds nothing to any accumulator.)
    3866             :         // Calculate the Y weight.
    3867             :         double dfWeight1 = (bYScaleBelow1)
    3868     1206470 :                                ? pfnGetWeight((j - dfDeltaY) * dfYScale)
    3869        1600 :                                : pfnGetWeight(j - dfDeltaY);
    3870             : 
    3871             :         // Iterate over pixels in row, summing into per-row totals first.
    3872     1206470 :         double dfAccumulatorRealLocal = 0.0;
    3873     1206470 :         double dfAccumulatorImagLocal = 0.0;
    3874     1206470 :         double dfAccumulatorDensityLocal = 0.0;
    3875     1206470 :         double dfAccumulatorWeightLocal = 0.0;
    3876             : 
    3877     7317420 :         for (int i = iMin; i <= iMax; ++i)
    3878             :         {
    3879             :             // Skip sampling if pixel density is below SRC_DENSITY_THRESHOLD.
    3880     6110940 :             if (padfRowDensity != nullptr &&
    3881       77277 :                 padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD)
    3882         546 :                 continue;
    3883             : 
    3884     6110400 :             double dfWeight2 = 0.0;
    3885             : 
    3886             :             // Make or use the cached weight for this column (shared by all rows).
    3887     6110400 :             if (pabCalcX[i - iMin])
    3888             :             {
    3889             :                 // Use saved weight value instead of recomputing it.
    3890     4903920 :                 dfWeight2 = padfWeightsX[i - iMin];
    3891             :             }
    3892             :             else
    3893             :             {
    3894             :                 // Calculate & save the X weight.
    3895     1206480 :                 padfWeightsX[i - iMin] = dfWeight2 =
    3896     1206480 :                     (bXScaleBelow1) ? pfnGetWeight((i - dfDeltaX) * dfXScale)
    3897        1600 :                                     : pfnGetWeight(i - dfDeltaX);
    3898             : 
    3899     1206480 :                 pabCalcX[i - iMin] = true;
    3900             :             }
    3901             : 
    3902             :             // Accumulate!
    3903     6110400 :             dfAccumulatorRealLocal += padfRowReal[i - iMin] * dfWeight2;
    3904     6110400 :             dfAccumulatorImagLocal += padfRowImag[i - iMin] * dfWeight2;
    3905     6110400 :             if (padfRowDensity != nullptr)
    3906       76731 :                 dfAccumulatorDensityLocal +=
    3907       76731 :                     padfRowDensity[i - iMin] * dfWeight2;
    3908     6110400 :             dfAccumulatorWeightLocal += dfWeight2;
    3909             :         }
    3910             :         // Apply the row (Y) weight once to the row totals.
    3911     1206470 :         dfAccumulatorReal += dfAccumulatorRealLocal * dfWeight1;
    3912     1206470 :         dfAccumulatorImag += dfAccumulatorImagLocal * dfWeight1;
    3913     1206470 :         dfAccumulatorDensity += dfAccumulatorDensityLocal * dfWeight1;
    3914     1206470 :         dfAccumulatorWeight += dfAccumulatorWeightLocal * dfWeight1;
    3915             :     }
    3916             :     // Negligible total weight (or total density, when padfRowDensity is set): no result.
    3917      239383 :     if (dfAccumulatorWeight < 0.000001 ||
    3918        1887 :         (padfRowDensity != nullptr && dfAccumulatorDensity < 0.000001))
    3919             :     {
    3920           0 :         *pdfDensity = 0.0;
    3921           0 :         return false;
    3922             :     }
    3923             :     // Normalize by the total weight unless it is already within 1e-5 of 1.0.
    3924             :     // Calculate the output taking into account weighting.
    3925      239383 :     if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
    3926             :     {
    3927      239380 :         *pdfReal = dfAccumulatorReal / dfAccumulatorWeight;
    3928      239380 :         *pdfImag = dfAccumulatorImag / dfAccumulatorWeight;
    3929      239380 :         if (padfRowDensity != nullptr)
    3930        1884 :             *pdfDensity = dfAccumulatorDensity / dfAccumulatorWeight;
    3931             :         else
    3932      237496 :             *pdfDensity = 1.0;
    3933             :     }
    3934             :     else
    3935             :     {
    3936           3 :         *pdfReal = dfAccumulatorReal;
    3937           3 :         *pdfImag = dfAccumulatorImag;
    3938           3 :         if (padfRowDensity != nullptr)
    3939           3 :             *pdfDensity = dfAccumulatorDensity;
    3940             :         else
    3941           0 :             *pdfDensity = 1.0;
    3942             :     }
    3943             :     // In both branches above a null padfRowDensity reports a density of 1.0.
    3944      239383 :     return true;
    3945             : }
    3946             : 
    3947             : /************************************************************************/
    3948             : /*                      GWKResampleOptimizedLanczos()                   */
    3949             : /************************************************************************/
    3950             : 
    3951      617144 : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
    3952             :                                         double dfSrcX, double dfSrcY,
    3953             :                                         double *pdfDensity, double *pdfReal,
    3954             :                                         double *pdfImag,
    3955             :                                         GWKResampleWrkStruct *psWrkStruct)
    3956             : 
    3957             : {
    3958             :     // Save as local variables to avoid following pointers in loops.
    3959      617144 :     const int nSrcXSize = poWK->nSrcXSize;
    3960      617144 :     const int nSrcYSize = poWK->nSrcYSize;
    3961             : 
    3962      617144 :     double dfAccumulatorReal = 0.0;
    3963      617144 :     double dfAccumulatorImag = 0.0;
    3964      617144 :     double dfAccumulatorDensity = 0.0;
    3965      617144 :     double dfAccumulatorWeight = 0.0;
    3966      617144 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    3967      617144 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    3968      617144 :     const GPtrDiff_t iSrcOffset =
    3969      617144 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    3970      617144 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3971      617144 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3972             : 
    3973      617144 :     const double dfXScale = poWK->dfXScale;
    3974      617144 :     const double dfYScale = poWK->dfYScale;
    3975             : 
    3976             :     // Space for saved X weights.
    3977      617144 :     double *const padfWeightsXShifted =
    3978      617144 :         psWrkStruct->padfWeightsX - poWK->nFiltInitX;
    3979      617144 :     double *const padfWeightsYShifted =
    3980      617144 :         psWrkStruct->padfWeightsY - poWK->nFiltInitY;
    3981             : 
    3982             :     // Space for saving a row of pixels.
    3983      617144 :     double *const padfRowDensity = psWrkStruct->padfRowDensity;
    3984      617144 :     double *const padfRowReal = psWrkStruct->padfRowReal;
    3985      617144 :     double *const padfRowImag = psWrkStruct->padfRowImag;
    3986             : 
    3987             :     // Skip sampling over edge of image.
    3988      617144 :     int jMin = poWK->nFiltInitY;
    3989      617144 :     int jMax = poWK->nYRadius;
    3990      617144 :     if (iSrcY + jMin < 0)
    3991       16522 :         jMin = -iSrcY;
    3992      617144 :     if (iSrcY + jMax >= nSrcYSize)
    3993        5782 :         jMax = nSrcYSize - iSrcY - 1;
    3994             : 
    3995      617144 :     int iMin = poWK->nFiltInitX;
    3996      617144 :     int iMax = poWK->nXRadius;
    3997      617144 :     if (iSrcX + iMin < 0)
    3998       15797 :         iMin = -iSrcX;
    3999      617144 :     if (iSrcX + iMax >= nSrcXSize)
    4000        4657 :         iMax = nSrcXSize - iSrcX - 1;
    4001             : 
    4002      617144 :     if (dfXScale < 1.0)
    4003             :     {
    4004      403041 :         while ((iMin - dfDeltaX) * dfXScale < -3.0)
    4005      200179 :             iMin++;
    4006      202862 :         while ((iMax - dfDeltaX) * dfXScale > 3.0)
    4007           0 :             iMax--;
    4008             : 
    4009             :         // clang-format off
    4010             :         /*
    4011             :         Naive version:
    4012             :         for (int i = iMin; i <= iMax; ++i)
    4013             :         {
    4014             :             psWrkStruct->padfWeightsXShifted[i] =
    4015             :                 GWKLanczosSinc((i - dfDeltaX) * dfXScale);
    4016             :         }
    4017             : 
    4018             :         but given that:
    4019             : 
    4020             :         GWKLanczosSinc(x):
    4021             :             if (dfX == 0.0)
    4022             :                 return 1.0;
    4023             : 
    4024             :             const double dfPIX = M_PI * dfX;
    4025             :             const double dfPIXoverR = dfPIX / 3;
    4026             :             const double dfPIX2overR = dfPIX * dfPIXoverR;
    4027             :             return sin(dfPIX) * sin(dfPIXoverR) / dfPIX2overR;
    4028             : 
    4029             :         and
    4030             :             sin (a + b) = sin a cos b + cos a sin b.
    4031             :             cos (a + b) = cos a cos b - sin a sin b.
    4032             : 
    4033             :         we can skip any sin() computation within the loop
    4034             :         */
    4035             :         // clang-format on
    4036             : 
    4037      202862 :         if (iSrcX != psWrkStruct->iLastSrcX ||
    4038      131072 :             dfDeltaX != psWrkStruct->dfLastDeltaX)
    4039             :         {
    4040       71790 :             double dfX = (iMin - dfDeltaX) * dfXScale;
    4041             : 
    4042       71790 :             double dfPIXover3 = M_PI / 3 * dfX;
    4043       71790 :             double dfCosOver3 = cos(dfPIXover3);
    4044       71790 :             double dfSinOver3 = sin(dfPIXover3);
    4045             : 
    4046             :             // "Naive":
    4047             :             // double dfSin = sin( M_PI * dfX );
    4048             :             // double dfCos = cos( M_PI * dfX );
    4049             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
    4050       71790 :             double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
    4051       71790 :             double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
    4052             : 
    4053       71790 :             const double dfCosPiXScaleOver3 = psWrkStruct->dfCosPiXScaleOver3;
    4054       71790 :             const double dfSinPiXScaleOver3 = psWrkStruct->dfSinPiXScaleOver3;
    4055       71790 :             const double dfCosPiXScale = psWrkStruct->dfCosPiXScale;
    4056       71790 :             const double dfSinPiXScale = psWrkStruct->dfSinPiXScale;
    4057       71790 :             constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
    4058       71790 :             padfWeightsXShifted[iMin] =
    4059       71790 :                 dfX == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfX * dfX);
    4060     1636480 :             for (int i = iMin + 1; i <= iMax; ++i)
    4061             :             {
    4062     1564690 :                 dfX += dfXScale;
    4063     1564690 :                 const double dfNewSin =
    4064     1564690 :                     dfSin * dfCosPiXScale + dfCos * dfSinPiXScale;
    4065     1564690 :                 const double dfNewSinOver3 = dfSinOver3 * dfCosPiXScaleOver3 +
    4066     1564690 :                                              dfCosOver3 * dfSinPiXScaleOver3;
    4067     1564690 :                 padfWeightsXShifted[i] =
    4068             :                     dfX == 0
    4069     1564690 :                         ? 1.0
    4070     1564690 :                         : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfX * dfX);
    4071     1564690 :                 const double dfNewCos =
    4072     1564690 :                     dfCos * dfCosPiXScale - dfSin * dfSinPiXScale;
    4073     1564690 :                 const double dfNewCosOver3 = dfCosOver3 * dfCosPiXScaleOver3 -
    4074     1564690 :                                              dfSinOver3 * dfSinPiXScaleOver3;
    4075     1564690 :                 dfSin = dfNewSin;
    4076     1564690 :                 dfCos = dfNewCos;
    4077     1564690 :                 dfSinOver3 = dfNewSinOver3;
    4078     1564690 :                 dfCosOver3 = dfNewCosOver3;
    4079             :             }
    4080             : 
    4081       71790 :             psWrkStruct->iLastSrcX = iSrcX;
    4082       71790 :             psWrkStruct->dfLastDeltaX = dfDeltaX;
    4083             :         }
    4084             :     }
    4085             :     else
    4086             :     {
    4087      757542 :         while (iMin - dfDeltaX < -3.0)
    4088      343260 :             iMin++;
    4089      414282 :         while (iMax - dfDeltaX > 3.0)
    4090           0 :             iMax--;
    4091             : 
    4092      414282 :         if (iSrcX != psWrkStruct->iLastSrcX ||
    4093      209580 :             dfDeltaX != psWrkStruct->dfLastDeltaX)
    4094             :         {
    4095             :             // Optimisation of GWKLanczosSinc(i - dfDeltaX) based on the
    4096             :             // following trigonometric formulas.
    4097             : 
    4098             :             // TODO(schwehr): Move this somewhere where it can be rendered at
    4099             :             // LaTeX.
    4100             :             // clang-format off
    4101             :             // sin(M_PI * (dfBase + k)) = sin(M_PI * dfBase) * cos(M_PI * k) +
    4102             :             //                            cos(M_PI * dfBase) * sin(M_PI * k)
    4103             :             // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k) + dfCosPIBase * sin(M_PI * k)
    4104             :             // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k)
    4105             :             // sin(M_PI * (dfBase + k)) = dfSinPIBase * (((k % 2) == 0) ? 1 : -1)
    4106             : 
    4107             :             // sin(M_PI / dfR * (dfBase + k)) = sin(M_PI / dfR * dfBase) * cos(M_PI / dfR * k) +
    4108             :             //                                  cos(M_PI / dfR * dfBase) * sin(M_PI / dfR * k)
    4109             :             // sin(M_PI / dfR * (dfBase + k)) = dfSinPIBaseOverR * cos(M_PI / dfR * k) + dfCosPIBaseOverR * sin(M_PI / dfR * k)
    4110             :             // clang-format on
    4111             : 
    4112      414282 :             const double dfSinPIDeltaXOver3 = sin((-M_PI / 3.0) * dfDeltaX);
    4113      414282 :             const double dfSin2PIDeltaXOver3 =
    4114             :                 dfSinPIDeltaXOver3 * dfSinPIDeltaXOver3;
    4115             :             // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaX < PI/2.
    4116      414282 :             const double dfCosPIDeltaXOver3 = sqrt(1.0 - dfSin2PIDeltaXOver3);
    4117      414282 :             const double dfSinPIDeltaX =
    4118      414282 :                 (3.0 - 4 * dfSin2PIDeltaXOver3) * dfSinPIDeltaXOver3;
    4119      414282 :             const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
    4120      414282 :             const double dfInvPI2Over3xSinPIDeltaX =
    4121             :                 dfInvPI2Over3 * dfSinPIDeltaX;
    4122      414282 :             const double dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 =
    4123      414282 :                 -0.5 * dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3;
    4124      414282 :             const double dfSinPIOver3 = 0.8660254037844386;
    4125      414282 :             const double dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3 =
    4126      414282 :                 dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaX * dfCosPIDeltaXOver3;
    4127             :             const double padfCst[] = {
    4128      414282 :                 dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3,
    4129      414282 :                 dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 -
    4130             :                     dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3,
    4131      414282 :                 dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 +
    4132      414282 :                     dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3};
    4133             : 
    4134     2936860 :             for (int i = iMin; i <= iMax; ++i)
    4135             :             {
    4136     2522570 :                 const double dfX = i - dfDeltaX;
    4137     2522570 :                 if (dfX == 0.0)
    4138       58282 :                     padfWeightsXShifted[i] = 1.0;
    4139             :                 else
    4140     2464290 :                     padfWeightsXShifted[i] = padfCst[(i + 3) % 3] / (dfX * dfX);
    4141             : #if DEBUG_VERBOSE
    4142             :                     // TODO(schwehr): AlmostEqual.
    4143             :                     // CPLAssert(fabs(padfWeightsX[i-poWK->nFiltInitX] -
    4144             :                     //               GWKLanczosSinc(dfX, 3.0)) < 1e-10);
    4145             : #endif
    4146             :             }
    4147             : 
    4148      414282 :             psWrkStruct->iLastSrcX = iSrcX;
    4149      414282 :             psWrkStruct->dfLastDeltaX = dfDeltaX;
    4150             :         }
    4151             :     }
    4152             : 
    4153      617144 :     if (dfYScale < 1.0)
    4154             :     {
    4155      403116 :         while ((jMin - dfDeltaY) * dfYScale < -3.0)
    4156      200254 :             jMin++;
    4157      202862 :         while ((jMax - dfDeltaY) * dfYScale > 3.0)
    4158           0 :             jMax--;
    4159             : 
    4160             :         // clang-format off
    4161             :         /*
    4162             :         Naive version:
    4163             :         for (int j = jMin; j <= jMax; ++j)
    4164             :         {
    4165             :             padfWeightsYShifted[j] =
    4166             :                 GWKLanczosSinc((j - dfDeltaY) * dfYScale);
    4167             :         }
    4168             :         */
    4169             :         // clang-format on
    4170             : 
    4171      202862 :         if (iSrcY != psWrkStruct->iLastSrcY ||
    4172      202479 :             dfDeltaY != psWrkStruct->dfLastDeltaY)
    4173             :         {
    4174         383 :             double dfY = (jMin - dfDeltaY) * dfYScale;
    4175             : 
    4176         383 :             double dfPIYover3 = M_PI / 3 * dfY;
    4177         383 :             double dfCosOver3 = cos(dfPIYover3);
    4178         383 :             double dfSinOver3 = sin(dfPIYover3);
    4179             : 
    4180             :             // "Naive":
    4181             :             // double dfSin = sin( M_PI * dfY );
    4182             :             // double dfCos = cos( M_PI * dfY );
    4183             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
    4184         383 :             double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
    4185         383 :             double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
    4186             : 
    4187         383 :             const double dfCosPiYScaleOver3 = psWrkStruct->dfCosPiYScaleOver3;
    4188         383 :             const double dfSinPiYScaleOver3 = psWrkStruct->dfSinPiYScaleOver3;
    4189         383 :             const double dfCosPiYScale = psWrkStruct->dfCosPiYScale;
    4190         383 :             const double dfSinPiYScale = psWrkStruct->dfSinPiYScale;
    4191         383 :             constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
    4192         383 :             padfWeightsYShifted[jMin] =
    4193         383 :                 dfY == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfY * dfY);
    4194        7318 :             for (int j = jMin + 1; j <= jMax; ++j)
    4195             :             {
    4196        6935 :                 dfY += dfYScale;
    4197        6935 :                 const double dfNewSin =
    4198        6935 :                     dfSin * dfCosPiYScale + dfCos * dfSinPiYScale;
    4199        6935 :                 const double dfNewSinOver3 = dfSinOver3 * dfCosPiYScaleOver3 +
    4200        6935 :                                              dfCosOver3 * dfSinPiYScaleOver3;
    4201        6935 :                 padfWeightsYShifted[j] =
    4202             :                     dfY == 0
    4203        6935 :                         ? 1.0
    4204        6935 :                         : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfY * dfY);
    4205        6935 :                 const double dfNewCos =
    4206        6935 :                     dfCos * dfCosPiYScale - dfSin * dfSinPiYScale;
    4207        6935 :                 const double dfNewCosOver3 = dfCosOver3 * dfCosPiYScaleOver3 -
    4208        6935 :                                              dfSinOver3 * dfSinPiYScaleOver3;
    4209        6935 :                 dfSin = dfNewSin;
    4210        6935 :                 dfCos = dfNewCos;
    4211        6935 :                 dfSinOver3 = dfNewSinOver3;
    4212        6935 :                 dfCosOver3 = dfNewCosOver3;
    4213             :             }
    4214             : 
    4215         383 :             psWrkStruct->iLastSrcY = iSrcY;
    4216         383 :             psWrkStruct->dfLastDeltaY = dfDeltaY;
    4217             :         }
    4218             :     }
    4219             :     else
    4220             :     {
    4221      684742 :         while (jMin - dfDeltaY < -3.0)
    4222      270460 :             jMin++;
    4223      414282 :         while (jMax - dfDeltaY > 3.0)
    4224           0 :             jMax--;
    4225             : 
    4226      414282 :         if (iSrcY != psWrkStruct->iLastSrcY ||
    4227      413663 :             dfDeltaY != psWrkStruct->dfLastDeltaY)
    4228             :         {
    4229        1132 :             const double dfSinPIDeltaYOver3 = sin((-M_PI / 3.0) * dfDeltaY);
    4230        1132 :             const double dfSin2PIDeltaYOver3 =
    4231             :                 dfSinPIDeltaYOver3 * dfSinPIDeltaYOver3;
    4232             :             // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaY < PI/2.
    4233        1132 :             const double dfCosPIDeltaYOver3 = sqrt(1.0 - dfSin2PIDeltaYOver3);
    4234        1132 :             const double dfSinPIDeltaY =
    4235        1132 :                 (3.0 - 4.0 * dfSin2PIDeltaYOver3) * dfSinPIDeltaYOver3;
    4236        1132 :             const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
    4237        1132 :             const double dfInvPI2Over3xSinPIDeltaY =
    4238             :                 dfInvPI2Over3 * dfSinPIDeltaY;
    4239        1132 :             const double dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 =
    4240        1132 :                 -0.5 * dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3;
    4241        1132 :             const double dfSinPIOver3 = 0.8660254037844386;
    4242        1132 :             const double dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3 =
    4243        1132 :                 dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaY * dfCosPIDeltaYOver3;
    4244             :             const double padfCst[] = {
    4245        1132 :                 dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3,
    4246        1132 :                 dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 -
    4247             :                     dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3,
    4248        1132 :                 dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 +
    4249        1132 :                     dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3};
    4250             : 
    4251        7917 :             for (int j = jMin; j <= jMax; ++j)
    4252             :             {
    4253        6785 :                 const double dfY = j - dfDeltaY;
    4254        6785 :                 if (dfY == 0.0)
    4255         460 :                     padfWeightsYShifted[j] = 1.0;
    4256             :                 else
    4257        6325 :                     padfWeightsYShifted[j] = padfCst[(j + 3) % 3] / (dfY * dfY);
    4258             : #if DEBUG_VERBOSE
    4259             :                     // TODO(schwehr): AlmostEqual.
    4260             :                     // CPLAssert(fabs(padfWeightsYShifted[j] -
    4261             :                     //               GWKLanczosSinc(dfY, 3.0)) < 1e-10);
    4262             : #endif
    4263             :             }
    4264             : 
    4265        1132 :             psWrkStruct->iLastSrcY = iSrcY;
    4266        1132 :             psWrkStruct->dfLastDeltaY = dfDeltaY;
    4267             :         }
    4268             :     }
    4269             : 
    4270             :     // If we have no density information, we can simply compute the
    4271             :     // accumulated weight.
    4272      617144 :     if (padfRowDensity == nullptr)
    4273             :     {
    4274      617144 :         double dfRowAccWeight = 0.0;
    4275     7903490 :         for (int i = iMin; i <= iMax; ++i)
    4276             :         {
    4277     7286350 :             dfRowAccWeight += padfWeightsXShifted[i];
    4278             :         }
    4279      617144 :         double dfColAccWeight = 0.0;
    4280     7958040 :         for (int j = jMin; j <= jMax; ++j)
    4281             :         {
    4282     7340900 :             dfColAccWeight += padfWeightsYShifted[j];
    4283             :         }
    4284      617144 :         dfAccumulatorWeight = dfRowAccWeight * dfColAccWeight;
    4285             :     }
    4286             : 
    4287             :     // Loop over pixel rows in the kernel.
    4288             : 
    4289      617144 :     if (poWK->eWorkingDataType == GDT_Byte && !poWK->panUnifiedSrcValid &&
    4290      616524 :         !poWK->papanBandSrcValid && !poWK->pafUnifiedSrcDensity &&
    4291             :         !padfRowDensity)
    4292             :     {
    4293             :         // Optimization for Byte case without any masking/alpha
    4294             : 
    4295      616524 :         if (dfAccumulatorWeight < 0.000001)
    4296             :         {
    4297           0 :             *pdfDensity = 0.0;
    4298           0 :             return false;
    4299             :         }
    4300             : 
    4301      616524 :         const GByte *pSrc =
    4302      616524 :             reinterpret_cast<const GByte *>(poWK->papabySrcImage[iBand]);
    4303      616524 :         pSrc += iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
    4304             : 
    4305             : #if defined(USE_SSE2)
    4306      616524 :         if (iMax - iMin + 1 == 6)
    4307             :         {
    4308             :             // This is just an optimized version of the general case in
    4309             :             // the else clause.
    4310             : 
    4311      346854 :             pSrc += iMin;
    4312      346854 :             int j = jMin;
    4313             :             const auto fourXWeights =
    4314      346854 :                 XMMReg4Double::Load4Val(padfWeightsXShifted + iMin);
    4315             : 
    4316             :             // Process 2 lines at the same time.
    4317     1375860 :             for (; j < jMax; j += 2)
    4318             :             {
    4319             :                 const XMMReg4Double v_acc =
    4320     1029000 :                     XMMReg4Double::Load4Val(pSrc) * fourXWeights;
    4321             :                 const XMMReg4Double v_acc2 =
    4322     1029000 :                     XMMReg4Double::Load4Val(pSrc + nSrcXSize) * fourXWeights;
    4323     1029000 :                 const double dfRowAcc = v_acc.GetHorizSum();
    4324     1029000 :                 const double dfRowAccEnd =
    4325     1029000 :                     pSrc[4] * padfWeightsXShifted[iMin + 4] +
    4326     1029000 :                     pSrc[5] * padfWeightsXShifted[iMin + 5];
    4327     1029000 :                 dfAccumulatorReal +=
    4328     1029000 :                     (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
    4329     1029000 :                 const double dfRowAcc2 = v_acc2.GetHorizSum();
    4330     1029000 :                 const double dfRowAcc2End =
    4331     1029000 :                     pSrc[nSrcXSize + 4] * padfWeightsXShifted[iMin + 4] +
    4332     1029000 :                     pSrc[nSrcXSize + 5] * padfWeightsXShifted[iMin + 5];
    4333     1029000 :                 dfAccumulatorReal +=
    4334     1029000 :                     (dfRowAcc2 + dfRowAcc2End) * padfWeightsYShifted[j + 1];
    4335     1029000 :                 pSrc += 2 * nSrcXSize;
    4336             :             }
    4337      346854 :             if (j == jMax)
    4338             :             {
    4339             :                 // Process last line if there's an odd number of them.
    4340             : 
    4341             :                 const XMMReg4Double v_acc =
    4342       86045 :                     XMMReg4Double::Load4Val(pSrc) * fourXWeights;
    4343       86045 :                 const double dfRowAcc = v_acc.GetHorizSum();
    4344       86045 :                 const double dfRowAccEnd =
    4345       86045 :                     pSrc[4] * padfWeightsXShifted[iMin + 4] +
    4346       86045 :                     pSrc[5] * padfWeightsXShifted[iMin + 5];
    4347       86045 :                 dfAccumulatorReal +=
    4348       86045 :                     (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
    4349             :             }
    4350             :         }
    4351             :         else
    4352             : #endif
    4353             :         {
    4354     5463580 :             for (int j = jMin; j <= jMax; ++j)
    4355             :             {
    4356     5193900 :                 int i = iMin;
    4357     5193900 :                 double dfRowAcc1 = 0.0;
    4358     5193900 :                 double dfRowAcc2 = 0.0;
    4359             :                 // A bit of loop unrolling
    4360    62750600 :                 for (; i < iMax; i += 2)
    4361             :                 {
    4362    57556700 :                     dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
    4363    57556700 :                     dfRowAcc2 += pSrc[i + 1] * padfWeightsXShifted[i + 1];
    4364             :                 }
    4365     5193900 :                 if (i == iMax)
    4366             :                 {
    4367             :                     // Process last column if there's an odd number of them.
    4368      426183 :                     dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
    4369             :                 }
    4370             : 
    4371     5193900 :                 dfAccumulatorReal +=
    4372     5193900 :                     (dfRowAcc1 + dfRowAcc2) * padfWeightsYShifted[j];
    4373     5193900 :                 pSrc += nSrcXSize;
    4374             :             }
    4375             :         }
    4376             : 
    4377             :         // Calculate the output taking into account weighting.
    4378      616524 :         if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
    4379             :         {
    4380      569230 :             const double dfInvAcc = 1.0 / dfAccumulatorWeight;
    4381      569230 :             *pdfReal = dfAccumulatorReal * dfInvAcc;
    4382      569230 :             *pdfDensity = 1.0;
    4383             :         }
    4384             :         else
    4385             :         {
    4386       47294 :             *pdfReal = dfAccumulatorReal;
    4387       47294 :             *pdfDensity = 1.0;
    4388             :         }
    4389             : 
    4390      616524 :         return true;
    4391             :     }
    4392             : 
    4393         620 :     GPtrDiff_t iRowOffset =
    4394         620 :         iSrcOffset + static_cast<GPtrDiff_t>(jMin - 1) * nSrcXSize + iMin;
    4395             : 
    4396         620 :     int nCountValid = 0;
    4397         620 :     const bool bIsNonComplex = !GDALDataTypeIsComplex(poWK->eWorkingDataType);
    4398             : 
    4399        3560 :     for (int j = jMin; j <= jMax; ++j)
    4400             :     {
    4401        2940 :         iRowOffset += nSrcXSize;
    4402             : 
    4403             :         // Get pixel values.
    4404             :         // We can potentially read extra elements after the "normal" end of the
    4405             :         // source arrays, but the contract of papabySrcImage[iBand],
    4406             :         // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
    4407             :         // is to have WARP_EXTRA_ELTS reserved at their end.
    4408        2940 :         if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
    4409             :                             padfRowDensity, padfRowReal, padfRowImag))
    4410           0 :             continue;
    4411             : 
    4412        2940 :         const double dfWeight1 = padfWeightsYShifted[j];
    4413             : 
    4414             :         // Iterate over pixels in row.
    4415        2940 :         if (padfRowDensity != nullptr)
    4416             :         {
    4417           0 :             for (int i = iMin; i <= iMax; ++i)
    4418             :             {
    4419             :                 // Skip sampling if pixel has zero density.
    4420           0 :                 if (padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD)
    4421           0 :                     continue;
    4422             : 
    4423           0 :                 nCountValid++;
    4424             : 
    4425             :                 //  Use a cached set of weights for this row.
    4426           0 :                 const double dfWeight2 = dfWeight1 * padfWeightsXShifted[i];
    4427             : 
    4428             :                 // Accumulate!
    4429           0 :                 dfAccumulatorReal += padfRowReal[i - iMin] * dfWeight2;
    4430           0 :                 dfAccumulatorImag += padfRowImag[i - iMin] * dfWeight2;
    4431           0 :                 dfAccumulatorDensity += padfRowDensity[i - iMin] * dfWeight2;
    4432           0 :                 dfAccumulatorWeight += dfWeight2;
    4433             :             }
    4434             :         }
    4435        2940 :         else if (bIsNonComplex)
    4436             :         {
    4437        1764 :             double dfRowAccReal = 0.0;
    4438       10560 :             for (int i = iMin; i <= iMax; ++i)
    4439             :             {
    4440        8796 :                 const double dfWeight2 = padfWeightsXShifted[i];
    4441             : 
    4442             :                 // Accumulate!
    4443        8796 :                 dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
    4444             :             }
    4445             : 
    4446        1764 :             dfAccumulatorReal += dfRowAccReal * dfWeight1;
    4447             :         }
    4448             :         else
    4449             :         {
    4450        1176 :             double dfRowAccReal = 0.0;
    4451        1176 :             double dfRowAccImag = 0.0;
    4452        7040 :             for (int i = iMin; i <= iMax; ++i)
    4453             :             {
    4454        5864 :                 const double dfWeight2 = padfWeightsXShifted[i];
    4455             : 
    4456             :                 // Accumulate!
    4457        5864 :                 dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
    4458        5864 :                 dfRowAccImag += padfRowImag[i - iMin] * dfWeight2;
    4459             :             }
    4460             : 
    4461        1176 :             dfAccumulatorReal += dfRowAccReal * dfWeight1;
    4462        1176 :             dfAccumulatorImag += dfRowAccImag * dfWeight1;
    4463             :         }
    4464             :     }
    4465             : 
    4466         620 :     if (dfAccumulatorWeight < 0.000001 ||
    4467           0 :         (padfRowDensity != nullptr &&
    4468           0 :          (dfAccumulatorDensity < 0.000001 ||
    4469           0 :           nCountValid < (jMax - jMin + 1) * (iMax - iMin + 1) / 2)))
    4470             :     {
    4471           0 :         *pdfDensity = 0.0;
    4472           0 :         return false;
    4473             :     }
    4474             : 
    4475             :     // Calculate the output taking into account weighting.
    4476         620 :     if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
    4477             :     {
    4478           0 :         const double dfInvAcc = 1.0 / dfAccumulatorWeight;
    4479           0 :         *pdfReal = dfAccumulatorReal * dfInvAcc;
    4480           0 :         *pdfImag = dfAccumulatorImag * dfInvAcc;
    4481           0 :         if (padfRowDensity != nullptr)
    4482           0 :             *pdfDensity = dfAccumulatorDensity * dfInvAcc;
    4483             :         else
    4484           0 :             *pdfDensity = 1.0;
    4485             :     }
    4486             :     else
    4487             :     {
    4488         620 :         *pdfReal = dfAccumulatorReal;
    4489         620 :         *pdfImag = dfAccumulatorImag;
    4490         620 :         if (padfRowDensity != nullptr)
    4491           0 :             *pdfDensity = dfAccumulatorDensity;
    4492             :         else
    4493         620 :             *pdfDensity = 1.0;
    4494             :     }
    4495             : 
    4496         620 :     return true;
    4497             : }
    4498             : 
    4499             : /************************************************************************/
    4500             : /*                        GWKComputeWeights()                           */
    4501             : /************************************************************************/
    4502             : 
                       // Fills the horizontal and vertical kernel weight tables for one
                       // resampled pixel and outputs the reciprocal of their combined sum.
                       //
                       // eResample indexes the apfGWKFilter / apfGWKFilter4Values tables to pick
                       // the scalar and the 4-values-at-a-time filter function.
                       // [iMin, iMax] and [jMin, jMax] are the inclusive kernel offsets along X
                       // and Y. The weight for offset i is stored at padfWeightsHorizontal[i - iMin]
                       // (likewise j and padfWeightsVertical), so the arrays must hold at least
                       // iMax - iMin + 1 and jMax - jMin + 1 elements respectively.
                       // The filter argument for an offset is (offset - dfDelta) * dfScale.
                       // dfInvWeights receives
                       // 1 / (sum of horizontal weights * sum of vertical weights).
    4503     3747080 : static void GWKComputeWeights(GDALResampleAlg eResample, int iMin, int iMax,
    4504             :                               double dfDeltaX, double dfXScale, int jMin,
    4505             :                               int jMax, double dfDeltaY, double dfYScale,
    4506             :                               double *padfWeightsHorizontal,
    4507             :                               double *padfWeightsVertical, double &dfInvWeights)
    4508             : {
    4509             : 
                           // Both filter flavours must exist for the requested algorithm.
    4510     3747080 :     const FilterFuncType pfnGetWeight = apfGWKFilter[eResample];
    4511     3747080 :     CPLAssert(pfnGetWeight);
    4512     3747080 :     const FilterFunc4ValuesType pfnGetWeight4Values =
    4513     3747080 :         apfGWKFilter4Values[eResample];
    4514     3747080 :     CPLAssert(pfnGetWeight4Values);
    4515             : 
    4516     3747080 :     int i = iMin;  // Used after for.
    4517     3747080 :     int iC = 0;    // Used after for.
    4518             :     // Not zero, but as close as possible to it, to avoid potential division by
    4519             :     // zero at end of function
    4520     3747080 :     double dfAccumulatorWeightHorizontal = cpl::NumericLimits<double>::min();
                           // Horizontal weights, 4 offsets per iteration while i + 3 <= iMax.
    4521     8313540 :     for (; i + 2 < iMax; i += 4, iC += 4)
    4522             :     {
                               // Write the 4 consecutive filter arguments into the output
                               // slots, then pass them to the 4-values filter.
                               // NOTE(review): pfnGetWeight4Values presumably replaces the 4
                               // arguments in place with their weights and returns the sum of
                               // those weights - confirm against the filter implementations.
    4523     4566760 :         padfWeightsHorizontal[iC] = (i - dfDeltaX) * dfXScale;
    4524     4566760 :         padfWeightsHorizontal[iC + 1] = padfWeightsHorizontal[iC] + dfXScale;
    4525     4566760 :         padfWeightsHorizontal[iC + 2] =
    4526     4566760 :             padfWeightsHorizontal[iC + 1] + dfXScale;
    4527     4566760 :         padfWeightsHorizontal[iC + 3] =
    4528     4566760 :             padfWeightsHorizontal[iC + 2] + dfXScale;
    4529     4566450 :         dfAccumulatorWeightHorizontal +=
    4530     4566760 :             pfnGetWeight4Values(padfWeightsHorizontal + iC);
    4531             :     }
                           // Leftover offsets (at most 3): evaluate the scalar filter one by one.
    4532     3963760 :     for (; i <= iMax; ++i, ++iC)
    4533             :     {
    4534      220112 :         const double dfWeight = pfnGetWeight((i - dfDeltaX) * dfXScale);
    4535      216980 :         padfWeightsHorizontal[iC] = dfWeight;
    4536      216980 :         dfAccumulatorWeightHorizontal += dfWeight;
    4537             :     }
    4538             : 
                           // Same two-step scheme for the vertical weights.
    4539     3743640 :     int j = jMin;  // Used after for.
    4540     3743640 :     int jC = 0;    // Used after for.
    4541             :     // Not zero, but as close as possible to it, to avoid potential division by
    4542             :     // zero at end of function
    4543     3743640 :     double dfAccumulatorWeightVertical = cpl::NumericLimits<double>::min();
    4544     7890620 :     for (; j + 2 < jMax; j += 4, jC += 4)
    4545             :     {
    4546     4147250 :         padfWeightsVertical[jC] = (j - dfDeltaY) * dfYScale;
    4547     4147250 :         padfWeightsVertical[jC + 1] = padfWeightsVertical[jC] + dfYScale;
    4548     4147250 :         padfWeightsVertical[jC + 2] = padfWeightsVertical[jC + 1] + dfYScale;
    4549     4147250 :         padfWeightsVertical[jC + 3] = padfWeightsVertical[jC + 2] + dfYScale;
    4550     4146970 :         dfAccumulatorWeightVertical +=
    4551     4147250 :             pfnGetWeight4Values(padfWeightsVertical + jC);
    4552             :     }
    4553     8247980 :     for (; j <= jMax; ++j, ++jC)
    4554             :     {
    4555     4505800 :         const double dfWeight = pfnGetWeight((j - dfDeltaY) * dfYScale);
    4556     4504610 :         padfWeightsVertical[jC] = dfWeight;
    4557     4504610 :         dfAccumulatorWeightVertical += dfWeight;
    4558             :     }
    4559             : 
                           // Normalisation factor for the separable kernel: callers multiply
                           // their weighted sum by this instead of dividing by the weight sum.
    4560     3742180 :     dfInvWeights =
    4561     3742180 :         1. / (dfAccumulatorWeightHorizontal * dfAccumulatorWeightVertical);
    4562     3742180 : }
    4563             : 
    4564             : /************************************************************************/
    4565             : /*                        GWKResampleNoMasksT()                         */
    4566             : /************************************************************************/
    4567             : 
                       // Generic (scalar) separable-kernel resampling of one source band at
                       // position (dfSrcX, dfSrcY). Only poWK->papabySrcImage[iBand] is read; no
                       // validity or density mask is consulted.
                       //
                       // pValue receives the normalised weighted sum, converted to T through
                       // GWKClampValueT<T>.
                       // padfWeightsHorizontal, padfWeightsVertical and dfInvWeights are filled by
                       // GWKComputeWeights() only when iBand == 0; for any other band their
                       // current contents are used unchanged.
                       // NOTE(review): this presumably relies on the caller processing band 0
                       // first for each pixel with the same buffers - confirm against the caller.
                       //
                       // Returns true, except when the position or kernel radius does not fit the
                       // source image, in which case the result of
                       // GWKBilinearResampleNoMasks4SampleT() is returned instead.
    4568             : template <class T>
    4569             : static bool
    4570             : GWKResampleNoMasksT(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
    4571             :                     double dfSrcY, T *pValue, double *padfWeightsHorizontal,
    4572             :                     double *padfWeightsVertical, double &dfInvWeights)
    4573             : 
    4574             : {
    4575             :     // Commonly used; save locally.
    4576             :     const int nSrcXSize = poWK->nSrcXSize;
    4577             :     const int nSrcYSize = poWK->nSrcYSize;
    4578             : 
                           // Integer pixel index of the kernel origin and its linear offset in
                           // the band buffer (row stride is nSrcXSize).
    4579             :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    4580             :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    4581             :     const GPtrDiff_t iSrcOffset =
    4582             :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    4583             : 
    4584             :     const int nXRadius = poWK->nXRadius;
    4585             :     const int nYRadius = poWK->nYRadius;
    4586             : 
    4587             :     // Politely refuse to process invalid coordinates or obscenely small image.
    4588             :     if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
    4589             :         nYRadius > nSrcYSize)
    4590             :         return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    4591             :                                                   pValue);
    4592             : 
    4593             :     T *pSrcBand = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
                           // Sub-pixel distance between the sample position and the origin pixel.
    4594             :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    4595             :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    4596             : 
                           // The scale passed to the filter is capped at 1.0.
    4597             :     const double dfXScale = std::min(poWK->dfXScale, 1.0);
    4598             :     const double dfYScale = std::min(poWK->dfYScale, 1.0);
    4599             : 
                           // Kernel window is [1 - radius, radius] around the origin pixel,
                           // clipped so that every sampled column/row lies inside the image.
    4600             :     int iMin = 1 - nXRadius;
    4601             :     if (iSrcX + iMin < 0)
    4602             :         iMin = -iSrcX;
    4603             :     int iMax = nXRadius;
    4604             :     if (iSrcX + iMax >= nSrcXSize - 1)
    4605             :         iMax = nSrcXSize - 1 - iSrcX;
    4606             : 
    4607             :     int jMin = 1 - nYRadius;
    4608             :     if (iSrcY + jMin < 0)
    4609             :         jMin = -iSrcY;
    4610             :     int jMax = nYRadius;
    4611             :     if (iSrcY + jMax >= nSrcYSize - 1)
    4612             :         jMax = nSrcYSize - 1 - iSrcY;
    4613             : 
                           // Weights are only (re)computed for the first band.
    4614             :     if (iBand == 0)
    4615             :     {
    4616             :         GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
    4617             :                           jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
    4618             :                           padfWeightsVertical, dfInvWeights);
    4619             :     }
    4620             : 
    4621             :     // Loop over all rows in the kernel.
    4622             :     double dfAccumulator = 0.0;
    4623             :     for (int jC = 0, j = jMin; j <= jMax; ++j, ++jC)
    4624             :     {
                               // Linear offset of the origin column within kernel row j.
    4625             :         const GPtrDiff_t iSampJ =
    4626             :             iSrcOffset + static_cast<GPtrDiff_t>(j) * nSrcXSize;
    4627             : 
    4628             :         // Loop over all pixels in the row.
    4629             :         double dfAccumulatorLocal = 0.0;
    4630             :         double dfAccumulatorLocal2 = 0.0;
    4631             :         int iC = 0;
    4632             :         int i = iMin;
    4633             :         // Process by chunk of 4 cols.
    4634             :         for (; i + 2 < iMax; i += 4, iC += 4)
    4635             :         {
    4636             :             // Retrieve the pixel & accumulate.
    4637             :             dfAccumulatorLocal +=
    4638             :                 pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
    4639             :             dfAccumulatorLocal +=
    4640             :                 pSrcBand[i + 1 + iSampJ] * padfWeightsHorizontal[iC + 1];
    4641             :             dfAccumulatorLocal2 +=
    4642             :                 pSrcBand[i + 2 + iSampJ] * padfWeightsHorizontal[iC + 2];
    4643             :             dfAccumulatorLocal2 +=
    4644             :                 pSrcBand[i + 3 + iSampJ] * padfWeightsHorizontal[iC + 3];
    4645             :         }
    4646             :         dfAccumulatorLocal += dfAccumulatorLocal2;
                               // Up to 3 columns remain: first a pair, then a single column.
    4647             :         if (i < iMax)
    4648             :         {
    4649             :             dfAccumulatorLocal +=
    4650             :                 pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
    4651             :             dfAccumulatorLocal +=
    4652             :                 pSrcBand[i + 1 + iSampJ] * padfWeightsHorizontal[iC + 1];
    4653             :             i += 2;
    4654             :             iC += 2;
    4655             :         }
    4656             :         if (i == iMax)
    4657             :         {
    4658             :             dfAccumulatorLocal +=
    4659             :                 pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
    4660             :         }
    4661             : 
                               // Apply the vertical weight of this row to the row sum.
    4662             :         dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
    4663             :     }
    4664             : 
                           // Normalise by the total kernel weight and convert to T.
    4665             :     *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
    4666             : 
    4667             :     return true;
    4668             : }
    4669             : 
    4670             : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
    4671             : /* Could possibly be used too on 32bit, but we would need to check at runtime */
    4672             : #if defined(USE_SSE2)
    4673             : 
    4674             : /************************************************************************/
    4675             : /*                    GWKResampleNoMasks_SSE2_T()                       */
    4676             : /************************************************************************/
    4677             : 
                       // Vectorised counterpart of the generic GWKResampleNoMasksT(): same
                       // parameters, same fallback to GWKBilinearResampleNoMasks4SampleT(), same
                       // window clipping and same rule that GWKComputeWeights() is only called
                       // when iBand == 0. The weighted sum is accumulated with the XMMReg4Double /
                       // XMMReg2Double helpers, 4 kernel rows at a time, then row by row for the
                       // rows that are left.
                       // NOTE(review): Load4Val / Load2Val presumably load consecutive T values
                       // converted to double, and GetHorizSum presumably adds up all lanes -
                       // confirm against the XMMReg* helper definitions.
    4678             : template <class T>
    4679     9180523 : static bool GWKResampleNoMasks_SSE2_T(const GDALWarpKernel *poWK, int iBand,
    4680             :                                       double dfSrcX, double dfSrcY, T *pValue,
    4681             :                                       double *padfWeightsHorizontal,
    4682             :                                       double *padfWeightsVertical,
    4683             :                                       double &dfInvWeights)
    4684             : {
    4685             :     // Commonly used; save locally.
    4686     9180523 :     const int nSrcXSize = poWK->nSrcXSize;
    4687     9180523 :     const int nSrcYSize = poWK->nSrcYSize;
    4688             : 
                           // Integer pixel index of the kernel origin and its linear offset in
                           // the band buffer (row stride is nSrcXSize).
    4689     9180523 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    4690     9180523 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    4691     9180523 :     const GPtrDiff_t iSrcOffset =
    4692     9180523 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    4693     9180523 :     const int nXRadius = poWK->nXRadius;
    4694     9180523 :     const int nYRadius = poWK->nYRadius;
    4695             : 
    4696             :     // Politely refuse to process invalid coordinates or obscenely small image.
    4697     9180523 :     if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
    4698             :         nYRadius > nSrcYSize)
    4699        7912 :         return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    4700           3 :                                                   pValue);
    4701             : 
    4702     9172611 :     const T *pSrcBand =
    4703     9172611 :         reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
    4704             : 
                           // Sub-pixel offsets of the sample position; filter scale capped at 1.0.
    4705     9172611 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    4706     9172611 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    4707     9172611 :     const double dfXScale = std::min(poWK->dfXScale, 1.0);
    4708     9159961 :     const double dfYScale = std::min(poWK->dfYScale, 1.0);
    4709             : 
                           // Kernel window is [1 - radius, radius] around the origin pixel,
                           // clipped so that every sampled column/row lies inside the image.
    4710     9153591 :     int iMin = 1 - nXRadius;
    4711     9153591 :     if (iSrcX + iMin < 0)
    4712       43143 :         iMin = -iSrcX;
    4713     9153591 :     int iMax = nXRadius;
    4714     9153591 :     if (iSrcX + iMax >= nSrcXSize - 1)
    4715       38106 :         iMax = nSrcXSize - 1 - iSrcX;
    4716             : 
    4717     9153591 :     int jMin = 1 - nYRadius;
    4718     9153591 :     if (iSrcY + jMin < 0)
    4719       49554 :         jMin = -iSrcY;
    4720     9153591 :     int jMax = nYRadius;
    4721     9153591 :     if (iSrcY + jMax >= nSrcYSize - 1)
    4722       36028 :         jMax = nSrcYSize - 1 - iSrcY;
    4723             : 
                           // Weights are only (re)computed for the first band; other bands use
                           // whatever the weight buffers and dfInvWeights currently hold.
    4724     9153591 :     if (iBand == 0)
    4725             :     {
    4726     3746231 :         GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
    4727             :                           jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
    4728             :                           padfWeightsVertical, dfInvWeights);
    4729             :     }
    4730             : 
                           // Linear offset of the origin column in the first kernel row; advanced
                           // by the loops below as rows are consumed.
    4731     9160371 :     GPtrDiff_t iSampJ = iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
    4732             :     // Process by chunk of 4 rows.
    4733     9160371 :     int jC = 0;
    4734     9160371 :     int j = jMin;
    4735     9160371 :     double dfAccumulator = 0.0;
    4736    19415293 :     for (; j + 2 < jMax; j += 4, iSampJ += 4 * nSrcXSize, jC += 4)
    4737             :     {
    4738             :         // Loop over all pixels in the row.
    4739    10249022 :         int iC = 0;
    4740    10249022 :         int i = iMin;
    4741             :         // Process by chunk of 4 cols.
                               // One accumulator per row of the 4-row chunk.
    4742    10249022 :         XMMReg4Double v_acc_1 = XMMReg4Double::Zero();
    4743    10212372 :         XMMReg4Double v_acc_2 = XMMReg4Double::Zero();
    4744    10228582 :         XMMReg4Double v_acc_3 = XMMReg4Double::Zero();
    4745    10245412 :         XMMReg4Double v_acc_4 = XMMReg4Double::Zero();
    4746    26713680 :         for (; i + 2 < iMax; i += 4, iC += 4)
    4747             :         {
    4748             :             // Retrieve the pixel & accumulate.
    4749    16483688 :             XMMReg4Double v_pixels_1 =
    4750    16483688 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
    4751    16508288 :             XMMReg4Double v_pixels_2 =
    4752    16508288 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + nSrcXSize);
    4753    16501388 :             XMMReg4Double v_pixels_3 =
    4754    16501388 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
    4755    16486988 :             XMMReg4Double v_pixels_4 =
    4756    16486988 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
    4757             : 
                                   // The same 4 horizontal weights apply to all 4 rows.
    4758    16511088 :             XMMReg4Double v_padfWeight =
    4759    16511088 :                 XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
    4760             : 
    4761    16496688 :             v_acc_1 += v_pixels_1 * v_padfWeight;
    4762    16499988 :             v_acc_2 += v_pixels_2 * v_padfWeight;
    4763    16501988 :             v_acc_3 += v_pixels_3 * v_padfWeight;
    4764    16492088 :             v_acc_4 += v_pixels_4 * v_padfWeight;
    4765             :         }
    4766             : 
                               // At least 2 columns remain: handle one pair with 2-wide registers.
    4767    10230002 :         if (i < iMax)
    4768             :         {
    4769      142910 :             XMMReg2Double v_pixels_1 =
    4770      142910 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ);
    4771      142910 :             XMMReg2Double v_pixels_2 =
    4772      142910 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + nSrcXSize);
    4773      142910 :             XMMReg2Double v_pixels_3 =
    4774      142910 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
    4775      142910 :             XMMReg2Double v_pixels_4 =
    4776      142910 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
    4777             : 
    4778      142910 :             XMMReg2Double v_padfWeight =
    4779      142910 :                 XMMReg2Double::Load2Val(padfWeightsHorizontal + iC);
    4780             : 
    4781      142910 :             v_acc_1.AddToLow(v_pixels_1 * v_padfWeight);
    4782      142910 :             v_acc_2.AddToLow(v_pixels_2 * v_padfWeight);
    4783      142910 :             v_acc_3.AddToLow(v_pixels_3 * v_padfWeight);
    4784      142910 :             v_acc_4.AddToLow(v_pixels_4 * v_padfWeight);
    4785             : 
    4786      142910 :             i += 2;
    4787      142910 :             iC += 2;
    4788             :         }
    4789             : 
                               // Collapse each row accumulator to a single double.
    4790    10230002 :         double dfAccumulatorLocal_1 = v_acc_1.GetHorizSum();
    4791    10246672 :         double dfAccumulatorLocal_2 = v_acc_2.GetHorizSum();
    4792    10230432 :         double dfAccumulatorLocal_3 = v_acc_3.GetHorizSum();
    4793    10234522 :         double dfAccumulatorLocal_4 = v_acc_4.GetHorizSum();
    4794             : 
                               // One last column left over: add it with scalar arithmetic.
    4795    10254932 :         if (i == iMax)
    4796             :         {
    4797       49195 :             dfAccumulatorLocal_1 += static_cast<double>(pSrcBand[i + iSampJ]) *
    4798       49195 :                                     padfWeightsHorizontal[iC];
    4799       49195 :             dfAccumulatorLocal_2 +=
    4800       49195 :                 static_cast<double>(pSrcBand[i + iSampJ + nSrcXSize]) *
    4801       49195 :                 padfWeightsHorizontal[iC];
    4802       49195 :             dfAccumulatorLocal_3 +=
    4803       49195 :                 static_cast<double>(pSrcBand[i + iSampJ + 2 * nSrcXSize]) *
    4804       49195 :                 padfWeightsHorizontal[iC];
    4805       49195 :             dfAccumulatorLocal_4 +=
    4806       49195 :                 static_cast<double>(pSrcBand[i + iSampJ + 3 * nSrcXSize]) *
    4807       49195 :                 padfWeightsHorizontal[iC];
    4808             :         }
    4809             : 
                               // Apply the vertical weight of each of the 4 rows to its row sum.
    4810    10254932 :         dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal_1;
    4811    10254932 :         dfAccumulator += padfWeightsVertical[jC + 1] * dfAccumulatorLocal_2;
    4812    10254932 :         dfAccumulator += padfWeightsVertical[jC + 2] * dfAccumulatorLocal_3;
    4813    10254932 :         dfAccumulator += padfWeightsVertical[jC + 3] * dfAccumulatorLocal_4;
    4814             :     }
                           // Rows left over after the 4-row chunks are processed one at a time.
    4815    22244341 :     for (; j <= jMax; ++j, iSampJ += nSrcXSize, ++jC)
    4816             :     {
    4817             :         // Loop over all pixels in the row.
    4818    13069140 :         int iC = 0;
    4819    13069140 :         int i = iMin;
    4820             :         // Process by chunk of 4 cols.
    4821    13069140 :         XMMReg4Double v_acc = XMMReg4Double::Zero();
    4822    26190563 :         for (; i + 2 < iMax; i += 4, iC += 4)
    4823             :         {
    4824             :             // Retrieve the pixel & accumulate.
    4825    13115123 :             XMMReg4Double v_pixels =
    4826    13115123 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
    4827    13133723 :             XMMReg4Double v_padfWeight =
    4828    13133723 :                 XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
    4829             : 
    4830    13109123 :             v_acc += v_pixels * v_padfWeight;
    4831             :         }
    4832             : 
    4833    13075540 :         double dfAccumulatorLocal = v_acc.GetHorizSum();
    4834             : 
                               // Up to 3 columns remain: a scalar pair, then a single column.
    4835    13078040 :         if (i < iMax)
    4836             :         {
    4837      173964 :             dfAccumulatorLocal +=
    4838      173964 :                 pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
    4839      173964 :             dfAccumulatorLocal +=
    4840      173964 :                 pSrcBand[i + 1 + iSampJ] * padfWeightsHorizontal[iC + 1];
    4841      173964 :             i += 2;
    4842      173964 :             iC += 2;
    4843             :         }
    4844    13078040 :         if (i == iMax)
    4845             :         {
    4846       33020 :             dfAccumulatorLocal += static_cast<double>(pSrcBand[i + iSampJ]) *
    4847       33020 :                                   padfWeightsHorizontal[iC];
    4848             :         }
    4849             : 
    4850    13078040 :         dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
    4851             :     }
    4852             : 
                           // Normalise by the total kernel weight and convert to T.
    4853     9175171 :     *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
    4854             : 
    4855     9124341 :     return true;
    4856             : }
    4857             : 
    4858             : /************************************************************************/
    4859             : /*                     GWKResampleNoMasksT<GByte>()                     */
    4860             : /************************************************************************/
    4861             : 
                       // Explicit specialization for GByte: forwards every argument unchanged to
                       // GWKResampleNoMasks_SSE2_T() and returns its result, so that GByte
                       // sources use the vectorised implementation instead of the generic
                       // template body.
    4862             : template <>
    4863     8583790 : bool GWKResampleNoMasksT<GByte>(const GDALWarpKernel *poWK, int iBand,
    4864             :                                 double dfSrcX, double dfSrcY, GByte *pValue,
    4865             :                                 double *padfWeightsHorizontal,
    4866             :                                 double *padfWeightsVertical,
    4867             :                                 double &dfInvWeights)
    4868             : {
    4869     8583790 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    4870             :                                      padfWeightsHorizontal, padfWeightsVertical,
    4871     8576300 :                                      dfInvWeights);
    4872             : }
    4873             : 
    4874             : /************************************************************************/
    4875             : /*                     GWKResampleNoMasksT<GInt16>()                    */
    4876             : /************************************************************************/
    4877             : // Explicit specialization for GInt16: hands every argument through unchanged to GWKResampleNoMasks_SSE2_T() and returns its result.
    4878             : template <>
    4879      252563 : bool GWKResampleNoMasksT<GInt16>(const GDALWarpKernel *poWK, int iBand,
    4880             :                                  double dfSrcX, double dfSrcY, GInt16 *pValue,
    4881             :                                  double *padfWeightsHorizontal,
    4882             :                                  double *padfWeightsVertical,
    4883             :                                  double &dfInvWeights)
    4884             : {
    4885      252563 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    4886             :                                      padfWeightsHorizontal, padfWeightsVertical,
    4887      252563 :                                      dfInvWeights);
    4888             : }
    4889             : 
    4890             : /************************************************************************/
    4891             : /*                     GWKResampleNoMasksT<GUInt16>()                   */
    4892             : /************************************************************************/
    4893             : // Explicit specialization for GUInt16: hands every argument through unchanged to GWKResampleNoMasks_SSE2_T() and returns its result.
    4894             : template <>
    4895      343440 : bool GWKResampleNoMasksT<GUInt16>(const GDALWarpKernel *poWK, int iBand,
    4896             :                                   double dfSrcX, double dfSrcY, GUInt16 *pValue,
    4897             :                                   double *padfWeightsHorizontal,
    4898             :                                   double *padfWeightsVertical,
    4899             :                                   double &dfInvWeights)
    4900             : {
    4901      343440 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    4902             :                                      padfWeightsHorizontal, padfWeightsVertical,
    4903      343440 :                                      dfInvWeights);
    4904             : }
    4905             : 
    4906             : /************************************************************************/
    4907             : /*                     GWKResampleNoMasksT<float>()                     */
    4908             : /************************************************************************/
    4909             : // Explicit specialization for float: hands every argument through unchanged to GWKResampleNoMasks_SSE2_T() and returns its result.
    4910             : template <>
    4911        2500 : bool GWKResampleNoMasksT<float>(const GDALWarpKernel *poWK, int iBand,
    4912             :                                 double dfSrcX, double dfSrcY, float *pValue,
    4913             :                                 double *padfWeightsHorizontal,
    4914             :                                 double *padfWeightsVertical,
    4915             :                                 double &dfInvWeights)
    4916             : {
    4917        2500 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    4918             :                                      padfWeightsHorizontal, padfWeightsVertical,
    4919        2500 :                                      dfInvWeights);
    4920             : }
    4921             : 
    4922             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    4923             : 
    4924             : /************************************************************************/
    4925             : /*                     GWKResampleNoMasksT<double>()                    */
    4926             : /************************************************************************/
    4927             : // Explicit specialization for double, compiled only when INSTANTIATE_FLOAT64_SSE2_IMPL is defined: forwards every argument to GWKResampleNoMasks_SSE2_T().
    4928             : template <>
    4929             : bool GWKResampleNoMasksT<double>(const GDALWarpKernel *poWK, int iBand,
    4930             :                                  double dfSrcX, double dfSrcY, double *pValue,
    4931             :                                  double *padfWeightsHorizontal,
    4932             :                                  double *padfWeightsVertical,
    4933             :                                  double &dfInvWeights)
    4934             : {
    4935             :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    4936             :                                      padfWeightsHorizontal, padfWeightsVertical,
    4937             :                                      dfInvWeights);
    4938             : }
    4939             : 
    4940             : #endif /* INSTANTIATE_FLOAT64_SSE2_IMPL */
    4941             : 
    4942             : #endif /* defined(USE_SSE2) */
    4943             : 
    4944             : /************************************************************************/
    4945             : /*                     GWKRoundSourceCoordinates()                      */
    4946             : /************************************************************************/
    4947             : // Rounds the first nDstXSize entries of padfX/padfY to the nearest multiple of dfSrcCoordPrecision; entries moved by more than a threshold are recomputed with a single-point pfnTransformer call and rounded again.
    4948        1000 : static void GWKRoundSourceCoordinates(
    4949             :     int nDstXSize, double *padfX, double *padfY, double *padfZ, int *pabSuccess, // per-point arrays indexed by iDstX, modified in place
    4950             :     double dfSrcCoordPrecision, double dfErrorThreshold, // rounding step, and the error bound used to tune the threshold below
    4951             :     GDALTransformerFunc pfnTransformer, void *pTransformerArg, double dfDstXOff, // transformer and its opaque argument; X fed back for point 0
    4952             :     double dfDstY) // Y fed back to the transformer for every recomputed point
    4953             : {
    4954        1000 :     double dfPct = 0.8; // default: tolerate a shift of up to 80% of half a rounding step
    4955        1000 :     if (dfErrorThreshold > 0 && dfSrcCoordPrecision / dfErrorThreshold >= 10.0)
    4956             :     {
    4957        1000 :         dfPct = 1.0 - 2 * 1.0 / (dfSrcCoordPrecision / dfErrorThreshold); // >= 0.8 here, approaching 1.0 as the ratio grows
    4958             :     }
    4959        1000 :     const double dfExactTransformThreshold = 0.5 * dfPct * dfSrcCoordPrecision;
    4960             : // Rounding shifts larger than dfExactTransformThreshold trigger the per-point recomputation below.
    4961      501000 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    4962             :     {
    4963      500000 :         const double dfXBefore = padfX[iDstX];
    4964      500000 :         const double dfYBefore = padfY[iDstX];
    4965      500000 :         padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
    4966             :                        dfSrcCoordPrecision;
    4967      500000 :         padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
    4968             :                        dfSrcCoordPrecision;
    4969             : 
    4970             :         // If we are in an uncertainty zone, go to non-approximated
    4971             :         // transformation.
    4972             :         // Due to the 80% of half-precision threshold, dfSrcCoordPrecision must
    4973             :         // be at least 10 times greater than the approximation error.
    4974      500000 :         if (fabs(dfXBefore - padfX[iDstX]) > dfExactTransformThreshold ||
    4975      399914 :             fabs(dfYBefore - padfY[iDstX]) > dfExactTransformThreshold)
    4976             :         {
    4977      180090 :             padfX[iDstX] = iDstX + dfDstXOff; // restart from the input point (iDstX + dfDstXOff, dfDstY, 0)
    4978      180090 :             padfY[iDstX] = dfDstY;
    4979      180090 :             padfZ[iDstX] = 0.0;
    4980      180090 :             pfnTransformer(pTransformerArg, TRUE, 1, padfX + iDstX,
    4981      180090 :                            padfY + iDstX, padfZ + iDstX, pabSuccess + iDstX); // one point only; return value is not inspected here
    4982      180090 :             padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
    4983             :                            dfSrcCoordPrecision;
    4984      180090 :             padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
    4985             :                            dfSrcCoordPrecision;
    4986             :         }
    4987             :     }
    4988        1000 : }
    4989             : 
    4990             : /************************************************************************/
    4991             : /*                           GWKOpenCLCase()                            */
    4992             : /*                                                                      */
    4993             : /*      This is identical to GWKGeneralCase(), but functions via        */
    4994             : /*      OpenCL. This means we have vector optimization (SSE) and/or     */
    4995             : /*      GPU optimization depending on our prefs. The code itself is     */
    4996             : /*      general and not optimized, but by defining constants we can     */
    4997             : /*      make some pretty darn good code on the fly.                     */
    4998             : /************************************************************************/
    4999             : // Returns CE_None on success; CE_Warning if the data type or resampling algorithm is unsupported or the OpenCL environment cannot be created; CE_Failure on user interruption or any later OpenCL error.
    5000             : #if defined(HAVE_OPENCL)
    5001           0 : static CPLErr GWKOpenCLCase(GDALWarpKernel *poWK)
    5002             : {
    5003           0 :     const int nDstXSize = poWK->nDstXSize;
    5004           0 :     const int nDstYSize = poWK->nDstYSize;
    5005           0 :     const int nSrcXSize = poWK->nSrcXSize;
    5006           0 :     const int nSrcYSize = poWK->nSrcYSize;
    5007           0 :     const int nDstXOff = poWK->nDstXOff;
    5008           0 :     const int nDstYOff = poWK->nDstYOff;
    5009           0 :     const int nSrcXOff = poWK->nSrcXOff;
    5010           0 :     const int nSrcYOff = poWK->nSrcYOff;
    5011           0 :     bool bUseImag = false;
    5012             : // Map the working data type to an OpenCL channel type; the complex types also set bUseImag.
    5013             :     cl_channel_type imageFormat;
    5014           0 :     switch (poWK->eWorkingDataType)
    5015             :     {
    5016           0 :         case GDT_Byte:
    5017           0 :             imageFormat = CL_UNORM_INT8;
    5018           0 :             break;
    5019           0 :         case GDT_UInt16:
    5020           0 :             imageFormat = CL_UNORM_INT16;
    5021           0 :             break;
    5022           0 :         case GDT_CInt16:
    5023           0 :             bUseImag = true;
    5024             :             [[fallthrough]];
    5025           0 :         case GDT_Int16:
    5026           0 :             imageFormat = CL_SNORM_INT16;
    5027           0 :             break;
    5028           0 :         case GDT_CFloat32:
    5029           0 :             bUseImag = true;
    5030             :             [[fallthrough]];
    5031           0 :         case GDT_Float32:
    5032           0 :             imageFormat = CL_FLOAT;
    5033           0 :             break;
    5034           0 :         default:
    5035             :             // No support for higher precision formats.
    5036           0 :             CPLDebug("OpenCL", "Unsupported resampling OpenCL data type %d.",
    5037           0 :                      static_cast<int>(poWK->eWorkingDataType));
    5038           0 :             return CE_Warning;
    5039             :     }
    5040             : // Map the requested resampling algorithm to its OpenCL counterpart.
    5041             :     OCLResampAlg resampAlg;
    5042           0 :     switch (poWK->eResample)
    5043             :     {
    5044           0 :         case GRA_Bilinear:
    5045           0 :             resampAlg = OCL_Bilinear;
    5046           0 :             break;
    5047           0 :         case GRA_Cubic:
    5048           0 :             resampAlg = OCL_Cubic;
    5049           0 :             break;
    5050           0 :         case GRA_CubicSpline:
    5051           0 :             resampAlg = OCL_CubicSpline;
    5052           0 :             break;
    5053           0 :         case GRA_Lanczos:
    5054           0 :             resampAlg = OCL_Lanczos;
    5055           0 :             break;
    5056           0 :         default:
    5057             :             // Only bilinear, cubic, cubic spline and Lanczos are handled here.
    5058           0 :             CPLDebug("OpenCL",
    5059             :                      "Unsupported resampling OpenCL resampling alg %d.",
    5060           0 :                      static_cast<int>(poWK->eResample));
    5061           0 :             return CE_Warning;
    5062             :     }
    5063             : 
    5064           0 :     struct oclWarper *warper = nullptr;
    5065             :     cl_int err;
    5066           0 :     CPLErr eErr = CE_None;
    5067             : // Errors inside the try block are raised by throwing eErr so that control reaches the deleteEnv() call after the catch.
    5068             :     // TODO(schwehr): Fix indenting.
    5069             :     try
    5070             :     {
    5071             : 
    5072             :         // Using a factor of 2 or 4 seems to have much less rounding error
    5073             :         // than 3 on the GPU.
    5074             :         // Then the rounding error can cause strange artifacts under the
    5075             :         // right conditions.
    5076           0 :         warper = GDALWarpKernelOpenCL_createEnv(
    5077             :             nSrcXSize, nSrcYSize, nDstXSize, nDstYSize, imageFormat,
    5078           0 :             poWK->nBands, 4, bUseImag, poWK->papanBandSrcValid != nullptr,
    5079             :             poWK->pafDstDensity, poWK->padfDstNoDataReal, resampAlg, &err);
    5080             : 
    5081           0 :         if (err != CL_SUCCESS || warper == nullptr)
    5082             :         {
    5083           0 :             eErr = CE_Warning;
    5084           0 :             if (warper != nullptr)
    5085           0 :                 throw eErr; // a warper exists, so go through the cleanup path
    5086           0 :             return eErr;
    5087             :         }
    5088             : 
    5089           0 :         CPLDebug("GDAL",
    5090             :                  "GDALWarpKernel()::GWKOpenCLCase() "
    5091             :                  "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d",
    5092             :                  nSrcXOff, nSrcYOff, nSrcXSize, nSrcYSize, nDstXOff, nDstYOff,
    5093             :                  nDstXSize, nDstYSize);
    5094             : 
    5095           0 :         if (!poWK->pfnProgress(poWK->dfProgressBase, "", poWK->pProgress))
    5096             :         {
    5097           0 :             CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
    5098           0 :             eErr = CE_Failure;
    5099           0 :             throw eErr;
    5100             :         }
    5101             : 
    5102             :         /* ====================================================================
    5103             :          */
    5104             :         /*      Loop over bands. */
    5105             :         /* ====================================================================
    5106             :          */
    5107           0 :         for (int iBand = 0; iBand < poWK->nBands; iBand++)
    5108             :         {
    5109           0 :             if (poWK->papanBandSrcValid != nullptr &&
    5110           0 :                 poWK->papanBandSrcValid[iBand] != nullptr)
    5111             :             {
    5112           0 :                 err = GDALWarpKernelOpenCL_setSrcValid(
    5113             :                     warper,
    5114           0 :                     reinterpret_cast<int *>(poWK->papanBandSrcValid[iBand]),
    5115             :                     iBand); // status is captured so the check below tests this call, not an earlier one
    5116           0 :                 if (err != CL_SUCCESS)
    5117             :                 {
    5118           0 :                     CPLError(
    5119             :                         CE_Failure, CPLE_AppDefined,
    5120             :                         "OpenCL routines reported failure (%d) on line %d.",
    5121             :                         static_cast<int>(err), __LINE__);
    5122           0 :                     eErr = CE_Failure;
    5123           0 :                     throw eErr;
    5124             :                 }
    5125             :             }
    5126             : 
    5127           0 :             err = GDALWarpKernelOpenCL_setSrcImg(
    5128           0 :                 warper, poWK->papabySrcImage[iBand], iBand);
    5129           0 :             if (err != CL_SUCCESS)
    5130             :             {
    5131           0 :                 CPLError(CE_Failure, CPLE_AppDefined,
    5132             :                          "OpenCL routines reported failure (%d) on line %d.",
    5133             :                          static_cast<int>(err), __LINE__);
    5134           0 :                 eErr = CE_Failure;
    5135           0 :                 throw eErr;
    5136             :             }
    5137             : 
    5138           0 :             err = GDALWarpKernelOpenCL_setDstImg(
    5139           0 :                 warper, poWK->papabyDstImage[iBand], iBand);
    5140           0 :             if (err != CL_SUCCESS)
    5141             :             {
    5142           0 :                 CPLError(CE_Failure, CPLE_AppDefined,
    5143             :                          "OpenCL routines reported failure (%d) on line %d.",
    5144             :                          static_cast<int>(err), __LINE__);
    5145           0 :                 eErr = CE_Failure;
    5146           0 :                 throw eErr;
    5147             :             }
    5148             :         }
    5149             : 
    5150             :         /* --------------------------------------------------------------------
    5151             :          */
    5152             :         /*      Allocate x,y,z coordinate arrays for transformation ... one */
    5153             :         /*      scanlines worth of positions. */
    5154             :         /* --------------------------------------------------------------------
    5155             :          */
    5156             : 
    5157             :         // For x, 2 *, because we cache the precomputed values at the end.
    5158             :         double *padfX =
    5159           0 :             static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    5160             :         double *padfY =
    5161           0 :             static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5162             :         double *padfZ =
    5163           0 :             static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5164             :         int *pabSuccess =
    5165           0 :             static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    5166           0 :         const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    5167           0 :             poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    5168           0 :         const double dfErrorThreshold = CPLAtof(CSLFetchNameValueDef(
    5169           0 :             poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    5170             : 
    5171             :         // Precompute values.
    5172           0 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5173           0 :             padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    5174             : 
    5175             :         /* ====================================================================
    5176             :          */
    5177             :         /*      Loop over output lines. */
    5178             :         /* ====================================================================
    5179             :          */
    5180           0 :         for (int iDstY = 0; iDstY < nDstYSize && eErr == CE_None; ++iDstY)
    5181             :         {
    5182             :             /* ----------------------------------------------------------------
    5183             :              */
    5184             :             /*      Setup points to transform to source image space. */
    5185             :             /* ----------------------------------------------------------------
    5186             :              */
    5187           0 :             memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    5188           0 :             const double dfYConst = iDstY + 0.5 + poWK->nDstYOff;
    5189           0 :             for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5190           0 :                 padfY[iDstX] = dfYConst;
    5191           0 :             memset(padfZ, 0, sizeof(double) * nDstXSize);
    5192             : 
    5193             :             /* ----------------------------------------------------------------
    5194             :              */
    5195             :             /*      Transform the points from destination pixel/line
    5196             :              * coordinates*/
    5197             :             /*      to source pixel/line coordinates. */
    5198             :             /* ----------------------------------------------------------------
    5199             :              */
    5200           0 :             poWK->pfnTransformer(poWK->pTransformerArg, TRUE, nDstXSize, padfX,
    5201             :                                  padfY, padfZ, pabSuccess);
    5202           0 :             if (dfSrcCoordPrecision > 0.0)
    5203             :             {
    5204           0 :                 GWKRoundSourceCoordinates(
    5205             :                     nDstXSize, padfX, padfY, padfZ, pabSuccess,
    5206             :                     dfSrcCoordPrecision, dfErrorThreshold, poWK->pfnTransformer,
    5207             :                     poWK->pTransformerArg, 0.5 + nDstXOff,
    5208           0 :                     iDstY + 0.5 + nDstYOff);
    5209             :             }
    5210             : 
    5211           0 :             err = GDALWarpKernelOpenCL_setCoordRow(
    5212             :                 warper, padfX, padfY, nSrcXOff, nSrcYOff, pabSuccess, iDstY);
    5213           0 :             if (err != CL_SUCCESS)
    5214             :             {
    5215           0 :                 CPLError(CE_Failure, CPLE_AppDefined,
    5216             :                          "OpenCL routines reported failure (%d) on line %d.",
    5217             :                          static_cast<int>(err), __LINE__);
    5218           0 :                 eErr = CE_Failure;
    5219           0 :                 break; // not a throw: the coordinate arrays must be freed below first
    5220             :             }
    5221             : 
    5222             :             // Update the valid & density masks because we don't do so in the
    5223             :             // kernel.
    5224           0 :             for (int iDstX = 0; iDstX < nDstXSize && eErr == CE_None; iDstX++)
    5225             :             {
    5226           0 :                 const double dfX = padfX[iDstX];
    5227           0 :                 const double dfY = padfY[iDstX];
    5228           0 :                 const GPtrDiff_t iDstOffset =
    5229           0 :                     iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    5230             : 
    5231             :                 // See GWKGeneralCase() for appropriate commenting.
    5232           0 :                 if (!pabSuccess[iDstX] || dfX < nSrcXOff || dfY < nSrcYOff)
    5233           0 :                     continue;
    5234             : 
    5235           0 :                 int iSrcX = static_cast<int>(dfX) - nSrcXOff;
    5236           0 :                 int iSrcY = static_cast<int>(dfY) - nSrcYOff;
    5237             : 
    5238           0 :                 if (iSrcX < 0 || iSrcX >= nSrcXSize || iSrcY < 0 ||
    5239             :                     iSrcY >= nSrcYSize)
    5240           0 :                     continue;
    5241             : 
    5242           0 :                 GPtrDiff_t iSrcOffset =
    5243           0 :                     iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    5244           0 :                 double dfDensity = 1.0;
    5245             : 
    5246           0 :                 if (poWK->pafUnifiedSrcDensity != nullptr && iSrcX >= 0 &&
    5247           0 :                     iSrcY >= 0 && iSrcX < nSrcXSize && iSrcY < nSrcYSize)
    5248           0 :                     dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    5249             : 
    5250           0 :                 GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    5251             : 
    5252             :                 // Because this is on the bit-wise level, it can't be done well
    5253             :                 // in OpenCL.
    5254           0 :                 if (poWK->panDstValid != nullptr)
    5255           0 :                     poWK->panDstValid[iDstOffset >> 5] |=
    5256           0 :                         0x01U << (iDstOffset & 0x1f); // unsigned literal: bit 31 must not go through a signed shift
    5257             :             }
    5258             :         }
    5259             : 
    5260           0 :         CPLFree(padfX);
    5261           0 :         CPLFree(padfY);
    5262           0 :         CPLFree(padfZ);
    5263           0 :         CPLFree(pabSuccess);
    5264             : 
    5265           0 :         if (eErr != CE_None)
    5266           0 :             throw eErr;
    5267             : 
    5268           0 :         err = GDALWarpKernelOpenCL_runResamp(
    5269             :             warper, poWK->pafUnifiedSrcDensity, poWK->panUnifiedSrcValid,
    5270             :             poWK->pafDstDensity, poWK->panDstValid, poWK->dfXScale,
    5271             :             poWK->dfYScale, poWK->dfXFilter, poWK->dfYFilter, poWK->nXRadius,
    5272             :             poWK->nYRadius, poWK->nFiltInitX, poWK->nFiltInitY);
    5273             : 
    5274           0 :         if (err != CL_SUCCESS)
    5275             :         {
    5276           0 :             CPLError(CE_Failure, CPLE_AppDefined,
    5277             :                      "OpenCL routines reported failure (%d) on line %d.",
    5278             :                      static_cast<int>(err), __LINE__);
    5279           0 :             eErr = CE_Failure;
    5280           0 :             throw eErr;
    5281             :         }
    5282             : 
    5283             :         /* ====================================================================
    5284             :          */
    5285             :         /*      Loop over output lines. */
    5286             :         /* ====================================================================
    5287             :          */
    5288           0 :         for (int iDstY = 0; iDstY < nDstYSize && eErr == CE_None; iDstY++)
    5289             :         {
    5290           0 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    5291             :             {
    5292           0 :                 void *rowReal = nullptr;
    5293           0 :                 void *rowImag = nullptr;
    5294           0 :                 GByte *pabyDst = poWK->papabyDstImage[iBand];
    5295             : // Row offsets below are computed in GPtrDiff_t so iDstY * nDstXSize cannot overflow int.
    5296           0 :                 err = GDALWarpKernelOpenCL_getRow(warper, &rowReal, &rowImag,
    5297             :                                                   iDstY, iBand);
    5298           0 :                 if (err != CL_SUCCESS)
    5299             :                 {
    5300           0 :                     CPLError(
    5301             :                         CE_Failure, CPLE_AppDefined,
    5302             :                         "OpenCL routines reported failure (%d) on line %d.",
    5303             :                         static_cast<int>(err), __LINE__);
    5304           0 :                     eErr = CE_Failure;
    5305           0 :                     throw eErr;
    5306             :                 }
    5307             : 
    5308             :                 // Copy the data from the warper to GDAL's memory.
    5309           0 :                 switch (poWK->eWorkingDataType)
    5310             :                 {
    5311           0 :                     case GDT_Byte:
    5312           0 :                         memcpy(&(pabyDst[static_cast<GPtrDiff_t>(iDstY) * nDstXSize]), rowReal,
    5313             :                                sizeof(GByte) * nDstXSize);
    5314           0 :                         break;
    5315           0 :                     case GDT_Int16:
    5316           0 :                         memcpy(&(reinterpret_cast<GInt16 *>(
    5317           0 :                                    pabyDst)[static_cast<GPtrDiff_t>(iDstY) * nDstXSize]),
    5318           0 :                                rowReal, sizeof(GInt16) * nDstXSize);
    5319           0 :                         break;
    5320           0 :                     case GDT_UInt16:
    5321           0 :                         memcpy(&(reinterpret_cast<GUInt16 *>(
    5322           0 :                                    pabyDst)[static_cast<GPtrDiff_t>(iDstY) * nDstXSize]),
    5323           0 :                                rowReal, sizeof(GUInt16) * nDstXSize);
    5324           0 :                         break;
    5325           0 :                     case GDT_Float32:
    5326           0 :                         memcpy(&(reinterpret_cast<float *>(
    5327           0 :                                    pabyDst)[static_cast<GPtrDiff_t>(iDstY) * nDstXSize]),
    5328           0 :                                rowReal, sizeof(float) * nDstXSize);
    5329           0 :                         break;
    5330           0 :                     case GDT_CInt16:
    5331             :                     {
    5332           0 :                         GInt16 *pabyDstI16 = &(reinterpret_cast<GInt16 *>(
    5333           0 :                             pabyDst)[static_cast<GPtrDiff_t>(iDstY) * nDstXSize * 2]); // two GInt16 (real, imag) per pixel
    5334           0 :                         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5335             :                         {
    5336           0 :                             pabyDstI16[iDstX * 2] =
    5337           0 :                                 static_cast<GInt16 *>(rowReal)[iDstX];
    5338           0 :                             pabyDstI16[iDstX * 2 + 1] =
    5339           0 :                                 static_cast<GInt16 *>(rowImag)[iDstX];
    5340             :                         }
    5341             :                     }
    5342           0 :                     break;
    5343           0 :                     case GDT_CFloat32:
    5344             :                     {
    5345           0 :                         float *pabyDstF32 = &(reinterpret_cast<float *>(
    5346           0 :                             pabyDst)[static_cast<GPtrDiff_t>(iDstY) * nDstXSize * 2]); // two floats (real, imag) per pixel
    5347           0 :                         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5348             :                         {
    5349           0 :                             pabyDstF32[iDstX * 2] =
    5350           0 :                                 static_cast<float *>(rowReal)[iDstX];
    5351           0 :                             pabyDstF32[iDstX * 2 + 1] =
    5352           0 :                                 static_cast<float *>(rowImag)[iDstX];
    5353             :                         }
    5354             :                     }
    5355           0 :                     break;
    5356           0 :                     default:
    5357             :                         // No support for higher precision formats.
    5358           0 :                         CPLError(CE_Failure, CPLE_AppDefined,
    5359             :                                  "Unsupported resampling OpenCL data type %d.",
    5360           0 :                                  static_cast<int>(poWK->eWorkingDataType));
    5361           0 :                         eErr = CE_Failure;
    5362           0 :                         throw eErr;
    5363             :                 }
    5364             :             }
    5365             :         }
    5366             :     }
    5367           0 :     catch (const CPLErr &)
    5368             :     {
    5369             :     }
    5370             : // Reached on success and after any throw above: always tear down the OpenCL environment.
    5371           0 :     if ((err = GDALWarpKernelOpenCL_deleteEnv(warper)) != CL_SUCCESS)
    5372             :     {
    5373           0 :         CPLError(CE_Failure, CPLE_AppDefined,
    5374             :                  "OpenCL routines reported failure (%d) on line %d.",
    5375             :                  static_cast<int>(err), __LINE__);
    5376           0 :         return CE_Failure;
    5377             :     }
    5378             : 
    5379           0 :     return eErr;
    5380             : }
    5381             : #endif /* defined(HAVE_OPENCL) */
    5382             : 
    5383             : /************************************************************************/
    5384             : /*                     GWKCheckAndComputeSrcOffsets()                   */
    5385             : /************************************************************************/
                       // Validates the source coordinate held for destination column _iDstX
                       // of the current scanline and, when it falls inside the source window,
                       // converts it to a linear source pixel offset.
                       //
                       // psJob         job descriptor; supplies poWK and pTransformerArg.
                       // _pabSuccess   per-column transformer success flags; entry _iDstX may
                       //               be rewritten by the single-point retry.
                       // _iDstX        destination column; also the index used into
                       //               _pabSuccess, _padfX and _padfY.
                       // _iDstY        destination line; only used to rebuild the destination
                       //               coordinate for the retry.
                       // _padfX/_padfY source coordinates, one entry per destination column;
                       //               entry _iDstX may be overwritten by the retry.
                       // _nSrcXSize/_nSrcYSize  extent of the source window, in pixels.
                       // iSrcOffset    output: iSrcX + iSrcY * _nSrcXSize. Written only when
                       //               the function returns true.
                       //
                       // Returns false when the success flag for the point is not set, when a
                       // coordinate is NaN, or when the coordinate lies outside the source
                       // window (after at most one retry); returns true otherwise.
    5386             : static CPL_INLINE bool
    5387   109634000 : GWKCheckAndComputeSrcOffsets(GWKJobStruct *psJob, int *_pabSuccess, int _iDstX,
    5388             :                              int _iDstY, double *_padfX, double *_padfY,
    5389             :                              int _nSrcXSize, int _nSrcYSize,
    5390             :                              GPtrDiff_t &iSrcOffset)
    5391             : {
    5392   109634000 :     const GDALWarpKernel *_poWK = psJob->poWK;
    5393   109760000 :     for (int iTry = 0; iTry < 2; ++iTry) // pass 0: values as passed in; pass 1: re-transformed point
    5394             :     {
    5395   109768000 :         if (iTry == 1)
    5396             :         {
    5397             :             // If the source coordinate is slightly outside of the source raster
    5398             :             // retry to transform it alone, so that the exact coordinate
    5399             :             // transformer is used.
    5400             : 
    5401      125881 :             _padfX[_iDstX] = _iDstX + 0.5 + _poWK->nDstXOff;
    5402      125881 :             _padfY[_iDstX] = _iDstY + 0.5 + _poWK->nDstYOff;
    5403      125881 :             double dfZ = 0; // Z result of the retry is not used afterwards
    5404      125881 :             _poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1,
    5405      125881 :                                   _padfX + _iDstX, _padfY + _iDstX, &dfZ,
    5406      125881 :                                   _pabSuccess + _iDstX);
    5407             :         }
    5408   109768000 :         if (!_pabSuccess[_iDstX]) // success flag for this point is not set
    5409     3593470 :             return false;
    5410             : 
    5411             :         // If this happens this is likely the symptom of a bug somewhere.
    5412   106175000 :         if (std::isnan(_padfX[_iDstX]) || std::isnan(_padfY[_iDstX]))
    5413             :         {
                                   // Latch so that the debug message is emitted only once.
                                   // NOTE(review): plain function-local static with no
                                   // synchronization visible here; confirm that concurrent callers
                                   // are acceptable.
    5414             :             static bool bNanCoordFound = false;
    5415           0 :             if (!bNanCoordFound)
    5416             :             {
    5417           0 :                 CPLDebug("WARP",
    5418             :                          "GWKCheckAndComputeSrcOffsets(): "
    5419             :                          "NaN coordinate found on point %d.",
    5420             :                          _iDstX);
    5421           0 :                 bNanCoordFound = true;
    5422             :             }
    5423           0 :             return false;
    5424             :         }
    5425             : 
    5426             :         /* --------------------------------------------------------------------
    5427             :          */
    5428             :         /*      Figure out what pixel we want in our source raster, and skip */
    5429             :         /*      further processing if it is well off the source image. */
    5430             :         /* --------------------------------------------------------------------
    5431             :          */
    5432             :         /* We test against the value before casting to avoid the */
    5433             :         /* problem of asymmetric truncation effects around zero.  That is */
    5434             :         /* -0.5 will be 0 when cast to an int. */
    5435   106132000 :         if (_padfX[_iDstX] < _poWK->nSrcXOff) // X before the first source column
    5436             :         {
    5437             :             // If the source coordinate is slightly outside of the source raster
    5438             :             // retry to transform it alone, so that the exact coordinate
    5439             :             // transformer is used.
    5440     4137570 :             if (iTry == 0 && _padfX[_iDstX] > _poWK->nSrcXOff - 1) // off by less than one pixel
    5441       21497 :                 continue;
    5442     4116080 :             return false;
    5443             :         }
    5444             : 
    5445   101994000 :         if (_padfY[_iDstX] < _poWK->nSrcYOff) // Y before the first source line
    5446             :         {
    5447             :             // If the source coordinate is slightly outside of the source raster
    5448             :             // retry to transform it alone, so that the exact coordinate
    5449             :             // transformer is used.
    5450     4793040 :             if (iTry == 0 && _padfY[_iDstX] > _poWK->nSrcYOff - 1) // off by less than one pixel
    5451       38555 :                 continue;
    5452     4754490 :             return false;
    5453             :         }
    5454             : 
    5455             :         // Check for potential overflow when casting from float to int, (if
    5456             :         // operating outside natural projection area, padfX/Y can be a very huge
    5457             :         // positive number before doing the actual conversion), as such cast is
    5458             :         // undefined behavior that can trigger exception with some compilers
    5459             :         // (see #6753)
                               // The 1e-10 bias added here is the same one added before the
                               // int casts after the loop.
    5460    97201100 :         if (_padfX[_iDstX] + 1e-10 > _nSrcXSize + _poWK->nSrcXOff) // X past the last source column
    5461             :         {
    5462             :             // If the source coordinate is slightly outside of the source raster
    5463             :             // retry to transform it alone, so that the exact coordinate
    5464             :             // transformer is used.
    5465     3503560 :             if (iTry == 0 && _padfX[_iDstX] < _nSrcXSize + _poWK->nSrcXOff + 1)
    5466       33295 :                 continue;
    5467     3470270 :             return false;
    5468             :         }
    5469    93697500 :         if (_padfY[_iDstX] + 1e-10 > _nSrcYSize + _poWK->nSrcYOff) // Y past the last source line
    5470             :         {
    5471             :             // If the source coordinate is slightly outside of the source raster
    5472             :             // retry to transform it alone, so that the exact coordinate
    5473             :             // transformer is used.
    5474     3731490 :             if (iTry == 0 && _padfY[_iDstX] < _nSrcYSize + _poWK->nSrcYOff + 1)
    5475       32536 :                 continue;
    5476     3698950 :             return false;
    5477             :         }
    5478             : 
    5479    89966000 :         break; // all four bounds checks passed
    5480             :     }
    5481             : 
    5482    89958300 :     int iSrcX = static_cast<int>(_padfX[_iDstX] + 1.0e-10) - _poWK->nSrcXOff;
    5483    89958300 :     int iSrcY = static_cast<int>(_padfY[_iDstX] + 1.0e-10) - _poWK->nSrcYOff;
                           // The upper-bound tests above only reject values strictly greater
                           // than the far edge, so the cast can yield exactly the window size:
                           // step back onto the last column/line in that case.
    5484    89958300 :     if (iSrcX == _nSrcXSize)
    5485           0 :         iSrcX--;
    5486    89958300 :     if (iSrcY == _nSrcYSize)
    5487           0 :         iSrcY--;
    5488             : 
    5489             :     // Those checks should normally be OK given the previous ones.
    5490    89958300 :     CPLAssert(iSrcX >= 0);
    5491    89958300 :     CPLAssert(iSrcY >= 0);
    5492    89958300 :     CPLAssert(iSrcX < _nSrcXSize);
    5493    89958300 :     CPLAssert(iSrcY < _nSrcYSize);
    5494             : 
                           // iSrcY is widened to GPtrDiff_t before the multiplication.
    5495    89958300 :     iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * _nSrcXSize;
    5496             : 
    5497    89958300 :     return true;
    5498             : }
    5499             : 
    5500             : /************************************************************************/
    5501             : /*                   GWKOneSourceCornerFailsToReproject()               */
    5502             : /************************************************************************/
    5503             : 
                       // Reports whether at least one of the four corners of the source
                       // window fails to go through the transformer.
                       //
                       // The corners are (nSrcXOff + iX * nSrcXSize, nSrcYOff + iY * nSrcYSize)
                       // for iX, iY in {0, 1}. Each one is passed alone to pfnTransformer with
                       // FALSE as second argument (presumably the source-to-destination
                       // direction; calls made with TRUE in this file are commented as
                       // destination-to-source -- confirm against the transformer contract).
                       //
                       // psJob  supplies poWK and pTransformerArg.
                       //
                       // Returns true as soon as one corner is flagged as failed, false when
                       // all four corners succeed.
    5504         737 : static bool GWKOneSourceCornerFailsToReproject(GWKJobStruct *psJob)
    5505             : {
    5506         737 :     GDALWarpKernel *poWK = psJob->poWK;
    5507        2201 :     for (int iY = 0; iY <= 1; ++iY)
    5508             :     {
    5509        4398 :         for (int iX = 0; iX <= 1; ++iX)
    5510             :         {
    5511        2934 :             double dfXTmp = poWK->nSrcXOff + iX * poWK->nSrcXSize;
    5512        2934 :             double dfYTmp = poWK->nSrcYOff + iY * poWK->nSrcYSize;
    5513        2934 :             double dfZTmp = 0;
    5514        2934 :             int nSuccess = FALSE; // treated as failed unless the transformer sets it
    5515        2934 :             poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp,
    5516             :                                  &dfYTmp, &dfZTmp, &nSuccess);
    5517        2934 :             if (!nSuccess)
    5518           6 :                 return true; // remaining corners are not tested
    5519             :         }
    5520             :     }
    5521         731 :     return false;
    5522             : }
    5523             : 
    5524             : /************************************************************************/
    5525             : /*                       GWKAdjustSrcOffsetOnEdge()                     */
    5526             : /************************************************************************/
    5527             : 
                       // Tries to move iSrcOffset to a valid direct neighbour when the source
                       // pixel it designates sits next to a position that the transformer
                       // cannot handle.
                       //
                       // With (x, y) the column/line decoded from iSrcOffset, the positions
                       // (x, y), then (x, y + 1), then (x + 1, y), each shifted by
                       // nSrcXOff/nSrcYOff, are passed in turn to pfnTransformer (called with
                       // FALSE); probing stops at the first failure. When all three succeed
                       // nothing is changed and false is returned. Otherwise the neighbours
                       // (x + 1, y), (x, y + 1), (x - 1, y), (x, y - 1) are examined in that
                       // order, and iSrcOffset is moved to the first one that lies inside the
                       // nSrcXSize x nSrcYSize window and is set in panUnifiedSrcValid.
                       //
                       // psJob       supplies poWK and pTransformerArg.
                       // iSrcOffset  in/out linear offset (x + y * nSrcXSize); modified only
                       //             when true is returned.
                       //
                       // Returns true when iSrcOffset was moved to a valid neighbour.
                       //
                       // NOTE(review): panUnifiedSrcValid is used without a null check here;
                       // the caller visible in this file only reaches this function after
                       // testing it against nullptr -- confirm for any other caller.
    5528        9714 : static bool GWKAdjustSrcOffsetOnEdge(GWKJobStruct *psJob,
    5529             :                                      GPtrDiff_t &iSrcOffset)
    5530             : {
    5531        9714 :     GDALWarpKernel *poWK = psJob->poWK;
    5532        9714 :     const int nSrcXSize = poWK->nSrcXSize;
    5533        9714 :     const int nSrcYSize = poWK->nSrcYSize;
    5534             : 
    5535             :     // Check if the computed source position slightly altered
    5536             :     // fails to reproject. If so, then we are at the edge of
    5537             :     // the validity area, and it is worth checking neighbour
    5538             :     // source pixels for validity.
    5539        9714 :     int nSuccess = FALSE;
    5540             :     {
                               // Probe 1: position (x, y).
    5541        9714 :         double dfXTmp =
    5542        9714 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5543        9714 :         double dfYTmp =
    5544        9714 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5545        9714 :         double dfZTmp = 0;
    5546        9714 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5547             :                              &dfZTmp, &nSuccess);
    5548             :     }
    5549        9714 :     if (nSuccess)
    5550             :     {
                               // Probe 2: position (x, y + 1), only if probe 1 succeeded.
    5551        6996 :         double dfXTmp =
    5552        6996 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5553        6996 :         double dfYTmp =
    5554        6996 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
    5555        6996 :         double dfZTmp = 0;
    5556        6996 :         nSuccess = FALSE;
    5557        6996 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5558             :                              &dfZTmp, &nSuccess);
    5559             :     }
    5560        9714 :     if (nSuccess)
    5561             :     {
                               // Probe 3: position (x + 1, y), only if probes 1 and 2 succeeded.
    5562        5624 :         double dfXTmp =
    5563        5624 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
    5564        5624 :         double dfYTmp =
    5565        5624 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5566        5624 :         double dfZTmp = 0;
    5567        5624 :         nSuccess = FALSE;
    5568        5624 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5569             :                              &dfZTmp, &nSuccess);
    5570             :     }
    5571             : 
                           // From here on nSuccess is FALSE iff one of the probes failed. Each
                           // branch checks that the neighbour stays inside the window before
                           // reading its validity bit.
    5572       14166 :     if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
    5573        4452 :         CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + 1))
    5574             :     {
    5575        1860 :         iSrcOffset++; // neighbour (x + 1, y)
    5576        1860 :         return true;
    5577             :     }
    5578       10290 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
    5579        2436 :              CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + nSrcXSize))
    5580             :     {
    5581        1334 :         iSrcOffset += nSrcXSize; // neighbour (x, y + 1)
    5582        1334 :         return true;
    5583             :     }
    5584        7838 :     else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
    5585        1318 :              CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - 1))
    5586             :     {
    5587         956 :         iSrcOffset--; // neighbour (x - 1, y)
    5588         956 :         return true;
    5589             :     }
    5590        5924 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
    5591         360 :              CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - nSrcXSize))
    5592             :     {
    5593         340 :         iSrcOffset -= nSrcXSize; // neighbour (x, y - 1)
    5594         340 :         return true;
    5595             :     }
    5596             : 
    5597        5224 :     return false; // every probe succeeded, or no valid neighbour was found
    5598             : }
    5599             : 
    5600             : /************************************************************************/
    5601             : /*                 GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity()          */
    5602             : /************************************************************************/
    5603             : 
                       // Tries to move iSrcOffset to a direct neighbour whose unified source
                       // density is sufficient, when the source pixel it designates sits next
                       // to a position that the transformer cannot handle.
                       //
                       // With (x, y) the column/line decoded from iSrcOffset, the positions
                       // (x, y), then (x, y + 1), then (x + 1, y), each shifted by
                       // nSrcXOff/nSrcYOff, are passed in turn to pfnTransformer (called with
                       // FALSE); probing stops at the first failure. When all three succeed
                       // nothing is changed and false is returned. Otherwise the neighbours
                       // (x + 1, y), (x, y + 1), (x - 1, y), (x, y - 1) are examined in that
                       // order, and iSrcOffset is moved to the first one that lies inside the
                       // nSrcXSize x nSrcYSize window and whose pafUnifiedSrcDensity value is
                       // >= SRC_DENSITY_THRESHOLD.
                       //
                       // psJob       supplies poWK and pTransformerArg.
                       // iSrcOffset  in/out linear offset (x + y * nSrcXSize); modified only
                       //             when true is returned.
                       //
                       // Returns true when iSrcOffset was moved to such a neighbour.
                       //
                       // NOTE(review): pafUnifiedSrcDensity is indexed without a null check
                       // here; the caller visible in this file only reaches this function
                       // after testing it against nullptr -- confirm for any other caller.
    5604           0 : static bool GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(GWKJobStruct *psJob,
    5605             :                                                       GPtrDiff_t &iSrcOffset)
    5606             : {
    5607           0 :     GDALWarpKernel *poWK = psJob->poWK;
    5608           0 :     const int nSrcXSize = poWK->nSrcXSize;
    5609           0 :     const int nSrcYSize = poWK->nSrcYSize;
    5610             : 
    5611             :     // Check if the computed source position slightly altered
    5612             :     // fails to reproject. If so, then we are at the edge of
    5613             :     // the validity area, and it is worth checking neighbour
    5614             :     // source pixels for validity.
    5615           0 :     int nSuccess = FALSE;
    5616             :     {
                               // Probe 1: position (x, y).
    5617           0 :         double dfXTmp =
    5618           0 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5619           0 :         double dfYTmp =
    5620           0 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5621           0 :         double dfZTmp = 0;
    5622           0 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5623             :                              &dfZTmp, &nSuccess);
    5624             :     }
    5625           0 :     if (nSuccess)
    5626             :     {
                               // Probe 2: position (x, y + 1), only if probe 1 succeeded.
    5627           0 :         double dfXTmp =
    5628           0 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5629           0 :         double dfYTmp =
    5630           0 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
    5631           0 :         double dfZTmp = 0;
    5632           0 :         nSuccess = FALSE;
    5633           0 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5634             :                              &dfZTmp, &nSuccess);
    5635             :     }
    5636           0 :     if (nSuccess)
    5637             :     {
                               // Probe 3: position (x + 1, y), only if probes 1 and 2 succeeded.
    5638           0 :         double dfXTmp =
    5639           0 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
    5640           0 :         double dfYTmp =
    5641           0 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5642           0 :         double dfZTmp = 0;
    5643           0 :         nSuccess = FALSE;
    5644           0 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5645             :                              &dfZTmp, &nSuccess);
    5646             :     }
    5647             : 
                           // From here on nSuccess is FALSE iff one of the probes failed. Each
                           // branch checks that the neighbour stays inside the window before
                           // reading its density.
    5648           0 :     if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
    5649           0 :         poWK->pafUnifiedSrcDensity[iSrcOffset + 1] >= SRC_DENSITY_THRESHOLD)
    5650             :     {
    5651           0 :         iSrcOffset++; // neighbour (x + 1, y)
    5652           0 :         return true;
    5653             :     }
    5654           0 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
    5655           0 :              poWK->pafUnifiedSrcDensity[iSrcOffset + nSrcXSize] >=
    5656             :                  SRC_DENSITY_THRESHOLD)
    5657             :     {
    5658           0 :         iSrcOffset += nSrcXSize; // neighbour (x, y + 1)
    5659           0 :         return true;
    5660             :     }
    5661           0 :     else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
    5662           0 :              poWK->pafUnifiedSrcDensity[iSrcOffset - 1] >=
    5663             :                  SRC_DENSITY_THRESHOLD)
    5664             :     {
    5665           0 :         iSrcOffset--; // neighbour (x - 1, y)
    5666           0 :         return true;
    5667             :     }
    5668           0 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
    5669           0 :              poWK->pafUnifiedSrcDensity[iSrcOffset - nSrcXSize] >=
    5670             :                  SRC_DENSITY_THRESHOLD)
    5671             :     {
    5672           0 :         iSrcOffset -= nSrcXSize; // neighbour (x, y - 1)
    5673           0 :         return true;
    5674             :     }
    5675             : 
    5676           0 :     return false; // every probe succeeded, or no suitable neighbour was found
    5677             : }
    5678             : 
    5679             : /************************************************************************/
    5680             : /*                           GWKGeneralCase()                           */
    5681             : /*                                                                      */
    5682             : /*      This is the most general case.  It attempts to handle all       */
    5683             : /*      possible features with relatively little concern for            */
    5684             : /*      efficiency.                                                     */
    5685             : /************************************************************************/
    5686             : 
    5687         243 : static void GWKGeneralCaseThread(void *pData)
    5688             : {
    5689         243 :     GWKJobStruct *psJob = reinterpret_cast<GWKJobStruct *>(pData);
    5690         243 :     GDALWarpKernel *poWK = psJob->poWK;
    5691         243 :     const int iYMin = psJob->iYMin;
    5692         243 :     const int iYMax = psJob->iYMax;
    5693             :     const double dfMultFactorVerticalShiftPipeline =
    5694         243 :         poWK->bApplyVerticalShift
    5695         243 :             ? CPLAtof(CSLFetchNameValueDef(
    5696           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    5697             :                   "1.0"))
    5698         243 :             : 0.0;
    5699             : 
    5700         243 :     int nDstXSize = poWK->nDstXSize;
    5701         243 :     int nSrcXSize = poWK->nSrcXSize;
    5702         243 :     int nSrcYSize = poWK->nSrcYSize;
    5703             : 
    5704             :     /* -------------------------------------------------------------------- */
    5705             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    5706             :     /*      scanlines worth of positions.                                   */
    5707             :     /* -------------------------------------------------------------------- */
    5708             :     // For x, 2 *, because we cache the precomputed values at the end.
    5709             :     double *padfX =
    5710         243 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    5711             :     double *padfY =
    5712         243 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5713             :     double *padfZ =
    5714         243 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5715         243 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    5716             : 
    5717         243 :     const bool bUse4SamplesFormula =
    5718         243 :         poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
    5719             : 
    5720         243 :     GWKResampleWrkStruct *psWrkStruct = nullptr;
    5721         243 :     if (poWK->eResample != GRA_NearestNeighbour)
    5722             :     {
    5723         224 :         psWrkStruct = GWKResampleCreateWrkStruct(poWK);
    5724             :     }
    5725         243 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    5726         243 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    5727         243 :     const double dfErrorThreshold = CPLAtof(
    5728         243 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    5729             : 
    5730             :     const bool bOneSourceCornerFailsToReproject =
    5731         243 :         GWKOneSourceCornerFailsToReproject(psJob);
    5732             : 
    5733             :     // Precompute values.
    5734        6513 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5735        6270 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    5736             : 
    5737             :     /* ==================================================================== */
    5738             :     /*      Loop over output lines.                                         */
    5739             :     /* ==================================================================== */
    5740        6513 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    5741             :     {
    5742             :         /* --------------------------------------------------------------------
    5743             :          */
    5744             :         /*      Setup points to transform to source image space. */
    5745             :         /* --------------------------------------------------------------------
    5746             :          */
    5747        6270 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    5748        6270 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    5749      242830 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5750      236560 :             padfY[iDstX] = dfY;
    5751        6270 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    5752             : 
    5753             :         /* --------------------------------------------------------------------
    5754             :          */
    5755             :         /*      Transform the points from destination pixel/line coordinates */
    5756             :         /*      to source pixel/line coordinates. */
    5757             :         /* --------------------------------------------------------------------
    5758             :          */
    5759        6270 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    5760             :                              padfY, padfZ, pabSuccess);
    5761        6270 :         if (dfSrcCoordPrecision > 0.0)
    5762             :         {
    5763           0 :             GWKRoundSourceCoordinates(
    5764             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    5765             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    5766           0 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    5767             :         }
    5768             : 
    5769             :         /* ====================================================================
    5770             :          */
    5771             :         /*      Loop over pixels in output scanline. */
    5772             :         /* ====================================================================
    5773             :          */
    5774      242830 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5775             :         {
    5776      236560 :             GPtrDiff_t iSrcOffset = 0;
    5777      236560 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    5778             :                                               padfX, padfY, nSrcXSize,
    5779             :                                               nSrcYSize, iSrcOffset))
    5780           0 :                 continue;
    5781             : 
    5782             :             /* --------------------------------------------------------------------
    5783             :              */
    5784             :             /*      Do not try to apply transparent/invalid source pixels to the
    5785             :              */
    5786             :             /*      destination.  This currently ignores the multi-pixel input
    5787             :              */
    5788             :             /*      of bilinear and cubic resamples. */
    5789             :             /* --------------------------------------------------------------------
    5790             :              */
    5791      236560 :             double dfDensity = 1.0;
    5792             : 
    5793      236560 :             if (poWK->pafUnifiedSrcDensity != nullptr)
    5794             :             {
    5795        1200 :                 dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    5796        1200 :                 if (dfDensity < SRC_DENSITY_THRESHOLD)
    5797             :                 {
    5798           0 :                     if (!bOneSourceCornerFailsToReproject)
    5799             :                     {
    5800           0 :                         continue;
    5801             :                     }
    5802           0 :                     else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
    5803             :                                  psJob, iSrcOffset))
    5804             :                     {
    5805           0 :                         dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    5806             :                     }
    5807             :                     else
    5808             :                     {
    5809           0 :                         continue;
    5810             :                     }
    5811             :                 }
    5812             :             }
    5813             : 
    5814      236560 :             if (poWK->panUnifiedSrcValid != nullptr &&
    5815           0 :                 !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    5816             :             {
    5817           0 :                 if (!bOneSourceCornerFailsToReproject)
    5818             :                 {
    5819           0 :                     continue;
    5820             :                 }
    5821           0 :                 else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
    5822             :                 {
    5823           0 :                     continue;
    5824             :                 }
    5825             :             }
    5826             : 
    5827             :             /* ====================================================================
    5828             :              */
    5829             :             /*      Loop processing each band. */
    5830             :             /* ====================================================================
    5831             :              */
    5832      236560 :             bool bHasFoundDensity = false;
    5833             : 
    5834      236560 :             const GPtrDiff_t iDstOffset =
    5835      236560 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    5836      473120 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    5837             :             {
    5838      236560 :                 double dfBandDensity = 0.0;
    5839      236560 :                 double dfValueReal = 0.0;
    5840      236560 :                 double dfValueImag = 0.0;
    5841             : 
    5842             :                 /* --------------------------------------------------------------------
    5843             :                  */
    5844             :                 /*      Collect the source value. */
    5845             :                 /* --------------------------------------------------------------------
    5846             :                  */
    5847      236560 :                 if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
    5848             :                     nSrcYSize == 1)
    5849             :                 {
    5850             :                     // FALSE is returned if dfBandDensity == 0, which is
    5851             :                     // checked below.
    5852         568 :                     CPL_IGNORE_RET_VAL(GWKGetPixelValue(
    5853             :                         poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal,
    5854             :                         &dfValueImag));
    5855             :                 }
    5856      235992 :                 else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
    5857             :                 {
    5858         648 :                     GWKBilinearResample4Sample(
    5859         648 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5860         648 :                         padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5861             :                         &dfValueReal, &dfValueImag);
    5862             :                 }
    5863      235344 :                 else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
    5864             :                 {
    5865         248 :                     GWKCubicResample4Sample(
    5866         248 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5867         248 :                         padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5868             :                         &dfValueReal, &dfValueImag);
    5869             :                 }
    5870             :                 else
    5871             : #ifdef DEBUG
    5872             :                     // Only useful for clang static analyzer.
    5873      235096 :                     if (psWrkStruct != nullptr)
    5874             : #endif
    5875             :                     {
    5876      235096 :                         psWrkStruct->pfnGWKResample(
    5877      235096 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5878      235096 :                             padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5879             :                             &dfValueReal, &dfValueImag, psWrkStruct);
    5880             :                     }
    5881             : 
    5882             :                 // If we didn't find any valid inputs skip to next band.
    5883      236560 :                 if (dfBandDensity < BAND_DENSITY_THRESHOLD)
    5884           0 :                     continue;
    5885             : 
    5886      236560 :                 if (poWK->bApplyVerticalShift)
    5887             :                 {
    5888           0 :                     if (!std::isfinite(padfZ[iDstX]))
    5889           0 :                         continue;
    5890             :                     // Subtract padfZ[] since the coordinate transformation is
    5891             :                     // from target to source
    5892           0 :                     dfValueReal =
    5893           0 :                         dfValueReal * poWK->dfMultFactorVerticalShift -
    5894           0 :                         padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
    5895             :                 }
    5896             : 
    5897      236560 :                 bHasFoundDensity = true;
    5898             : 
    5899             :                 /* --------------------------------------------------------------------
    5900             :                  */
    5901             :                 /*      We have a computed value from the source.  Now apply it
    5902             :                  * to      */
    5903             :                 /*      the destination pixel. */
    5904             :                 /* --------------------------------------------------------------------
    5905             :                  */
    5906      236560 :                 GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    5907             :                                  dfValueReal, dfValueImag);
    5908             :             }
    5909             : 
    5910      236560 :             if (!bHasFoundDensity)
    5911           0 :                 continue;
    5912             : 
    5913             :             /* --------------------------------------------------------------------
    5914             :              */
    5915             :             /*      Update destination density/validity masks. */
    5916             :             /* --------------------------------------------------------------------
    5917             :              */
    5918      236560 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    5919             : 
    5920      236560 :             if (poWK->panDstValid != nullptr)
    5921             :             {
    5922           0 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    5923             :             }
    5924             :         } /* Next iDstX */
    5925             : 
    5926             :         /* --------------------------------------------------------------------
    5927             :          */
    5928             :         /*      Report progress to the user, and optionally cancel out. */
    5929             :         /* --------------------------------------------------------------------
    5930             :          */
    5931        6270 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    5932           0 :             break;
    5933             :     }
    5934             : 
    5935             :     /* -------------------------------------------------------------------- */
    5936             :     /*      Cleanup and return.                                             */
    5937             :     /* -------------------------------------------------------------------- */
    5938         243 :     CPLFree(padfX);
    5939         243 :     CPLFree(padfY);
    5940         243 :     CPLFree(padfZ);
    5941         243 :     CPLFree(pabSuccess);
    5942         243 :     if (psWrkStruct)
    5943         224 :         GWKResampleDeleteWrkStruct(psWrkStruct);
    5944         243 : }
    5945             : 
    5946         243 : static CPLErr GWKGeneralCase(GDALWarpKernel *poWK)
    5947             : {
                           // Entry point of the general-case warper: hands
                           // GWKGeneralCaseThread to GWKRun() as the worker routine, tagged
                           // with the label "GWKGeneralCase", and returns GWKRun()'s CPLErr
                           // unchanged.
                           // NOTE(review): GWKRun() is defined outside this excerpt; how it
                           // splits the work into jobs is not visible here.
    5948         243 :     return GWKRun(poWK, "GWKGeneralCase", GWKGeneralCaseThread);
    5949             : }
    5950             : 
    5951             : /************************************************************************/
    5952             : /*                            GWKRealCase()                             */
    5953             : /*                                                                      */
    5954             : /*      General case for non-complex data types.                        */
    5955             : /************************************************************************/
    5956             : 
    5957         155 : static void GWKRealCaseThread(void *pData)
    5958             : 
    5959             : {
                           // Worker routine handed to GWKRun() by GWKRealCase().
                           //
                           // pData is a GWKJobStruct*. The job supplies the warp kernel
                           // (poWK), the half-open range of destination lines to process
                           // [iYMin, iYMax), the transformer argument and an optional
                           // progress callback.
                           //
                           // For every destination pixel of every line in the range:
                           //   1. the pixel centre is transformed to source coordinates,
                           //   2. pixels whose source location is rejected, transparent or
                           //      invalid are skipped,
                           //   3. each band is resampled into a single real value (any
                           //      imaginary output of shared helpers is discarded),
                           //   4. the value is optionally adjusted by the vertical shift and
                           //      written with GWKSetPixelValueReal(),
                           //   5. the destination density / validity mask is updated if at
                           //      least one band produced a value.
                           // Nothing is returned; results are written through poWK.
    5960         155 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    5961         155 :     GDALWarpKernel *poWK = psJob->poWK;
    5962         155 :     const int iYMin = psJob->iYMin;
    5963         155 :     const int iYMax = psJob->iYMax;
    5964             : 
    5965         155 :     const int nDstXSize = poWK->nDstXSize;
    5966         155 :     const int nSrcXSize = poWK->nSrcXSize;
    5967         155 :     const int nSrcYSize = poWK->nSrcYSize;
                           // The warp option is only parsed when a vertical shift is
                           // requested; otherwise the factor is 0.0 and never used, because
                           // its single use below is guarded by poWK->bApplyVerticalShift.
    5968             :     const double dfMultFactorVerticalShiftPipeline =
    5969         155 :         poWK->bApplyVerticalShift
    5970         155 :             ? CPLAtof(CSLFetchNameValueDef(
    5971           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    5972             :                   "1.0"))
    5973         155 :             : 0.0;
    5974             : 
    5975             :     /* -------------------------------------------------------------------- */
    5976             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    5977             :     /*      scanlines worth of positions.                                   */
    5978             :     /* -------------------------------------------------------------------- */
    5979             : 
    5980             :     // For x, 2 *, because we cache the precomputed values at the end.
    5981             :     double *padfX =
    5982         155 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    5983             :     double *padfY =
    5984         155 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5985             :     double *padfZ =
    5986         155 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5987         155 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    5988             : 
                           // The dedicated 4-sample bilinear/cubic code paths below are only
                           // taken when both scale factors are at least 0.95.
    5989         155 :     const bool bUse4SamplesFormula =
    5990         155 :         poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
    5991             : 
                           // The work structure is only needed by the generic
                           // psWrkStruct->pfnGWKResample() branch. With nearest neighbour the
                           // first branch of the per-band dispatch is always taken, so the
                           // null pointer left here is never dereferenced.
    5992         155 :     GWKResampleWrkStruct *psWrkStruct = nullptr;
    5993         155 :     if (poWK->eResample != GRA_NearestNeighbour)
    5994             :     {
    5995         128 :         psWrkStruct = GWKResampleCreateWrkStruct(poWK);
    5996             :     }
    5997         155 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    5998         155 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    5999         155 :     const double dfErrorThreshold = CPLAtof(
    6000         155 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    6001             : 
                           // True only when the unified source density is the sole source
                           // mask: no unified validity mask and no per-band validity masks.
                           // It selects the "SrcMaskIsDensity" cubic variants below.
    6002         450 :     const bool bSrcMaskIsDensity = poWK->panUnifiedSrcValid == nullptr &&
    6003         295 :                                    poWK->papanBandSrcValid == nullptr &&
    6004         140 :                                    poWK->pafUnifiedSrcDensity != nullptr;
    6005             : 
                           // When set, a pixel that hits an invalid/transparent source
                           // location is not dropped immediately: an adjusted source offset
                           // is tried first (see the two mask checks in the pixel loop).
    6006             :     const bool bOneSourceCornerFailsToReproject =
    6007         155 :         GWKOneSourceCornerFailsToReproject(psJob);
    6008             : 
    6009             :     // Precompute values.
                           // Destination pixel-centre x coordinates, stored in the second
                           // half of padfX so they can be copied back for every line.
    6010       19572 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6011       19417 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    6012             : 
    6013             :     /* ==================================================================== */
    6014             :     /*      Loop over output lines.                                         */
    6015             :     /* ==================================================================== */
    6016       22275 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    6017             :     {
    6018             :         /* --------------------------------------------------------------------
    6019             :          */
    6020             :         /*      Setup points to transform to source image space. */
    6021             :         /* --------------------------------------------------------------------
    6022             :          */
                               // The first half of padfX holds source coordinates after the
                               // previous line's transform, so refresh it from the cached
                               // destination x centres.
    6023       22120 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    6024       22120 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    6025    43558800 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6026    43536700 :             padfY[iDstX] = dfY;
    6027       22120 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    6028             : 
    6029             :         /* --------------------------------------------------------------------
    6030             :          */
    6031             :         /*      Transform the points from destination pixel/line coordinates */
    6032             :         /*      to source pixel/line coordinates. */
    6033             :         /* --------------------------------------------------------------------
    6034             :          */
    6035       22120 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    6036             :                              padfY, padfZ, pabSuccess);
                               // Optional post-processing of the transformed coordinates,
                               // enabled by a strictly positive SRC_COORD_PRECISION option.
    6037       22120 :         if (dfSrcCoordPrecision > 0.0)
    6038             :         {
    6039           0 :             GWKRoundSourceCoordinates(
    6040             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    6041             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    6042           0 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    6043             :         }
    6044             : 
    6045             :         /* ====================================================================
    6046             :          */
    6047             :         /*      Loop over pixels in output scanline. */
    6048             :         /* ====================================================================
    6049             :          */
    6050    43558800 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6051             :         {
                                   // Skip the pixel when the helper rejects it.
                                   // NOTE(review): the helper is defined outside this excerpt;
                                   // presumably it checks pabSuccess / source bounds and fills
                                   // iSrcOffset -- confirm.
    6052    43536700 :             GPtrDiff_t iSrcOffset = 0;
    6053    43536700 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    6054             :                                               padfX, padfY, nSrcXSize,
    6055             :                                               nSrcYSize, iSrcOffset))
    6056    42894000 :                 continue;
    6057             : 
    6058             :             /* --------------------------------------------------------------------
    6059             :              */
    6060             :             /*      Do not try to apply transparent/invalid source pixels to the
    6061             :              */
    6062             :             /*      destination.  This currently ignores the multi-pixel input
    6063             :              */
    6064             :             /*      of bilinear and cubic resamples. */
    6065             :             /* --------------------------------------------------------------------
    6066             :              */
    6067    31480200 :             double dfDensity = 1.0;
    6068             : 
    6069    31480200 :             if (poWK->pafUnifiedSrcDensity != nullptr)
    6070             :             {
    6071     1360180 :                 dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    6072     1360180 :                 if (dfDensity < SRC_DENSITY_THRESHOLD)
    6073             :                 {
                                           // Too transparent: drop the pixel, unless the edge
                                           // adjustment succeeds, in which case the density is
                                           // re-read at the (presumably updated) iSrcOffset.
    6074     1308710 :                     if (!bOneSourceCornerFailsToReproject)
    6075             :                     {
    6076     1308710 :                         continue;
    6077             :                     }
    6078           0 :                     else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
    6079             :                                  psJob, iSrcOffset))
    6080             :                     {
    6081           0 :                         dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    6082             :                     }
    6083             :                     else
    6084             :                     {
    6085           0 :                         continue;
    6086             :                     }
    6087             :                 }
    6088             :             }
    6089             : 
                                   // Same policy for the unified validity mask: an invalid
                                   // source pixel is skipped unless the edge adjustment
                                   // succeeds.
    6090    59800100 :             if (poWK->panUnifiedSrcValid != nullptr &&
    6091    29628600 :                 !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    6092             :             {
    6093    29531000 :                 if (!bOneSourceCornerFailsToReproject)
    6094             :                 {
    6095    29528700 :                     continue;
    6096             :                 }
    6097        2229 :                 else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
    6098             :                 {
    6099           0 :                     continue;
    6100             :                 }
    6101             :             }
    6102             : 
    6103             :             /* ====================================================================
    6104             :              */
    6105             :             /*      Loop processing each band. */
    6106             :             /* ====================================================================
    6107             :              */
                                   // Becomes true as soon as one band writes a value; the
                                   // destination density/validity is only updated in that case.
    6108      642736 :             bool bHasFoundDensity = false;
    6109             : 
                                   // iDstY is widened to GPtrDiff_t before the multiplication,
                                   // so the offset is computed in that type rather than in int.
    6110      642736 :             const GPtrDiff_t iDstOffset =
    6111      642736 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    6112     1717090 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6113             :             {
    6114     1074360 :                 double dfBandDensity = 0.0;
    6115     1074360 :                 double dfValueReal = 0.0;
    6116             : 
    6117             :                 /* --------------------------------------------------------------------
    6118             :                  */
    6119             :                 /*      Collect the source value. */
    6120             :                 /* --------------------------------------------------------------------
    6121             :                  */
                                       // Dispatch, in order:
                                       //  - nearest neighbour, or a source that is one pixel
                                       //    wide or high: direct read at iSrcOffset;
                                       //  - bilinear with bUse4SamplesFormula: 4-sample helper;
                                       //  - cubic with bUse4SamplesFormula: 4-sample helper,
                                       //    with density-mask variants for Byte / UInt16 /
                                       //    other working types;
                                       //  - anything else: psWrkStruct->pfnGWKResample().
                                       // Source coordinates are passed relative to the source
                                       // window (nSrcXOff / nSrcYOff subtracted).
    6122     1074360 :                 if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
    6123             :                     nSrcYSize == 1)
    6124             :                 {
    6125             :                     // FALSE is returned if dfBandDensity == 0, which is
    6126             :                     // checked below.
    6127      151448 :                     CPL_IGNORE_RET_VAL(GWKGetPixelValueReal(
    6128             :                         poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal));
    6129             :                 }
    6130      922909 :                 else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
    6131             :                 {
                                           // The helper also takes an imaginary-part output;
                                           // it is received in a throwaway local.
    6132        1486 :                     double dfValueImagIgnored = 0.0;
    6133        1486 :                     GWKBilinearResample4Sample(
    6134        1486 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6135        1486 :                         padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    6136        1486 :                         &dfValueReal, &dfValueImagIgnored);
    6137             :                 }
    6138      921423 :                 else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
    6139             :                 {
    6140      299992 :                     if (bSrcMaskIsDensity)
    6141             :                     {
    6142         361 :                         if (poWK->eWorkingDataType == GDT_Byte)
    6143             :                         {
    6144         361 :                             GWKCubicResampleSrcMaskIsDensity4SampleRealT<GByte>(
    6145         361 :                                 poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6146         361 :                                 padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    6147             :                                 &dfValueReal);
    6148             :                         }
    6149           0 :                         else if (poWK->eWorkingDataType == GDT_UInt16)
    6150             :                         {
    6151             :                             GWKCubicResampleSrcMaskIsDensity4SampleRealT<
    6152           0 :                                 GUInt16>(poWK, iBand,
    6153           0 :                                          padfX[iDstX] - poWK->nSrcXOff,
    6154           0 :                                          padfY[iDstX] - poWK->nSrcYOff,
    6155             :                                          &dfBandDensity, &dfValueReal);
    6156             :                         }
    6157             :                         else
    6158             :                         {
    6159           0 :                             GWKCubicResampleSrcMaskIsDensity4SampleReal(
    6160           0 :                                 poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6161           0 :                                 padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    6162             :                                 &dfValueReal);
    6163             :                         }
    6164             :                     }
    6165             :                     else
    6166             :                     {
    6167      299631 :                         double dfValueImagIgnored = 0.0;
    6168      299631 :                         GWKCubicResample4Sample(
    6169      299631 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6170      299631 :                             padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    6171             :                             &dfValueReal, &dfValueImagIgnored);
    6172      299992 :                     }
    6173             :                 }
    6174             :                 else
    6175             : #ifdef DEBUG
    6176             :                     // Only useful for clang static analyzer.
    6177      621431 :                     if (psWrkStruct != nullptr)
    6178             : #endif
    6179             :                     {
    6180      621431 :                         double dfValueImagIgnored = 0.0;
    6181      621431 :                         psWrkStruct->pfnGWKResample(
    6182      621431 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6183      621431 :                             padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    6184             :                             &dfValueReal, &dfValueImagIgnored, psWrkStruct);
    6185             :                     }
    6186             : 
    6187             :                 // If we didn't find any valid inputs skip to next band.
    6188     1074360 :                 if (dfBandDensity < BAND_DENSITY_THRESHOLD)
    6189           0 :                     continue;
    6190             : 
    6191     1074360 :                 if (poWK->bApplyVerticalShift)
    6192             :                 {
                                           // A non-finite Z from the transformer skips this
                                           // band only; the band loop carries on.
    6193           0 :                     if (!std::isfinite(padfZ[iDstX]))
    6194           0 :                         continue;
    6195             :                     // Subtract padfZ[] since the coordinate transformation is
    6196             :                     // from target to source
    6197           0 :                     dfValueReal =
    6198           0 :                         dfValueReal * poWK->dfMultFactorVerticalShift -
    6199           0 :                         padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
    6200             :                 }
    6201             : 
    6202     1074360 :                 bHasFoundDensity = true;
    6203             : 
    6204             :                 /* --------------------------------------------------------------------
    6205             :                  */
    6206             :                 /*      We have a computed value from the source.  Now apply it
    6207             :                  * to      */
    6208             :                 /*      the destination pixel. */
    6209             :                 /* --------------------------------------------------------------------
    6210             :                  */
    6211     1074360 :                 GWKSetPixelValueReal(poWK, iBand, iDstOffset, dfBandDensity,
    6212             :                                      dfValueReal);
    6213             :             }
    6214             : 
                                   // No band wrote anything: leave destination density and
                                   // validity untouched for this pixel.
    6215      642736 :             if (!bHasFoundDensity)
    6216           0 :                 continue;
    6217             : 
    6218             :             /* --------------------------------------------------------------------
    6219             :              */
    6220             :             /*      Update destination density/validity masks. */
    6221             :             /* --------------------------------------------------------------------
    6222             :              */
    6223      642736 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    6224             : 
    6225      642736 :             if (poWK->panDstValid != nullptr)
    6226             :             {
    6227      101716 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    6228             :             }
    6229             :         }  // Next iDstX.
    6230             : 
    6231             :         /* --------------------------------------------------------------------
    6232             :          */
    6233             :         /*      Report progress to the user, and optionally cancel out. */
    6234             :         /* --------------------------------------------------------------------
    6235             :          */
                               // A non-zero return from the job's progress callback stops the
                               // line loop; the cleanup below still runs.
    6236       22120 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    6237           0 :             break;
    6238             :     }
    6239             : 
    6240             :     /* -------------------------------------------------------------------- */
    6241             :     /*      Cleanup and return.                                             */
    6242             :     /* -------------------------------------------------------------------- */
    6243         155 :     CPLFree(padfX);
    6244         155 :     CPLFree(padfY);
    6245         155 :     CPLFree(padfZ);
    6246         155 :     CPLFree(pabSuccess);
    6247         155 :     if (psWrkStruct)
    6248         128 :         GWKResampleDeleteWrkStruct(psWrkStruct);
    6249         155 : }
    6250             : 
    6251         155 : static CPLErr GWKRealCase(GDALWarpKernel *poWK)
    6252             : {
                           // Entry point of the real-valued (non-complex) warper: hands
                           // GWKRealCaseThread to GWKRun() as the worker routine, tagged with
                           // the label "GWKRealCase", and returns GWKRun()'s CPLErr unchanged.
                           // NOTE(review): GWKRun() is defined outside this excerpt; how it
                           // splits the work into jobs is not visible here.
    6253         155 :     return GWKRun(poWK, "GWKRealCase", GWKRealCaseThread);
    6254             : }
    6255             : 
    6256             : /************************************************************************/
    6257             : /*                 GWKCubicResampleNoMasks4MultiBandT()                 */
    6258             : /************************************************************************/
    6259             : 
    6260             : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
    6261             : /* and enough SSE registers */
    6262             : #if defined(USE_SSE2)
    6263             : 
    6264      238596 : static inline float Convolute4x4(const __m128 row0, const __m128 row1,
    6265             :                                  const __m128 row2, const __m128 row3,
    6266             :                                  const __m128 weightsXY0,
    6267             :                                  const __m128 weightsXY1,
    6268             :                                  const __m128 weightsXY2,
    6269             :                                  const __m128 weightsXY3)
    6270             : {
                           // Weighted sum of a 4x4 neighbourhood held as four SSE registers.
                           // Each rowN is multiplied lane-wise by its weightsXYN, the four
                           // products are added lane-wise as (0 + 1) + (2 + 3), and the
                           // resulting register is reduced to one float by
                           // XMMHorizontalAdd().
                           // NOTE(review): XMMHorizontalAdd() is defined outside this
                           // excerpt; presumably it sums the four lanes -- confirm.
    6271     1670170 :     return XMMHorizontalAdd(_mm_add_ps(
    6272             :         _mm_add_ps(_mm_mul_ps(row0, weightsXY0), _mm_mul_ps(row1, weightsXY1)),
    6273             :         _mm_add_ps(_mm_mul_ps(row2, weightsXY2),
    6274      238596 :                    _mm_mul_ps(row3, weightsXY3))));
    6275             : }
    6276             : 
    6277             : template <class T>
    6278       81323 : static void GWKCubicResampleNoMasks4MultiBandT(const GDALWarpKernel *poWK,
    6279             :                                                double dfSrcX, double dfSrcY,
    6280             :                                                const GPtrDiff_t iDstOffset)
    6281             : {
                           // Computes the value at source position (dfSrcX, dfSrcY) for every
                           // band of poWK and stores it at index iDstOffset of the matching
                           // destination band buffer. T is the element type through which
                           // papabySrcImage[] / papabyDstImage[] are accessed.
                           //
                           // No source validity or density mask is consulted. On exit the
                           // destination density, when the array exists, is set to 1.0f.
                           //
                           // Away from the source borders a 4x4 cubic convolution is used;
                           // the weights are computed once and shared by all bands. When the
                           // 4x4 window would not fit inside the source,
                           // GWKBilinearResampleNoMasks4SampleT() is used for every band
                           // instead.
                       
                           // static_cast<int> truncates toward zero.
    6282       81323 :     const double dfSrcXShifted = dfSrcX - 0.5;
    6283       81323 :     const int iSrcX = static_cast<int>(dfSrcXShifted);
    6284       81323 :     const double dfSrcYShifted = dfSrcY - 0.5;
    6285       81323 :     const int iSrcY = static_cast<int>(dfSrcYShifted);
    6286       81323 :     const GPtrDiff_t iSrcOffset =
    6287       81323 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    6288             : 
    6289             :     // Get the bilinear interpolation at the image borders.
    6290       81323 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    6291       80326 :         iSrcY + 2 >= poWK->nSrcYSize)
    6292             :     {
    6293        7164 :         for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6294             :         {
    6295             :             T value;
    6296        5373 :             GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    6297             :                                                &value);
    6298        5373 :             reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
    6299             :                 value;
    6300        1791 :         }
    6301             :     }
    6302             :     else
    6303             :     {
                               // Fractional position inside the pixel, in single precision.
    6304       79532 :         const float fDeltaX = static_cast<float>(dfSrcXShifted) - iSrcX;
    6305       79532 :         const float fDeltaY = static_cast<float>(dfSrcYShifted) - iSrcY;
    6306             : 
    6307             :         float afCoeffsX[4];
    6308             :         float afCoeffsY[4];
    6309       79532 :         GWKCubicComputeWeights(fDeltaX, afCoeffsX);
    6310       79532 :         GWKCubicComputeWeights(fDeltaY, afCoeffsY);
                               // weightsXYn = afCoeffsY[n] * (the four X weights): the weights
                               // of row n of the 4x4 window, built once for all bands.
    6311       79532 :         const auto weightsX = _mm_loadu_ps(afCoeffsX);
    6312             :         const auto weightsXY0 =
    6313      159064 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[0]), weightsX);
    6314             :         const auto weightsXY1 =
    6315      159064 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[1]), weightsX);
    6316             :         const auto weightsXY2 =
    6317      159064 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[2]), weightsX);
    6318             :         const auto weightsXY3 =
    6319       79532 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[3]), weightsX);
    6320             : 
                               // Offset of the window's first element: one row above and one
                               // column left of (iSrcX, iSrcY).
    6321       79532 :         const GPtrDiff_t iOffset = iSrcOffset - poWK->nSrcXSize - 1;
    6322             : 
    6323       79532 :         int iBand = 0;
    6324             :         // Process 2 bands at a time
                               // NOTE(review): XMMLoad4Values() is defined outside this
                               // excerpt; presumably it loads 4 consecutive T values as four
                               // floats -- confirm.
    6325      159064 :         for (; iBand + 1 < poWK->nBands; iBand += 2)
    6326             :         {
    6327       79532 :             const T *CPL_RESTRICT pBand0 =
    6328       79532 :                 reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
    6329       79532 :             const auto row0_0 = XMMLoad4Values(pBand0 + iOffset);
    6330             :             const auto row1_0 =
    6331       79532 :                 XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
    6332             :             const auto row2_0 =
    6333       79532 :                 XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
    6334             :             const auto row3_0 =
    6335       79532 :                 XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
    6336             : 
    6337       79532 :             const T *CPL_RESTRICT pBand1 =
    6338       79532 :                 reinterpret_cast<const T *>(poWK->papabySrcImage[iBand + 1]);
    6339       79532 :             const auto row0_1 = XMMLoad4Values(pBand1 + iOffset);
    6340             :             const auto row1_1 =
    6341       79532 :                 XMMLoad4Values(pBand1 + iOffset + poWK->nSrcXSize);
    6342             :             const auto row2_1 =
    6343       79532 :                 XMMLoad4Values(pBand1 + iOffset + 2 * poWK->nSrcXSize);
    6344             :             const auto row3_1 =
    6345       79532 :                 XMMLoad4Values(pBand1 + iOffset + 3 * poWK->nSrcXSize);
    6346             : 
    6347             :             const float fValue_0 =
    6348       79532 :                 Convolute4x4(row0_0, row1_0, row2_0, row3_0, weightsXY0,
    6349             :                              weightsXY1, weightsXY2, weightsXY3);
    6350             : 
    6351             :             const float fValue_1 =
    6352       79532 :                 Convolute4x4(row0_1, row1_1, row2_1, row3_1, weightsXY0,
    6353             :                              weightsXY1, weightsXY2, weightsXY3);
    6354             : 
    6355       79532 :             T *CPL_RESTRICT pDstBand0 =
    6356       79532 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
    6357       79532 :             pDstBand0[iDstOffset] = GWKClampValueT<T>(fValue_0);
    6358             : 
    6359       79532 :             T *CPL_RESTRICT pDstBand1 =
    6360       79532 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand + 1]);
    6361       79532 :             pDstBand1[iDstOffset] = GWKClampValueT<T>(fValue_1);
    6362             :         }
                               // Odd band count: one band is left after the pairwise loop.
    6363       79532 :         if (iBand < poWK->nBands)
    6364             :         {
    6365       79532 :             const T *CPL_RESTRICT pBand0 =
    6366       79532 :                 reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
    6367       79532 :             const auto row0 = XMMLoad4Values(pBand0 + iOffset);
    6368             :             const auto row1 =
    6369       79532 :                 XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
    6370             :             const auto row2 =
    6371       79532 :                 XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
    6372             :             const auto row3 =
    6373       79532 :                 XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
    6374             : 
    6375             :             const float fValue =
    6376       79532 :                 Convolute4x4(row0, row1, row2, row3, weightsXY0, weightsXY1,
    6377             :                              weightsXY2, weightsXY3);
    6378             : 
    6379       79532 :             T *CPL_RESTRICT pDstBand =
    6380       79532 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
    6381       79532 :             pDstBand[iDstOffset] = GWKClampValueT<T>(fValue);
    6382             :         }
    6383             :     }
    6384             : 
    6385       81323 :     if (poWK->pafDstDensity)
    6386         441 :         poWK->pafDstDensity[iDstOffset] = 1.0f;
    6387       81323 : }
    6388             : 
    6389             : #endif  // defined(USE_SSE2)
    6390             : 
    6391             : /************************************************************************/
    6392             : /*                GWKResampleNoMasksOrDstDensityOnlyThreadInternal()    */
    6393             : /************************************************************************/
    6394             : 
                       // Worker for one GWKJobStruct: resamples destination rows
                       // [psJob->iYMin, psJob->iYMax) into poWK->papabyDstImage. This function
                       // itself reads no source validity/density mask; the only mask-like output
                       // it touches is poWK->pafDstDensity, which is set to 1.0f when present.
                       //
                       // Template parameters:
                       //   T                   - pixel type the source and destination band
                       //                         buffers are reinterpreted as.
                       //   eResample           - resampling algorithm, resolved at compile time.
                       //   bUse4SamplesFormula - when non-zero, bilinear/cubic go through the
                       //                         4-sample helpers instead of GWKResampleNoMasksT().
                       //
                       // pData must point to a GWKJobStruct.
    6395             : template <class T, GDALResampleAlg eResample, int bUse4SamplesFormula>
    6396        1177 : static void GWKResampleNoMasksOrDstDensityOnlyThreadInternal(void *pData)
    6397             : 
    6398             : {
    6399        1177 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    6400        1177 :     GDALWarpKernel *poWK = psJob->poWK;
    6401        1177 :     const int iYMin = psJob->iYMin;
    6402        1177 :     const int iYMax = psJob->iYMax;
                           // Factor applied to the transformed Z when a vertical shift is requested;
                           // the warp option defaults to 1.0, and the value is unused (0.0) otherwise.
    6403        1159 :     const double dfMultFactorVerticalShiftPipeline =
    6404        1177 :         poWK->bApplyVerticalShift
    6405          18 :             ? CPLAtof(CSLFetchNameValueDef(
    6406          18 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    6407             :                   "1.0"))
    6408             :             : 0.0;
    6409             : 
    6410        1177 :     const int nDstXSize = poWK->nDstXSize;
    6411        1177 :     const int nSrcXSize = poWK->nSrcXSize;
    6412        1177 :     const int nSrcYSize = poWK->nSrcYSize;
    6413             : 
    6414             :     /* -------------------------------------------------------------------- */
    6415             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    6416             :     /*      scanlines worth of positions.                                   */
    6417             :     /* -------------------------------------------------------------------- */
    6418             : 
    6419             :     // padfX holds 2 * nDstXSize doubles: the first half is the per-row working
                           // copy handed to the transformer, the second half caches the precomputed
                           // destination X coordinates that are copied back in for every row.
    6420             :     double *padfX =
    6421        1177 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    6422             :     double *padfY =
    6423        1177 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6424             :     double *padfZ =
    6425        1177 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6426        1177 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    6427             : 
                           // Zero-initialised weight buffers of 1 + 2 * radius entries, passed to
                           // GWKResampleNoMasksT() in the general (non 4-sample) branch below.
    6428        1177 :     const int nXRadius = poWK->nXRadius;
    6429             :     double *padfWeightsX =
    6430        1177 :         static_cast<double *>(CPLCalloc(1 + nXRadius * 2, sizeof(double)));
    6431             :     double *padfWeightsY = static_cast<double *>(
    6432        1177 :         CPLCalloc(1 + poWK->nYRadius * 2, sizeof(double)));
                           // Both options default to 0; rounding below only runs when
                           // SRC_COORD_PRECISION is strictly positive.
    6433        1177 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    6434        1177 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    6435        1177 :     const double dfErrorThreshold = CPLAtof(
    6436        1177 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    6437             : 
    6438             :     // Precompute the destination pixel-centre X coordinates (cached half of padfX).
    6439      254841 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6440      253664 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    6441             : 
    6442             :     /* ==================================================================== */
    6443             :     /*      Loop over output lines.                                         */
    6444             :     /* ==================================================================== */
    6445      130049 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    6446             :     {
    6447             :         /* --------------------------------------------------------------------
    6448             :          */
    6449             :         /*      Setup points to transform to source image space. */
    6450             :         /* --------------------------------------------------------------------
    6451             :          */
    6452      128873 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    6453      128873 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    6454    58394094 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6455    58265266 :             padfY[iDstX] = dfY;
    6456      128873 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    6457             : 
    6458             :         /* --------------------------------------------------------------------
    6459             :          */
    6460             :         /*      Transform the points from destination pixel/line coordinates */
    6461             :         /*      to source pixel/line coordinates. */
    6462             :         /* --------------------------------------------------------------------
    6463             :          */
    6464      128873 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    6465             :                              padfY, padfZ, pabSuccess);
    6466      128873 :         if (dfSrcCoordPrecision > 0.0)
    6467             :         {
    6468        1000 :             GWKRoundSourceCoordinates(
    6469             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    6470             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    6471        1000 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    6472             :         }
    6473             : 
    6474             :         /* ====================================================================
    6475             :          */
    6476             :         /*      Loop over pixels in output scanline. */
    6477             :         /* ====================================================================
    6478             :          */
    6479    58195534 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6480             :         {
                                   // Pixels rejected by GWKCheckAndComputeSrcOffsets() are left
                                   // untouched in the destination; otherwise iSrcOffset receives the
                                   // source offset used by the nearest-neighbour branch below.
    6481    58066596 :             GPtrDiff_t iSrcOffset = 0;
    6482    58066596 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    6483             :                                               padfX, padfY, nSrcXSize,
    6484             :                                               nSrcYSize, iSrcOffset))
    6485     6545209 :                 continue;
    6486             : 
    6487             :             /* ====================================================================
    6488             :              */
    6489             :             /*      Loop processing each band. */
    6490             :             /* ====================================================================
    6491             :              */
                                   // The cast widens iDstY before the multiplication so the offset
                                   // is computed in GPtrDiff_t rather than int.
    6492    51759962 :             const GPtrDiff_t iDstOffset =
    6493    51759962 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    6494             : 
    6495             : #if defined(USE_SSE2)
                                   // Multi-band cubic fast path for GByte/GUInt16: the helper writes
                                   // the destination bands and pafDstDensity itself, so the generic
                                   // band loop is skipped. Not used when a vertical shift is applied.
    6496             :             if constexpr (bUse4SamplesFormula && eResample == GRA_Cubic &&
    6497             :                           (std::is_same<T, GByte>::value ||
    6498             :                            std::is_same<T, GUInt16>::value))
    6499             :             {
    6500      752574 :                 if (poWK->nBands > 1 && !poWK->bApplyVerticalShift)
    6501             :                 {
    6502       81323 :                     GWKCubicResampleNoMasks4MultiBandT<T>(
    6503       81323 :                         poWK, padfX[iDstX] - poWK->nSrcXOff,
    6504       81323 :                         padfY[iDstX] - poWK->nSrcYOff, iDstOffset);
    6505             : 
    6506       81323 :                     continue;
    6507             :                 }
    6508             :             }
    6509             : #endif  // defined(USE_SSE2)
    6510             : 
                                   // dfInvWeights is only passed to GWKResampleNoMasksT(); it is unused
                                   // in the instantiations that take another branch.
    6511    51678639 :             [[maybe_unused]] double dfInvWeights = 0;
    6512   144421098 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6513             :             {
    6514    92899567 :                 T value = 0;
    6515             :                 if constexpr (eResample == GRA_NearestNeighbour)
    6516             :                 {
                                           // Nearest neighbour: plain copy of the source sample.
    6517    77005949 :                     value = reinterpret_cast<T *>(
    6518    77005949 :                         poWK->papabySrcImage[iBand])[iSrcOffset];
    6519             :                 }
    6520             :                 else if constexpr (bUse4SamplesFormula)
    6521             :                 {
    6522             :                     if constexpr (eResample == GRA_Bilinear)
    6523     4806886 :                         GWKBilinearResampleNoMasks4SampleT(
    6524     4806886 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6525     4806886 :                             padfY[iDstX] - poWK->nSrcYOff, &value);
    6526             :                     else
    6527     1906603 :                         GWKCubicResampleNoMasks4SampleT(
    6528     1906603 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6529     1906603 :                             padfY[iDstX] - poWK->nSrcYOff, &value);
    6530             :                 }
    6531             :                 else
    6532             :                 {
                                           // General kernel, using the preallocated weight buffers.
    6533     9180129 :                     GWKResampleNoMasksT(
    6534     9180129 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6535     9180129 :                         padfY[iDstX] - poWK->nSrcYOff, &value, padfWeightsX,
    6536             :                         padfWeightsY, dfInvWeights);
    6537             :                 }
    6538             : 
    6539    92899487 :                 if (poWK->bApplyVerticalShift)
    6540             :                 {
                                           // A non-finite Z skips both the density write and the
                                           // image write for this band.
    6541         818 :                     if (!std::isfinite(padfZ[iDstX]))
    6542           0 :                         continue;
    6543             :                     // Subtract padfZ[] since the coordinate transformation is
    6544             :                     // from target to source
    6545           3 :                     value = GWKClampValueT<T>(
    6546         818 :                         value * poWK->dfMultFactorVerticalShift -
    6547         818 :                         padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
    6548             :                 }
    6549             : 
                                       // Written once per band, always with the same value.
    6550    92742327 :                 if (poWK->pafDstDensity)
    6551    11712299 :                     poWK->pafDstDensity[iDstOffset] = 1.0f;
    6552             : 
    6553    92742327 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
    6554             :                     value;
    6555             :             }
    6556             :         }
    6557             : 
    6558             :         /* --------------------------------------------------------------------
    6559             :          */
    6560             :         /*      Report progress to the user, and optionally cancel out. */
    6561             :         /* --------------------------------------------------------------------
    6562             :          */
                               // A truthy return from pfnProgress stops the row loop early; the
                               // buffers are still released below.
    6563      128873 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    6564           1 :             break;
    6565             :     }
    6566             : 
    6567             :     /* -------------------------------------------------------------------- */
    6568             :     /*      Cleanup and return.                                             */
    6569             :     /* -------------------------------------------------------------------- */
    6570        1177 :     CPLFree(padfX);
    6571        1177 :     CPLFree(padfY);
    6572        1177 :     CPLFree(padfZ);
    6573        1177 :     CPLFree(pabSuccess);
    6574        1177 :     CPLFree(padfWeightsX);
    6575        1177 :     CPLFree(padfWeightsY);
    6576        1177 : }
    6577             : 
    6578             : template <class T, GDALResampleAlg eResample>
    6579         921 : static void GWKResampleNoMasksOrDstDensityOnlyThread(void *pData)
    6580             : {
                           // Thin entry point that always selects the general kernel
                           // (bUse4SamplesFormula = FALSE) of the internal worker.
    6581         921 :     GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
    6582             :         pData);
    6583         921 : }
    6584             : 
    6585             : template <class T, GDALResampleAlg eResample>
    6586         256 : static void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread(void *pData)
    6587             : 
    6588             : {
                           // Entry point for bilinear/cubic only (enforced by the static_assert).
                           // Picks, at run time, between the 4-sample instantiation and the
                           // general one of the internal worker.
    6589         256 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    6590         256 :     GDALWarpKernel *poWK = psJob->poWK;
    6591             :     static_assert(eResample == GRA_Bilinear || eResample == GRA_Cubic);
                           // The 4-sample formula is used only when both kernel scale factors are
                           // at least 0.95.
    6592         256 :     const bool bUse4SamplesFormula =
    6593         256 :         poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
    6594         256 :     if (bUse4SamplesFormula)
    6595         156 :         GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, TRUE>(
    6596             :             pData);
    6597             :     else
    6598         100 :         GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
    6599             :             pData);
    6600         256 : }
    6601             : 
    6602         866 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6603             : {
                           // Hands the GByte / GRA_NearestNeighbour instantiation of
                           // GWKResampleNoMasksOrDstDensityOnlyThread to GWKRun(), together with
                           // this function's name, and returns GWKRun()'s CPLErr.
    6604         866 :     return GWKRun(
    6605             :         poWK, "GWKNearestNoMasksOrDstDensityOnlyByte",
    6606         866 :         GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_NearestNeighbour>);
    6607             : }
    6608             : 
    6609         126 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6610             : {
                           // Hands the GByte / GRA_Bilinear instantiation of the Has4Sample entry
                           // point to GWKRun(), together with this function's name, and returns
                           // GWKRun()'s CPLErr.
    6611         126 :     return GWKRun(
    6612             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyByte",
    6613             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte,
    6614         126 :                                                            GRA_Bilinear>);
    6615             : }
    6616             : 
    6617          72 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6618             : {
                           // Hands the GByte / GRA_Cubic instantiation of the Has4Sample entry
                           // point to GWKRun(), together with this function's name, and returns
                           // GWKRun()'s CPLErr.
    6619          72 :     return GWKRun(
    6620             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyByte",
    6621          72 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte, GRA_Cubic>);
    6622             : }
    6623             : 
    6624           9 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
    6625             : {
                           // Hands the float / GRA_Cubic instantiation of the Has4Sample entry
                           // point to GWKRun(), together with this function's name, and returns
                           // GWKRun()'s CPLErr.
    6626           9 :     return GWKRun(
    6627             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyFloat",
    6628           9 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, GRA_Cubic>);
    6629             : }
    6630             : 
    6631             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    6632             : 
    6633             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
    6634             : {
                           // Only compiled when INSTANTIATE_FLOAT64_SSE2_IMPL is defined.
                           // Hands the double / GRA_Cubic instantiation of the Has4Sample entry
                           // point to GWKRun(), together with this function's name, and returns
                           // GWKRun()'s CPLErr.
    6635             :     return GWKRun(
    6636             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyDouble",
    6637             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double, GRA_Cubic>);
    6638             : }
    6639             : #endif
    6640             : 
    6641          12 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6642             : {
                           // Hands the GByte / GRA_CubicSpline instantiation of
                           // GWKResampleNoMasksOrDstDensityOnlyThread (general kernel, no 4-sample
                           // variant) to GWKRun(), together with this function's name, and returns
                           // GWKRun()'s CPLErr.
    6643          12 :     return GWKRun(
    6644             :         poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyByte",
    6645          12 :         GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_CubicSpline>);
    6646             : }
    6647             : 
    6648             : /************************************************************************/
    6649             : /*                          GWKNearestThread()                          */
    6650             : /*                                                                      */
    6651             : /*      Nearest neighbour resampling worker, templated on the pixel     */
    6652             : /*      type T, that honours the source validity and density masks.     */
    6653             : /*      GWKNearestByte() runs it with T = GByte.                        */
    6654             : /************************************************************************/
    6655             : 
    6656         339 : template <class T> static void GWKNearestThread(void *pData)
    6657             : 
    6658             : {
                           // pData must point to a GWKJobStruct; destination rows
                           // [psJob->iYMin, psJob->iYMax) are processed.
    6659         339 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    6660         339 :     GDALWarpKernel *poWK = psJob->poWK;
    6661         339 :     const int iYMin = psJob->iYMin;
    6662         339 :     const int iYMax = psJob->iYMax;
                           // Factor applied to the transformed Z when a vertical shift is requested;
                           // the warp option defaults to 1.0, and the value is unused (0.0) otherwise.
    6663         339 :     const double dfMultFactorVerticalShiftPipeline =
    6664         339 :         poWK->bApplyVerticalShift
    6665           0 :             ? CPLAtof(CSLFetchNameValueDef(
    6666           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    6667             :                   "1.0"))
    6668             :             : 0.0;
    6669             : 
    6670         339 :     const int nDstXSize = poWK->nDstXSize;
    6671         339 :     const int nSrcXSize = poWK->nSrcXSize;
    6672         339 :     const int nSrcYSize = poWK->nSrcYSize;
    6673             : 
    6674             :     /* -------------------------------------------------------------------- */
    6675             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    6676             :     /*      scanlines worth of positions.                                   */
    6677             :     /* -------------------------------------------------------------------- */
    6678             : 
    6679             :     // padfX holds 2 * nDstXSize doubles: the first half is the per-row working
                           // copy handed to the transformer, the second half caches the precomputed
                           // destination X coordinates that are copied back in for every row.
    6680             :     double *padfX =
    6681         339 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    6682             :     double *padfY =
    6683         339 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6684             :     double *padfZ =
    6685         339 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6686         339 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    6687             : 
                           // Both options default to 0; rounding below only runs when
                           // SRC_COORD_PRECISION is strictly positive.
    6688         339 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    6689         339 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    6690         339 :     const double dfErrorThreshold = CPLAtof(
    6691         339 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    6692             : 
                           // Evaluated once per job; it decides below whether an invalid source
                           // pixel is dropped outright or first retried via
                           // GWKAdjustSrcOffsetOnEdge().
    6693             :     const bool bOneSourceCornerFailsToReproject =
    6694         339 :         GWKOneSourceCornerFailsToReproject(psJob);
    6695             : 
    6696             :     // Precompute the destination pixel-centre X coordinates (cached half of padfX).
    6697       48911 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6698       48572 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    6699             : 
    6700             :     /* ==================================================================== */
    6701             :     /*      Loop over output lines.                                         */
    6702             :     /* ==================================================================== */
    6703       36409 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    6704             :     {
    6705             : 
    6706             :         /* --------------------------------------------------------------------
    6707             :          */
    6708             :         /*      Setup points to transform to source image space. */
    6709             :         /* --------------------------------------------------------------------
    6710             :          */
    6711       36070 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    6712       36070 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    6713     7637615 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6714     7601550 :             padfY[iDstX] = dfY;
    6715       36070 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    6716             : 
    6717             :         /* --------------------------------------------------------------------
    6718             :          */
    6719             :         /*      Transform the points from destination pixel/line coordinates */
    6720             :         /*      to source pixel/line coordinates. */
    6721             :         /* --------------------------------------------------------------------
    6722             :          */
    6723       36070 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    6724             :                              padfY, padfZ, pabSuccess);
    6725       36070 :         if (dfSrcCoordPrecision > 0.0)
    6726             :         {
    6727           0 :             GWKRoundSourceCoordinates(
    6728             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    6729             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    6730           0 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    6731             :         }
    6732             :         /* ====================================================================
    6733             :          */
    6734             :         /*      Loop over pixels in output scanline. */
    6735             :         /* ====================================================================
    6736             :          */
    6737     7637615 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6738             :         {
                                   // Pixels rejected by GWKCheckAndComputeSrcOffsets() are left
                                   // untouched in the destination.
    6739     7601550 :             GPtrDiff_t iSrcOffset = 0;
    6740     7601550 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    6741             :                                               padfX, padfY, nSrcXSize,
    6742             :                                               nSrcYSize, iSrcOffset))
    6743     2117518 :                 continue;
    6744             : 
    6745             :             /* --------------------------------------------------------------------
    6746             :              */
    6747             :             /*      Do not try to apply invalid source pixels to the dest. */
    6748             :             /* --------------------------------------------------------------------
    6749             :              */
                                   // NOTE(review): GWKAdjustSrcOffsetOnEdge() presumably rewrites
                                   // iSrcOffset to a nearby valid pixel when it succeeds - confirm
                                   // against its definition.
    6750     7419936 :             if (poWK->panUnifiedSrcValid != nullptr &&
    6751      931241 :                 !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    6752             :             {
    6753       49670 :                 if (!bOneSourceCornerFailsToReproject)
    6754             :                 {
    6755       42185 :                     continue;
    6756             :                 }
    6757        7485 :                 else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
    6758             :                 {
    6759        5224 :                     continue;
    6760             :                 }
    6761             :             }
    6762             : 
    6763             :             /* --------------------------------------------------------------------
    6764             :              */
    6765             :             /*      Do not try to apply transparent source pixels to the
    6766             :              * destination.*/
    6767             :             /* --------------------------------------------------------------------
    6768             :              */
                                   // Density stays at 1.0 when there is no unified source density
                                   // buffer; otherwise pixels below SRC_DENSITY_THRESHOLD are skipped.
    6769     6441284 :             double dfDensity = 1.0;
    6770             : 
    6771     6441284 :             if (poWK->pafUnifiedSrcDensity != nullptr)
    6772             :             {
    6773     1064945 :                 dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    6774     1064945 :                 if (dfDensity < SRC_DENSITY_THRESHOLD)
    6775      957251 :                     continue;
    6776             :             }
    6777             : 
    6778             :             /* ====================================================================
    6779             :              */
    6780             :             /*      Loop processing each band. */
    6781             :             /* ====================================================================
    6782             :              */
    6783             : 
                                   // The cast widens iDstY before the multiplication so the offset
                                   // is computed in GPtrDiff_t rather than int.
    6784     5484032 :             const GPtrDiff_t iDstOffset =
    6785     5484032 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    6786             : 
    6787    12643414 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6788             :             {
    6789     7159332 :                 T value = 0;
    6790     7159332 :                 double dfBandDensity = 0.0;
    6791             : 
    6792             :                 /* --------------------------------------------------------------------
    6793             :                  */
    6794             :                 /*      Collect the source value. */
    6795             :                 /* --------------------------------------------------------------------
    6796             :                  */
                                       // Bands for which GWKGetPixelT() returns false are not written.
    6797     7159332 :                 if (GWKGetPixelT(poWK, iBand, iSrcOffset, &dfBandDensity,
    6798             :                                  &value))
    6799             :                 {
    6800             : 
    6801     7159332 :                     if (poWK->bApplyVerticalShift)
    6802             :                     {
                                               // A non-finite Z skips the write for this band only;
                                               // the density/validity update after the band loop
                                               // still runs.
    6803           0 :                         if (!std::isfinite(padfZ[iDstX]))
    6804           0 :                             continue;
    6805             :                         // Subtract padfZ[] since the coordinate transformation
    6806             :                         // is from target to source
    6807           0 :                         value = GWKClampValueT<T>(
    6808           0 :                             value * poWK->dfMultFactorVerticalShift -
    6809           0 :                             padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
    6810             :                     }
    6811             : 
    6812     7159332 :                     GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
    6813             :                                           dfBandDensity, value);
    6814             :                 }
    6815             :             }
    6816             : 
    6817             :             /* --------------------------------------------------------------------
    6818             :              */
    6819             :             /*      Mark this pixel valid/opaque in the output. */
    6820             :             /* --------------------------------------------------------------------
    6821             :              */
    6822     5484032 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    6823             : 
    6824     5484032 :             if (poWK->panDstValid != nullptr)
    6825             :             {
    6826     4854774 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    6827             :             }
    6828             :         } /* Next iDstX */
    6829             : 
    6830             :         /* --------------------------------------------------------------------
    6831             :          */
    6832             :         /*      Report progress to the user, and optionally cancel out. */
    6833             :         /* --------------------------------------------------------------------
    6834             :          */
                               // A truthy return from pfnProgress stops the row loop early; the
                               // buffers are still released below.
    6835       36070 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    6836           0 :             break;
    6837             :     }
    6838             : 
    6839             :     /* -------------------------------------------------------------------- */
    6840             :     /*      Cleanup and return.                                             */
    6841             :     /* -------------------------------------------------------------------- */
    6842         339 :     CPLFree(padfX);
    6843         339 :     CPLFree(padfY);
    6844         339 :     CPLFree(padfZ);
    6845         339 :     CPLFree(pabSuccess);
    6846         339 : }
    6847             : 
    6848         274 : static CPLErr GWKNearestByte(GDALWarpKernel *poWK)
    6849             : {
                           // Hands the GByte instantiation of GWKNearestThread to GWKRun(),
                           // together with this function's name, and returns GWKRun()'s CPLErr.
    6850         274 :     return GWKRun(poWK, "GWKNearestByte", GWKNearestThread<GByte>);
    6851             : }
    6852             : 
    6853          18 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6854             : {
                           // Hands the GInt16 / GRA_NearestNeighbour instantiation of
                           // GWKResampleNoMasksOrDstDensityOnlyThread to GWKRun(), together with
                           // this function's name, and returns GWKRun()'s CPLErr.
    6855          18 :     return GWKRun(
    6856             :         poWK, "GWKNearestNoMasksOrDstDensityOnlyShort",
    6857          18 :         GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_NearestNeighbour>);
    6858             : }
    6859             : 
    6860          18 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6861             : {  // Delegates to GWKRun() with the GInt16 / GRA_Bilinear instantiation of the "Has4Sample" callback.
    6862          18 :     return GWKRun(
    6863             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyShort",
    6864             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16,
    6865          18 :                                                            GRA_Bilinear>);
    6866             : }
    6867             : 
    6868           6 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
    6869             : {  // Delegates to GWKRun() with the GUInt16 / GRA_Bilinear instantiation of the "Has4Sample" callback.
    6870           6 :     return GWKRun(
    6871             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyUShort",
    6872             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16,
    6873           6 :                                                            GRA_Bilinear>);
    6874             : }
    6875             : 
    6876           5 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
    6877             : {  // Delegates to GWKRun() with the float / GRA_Bilinear instantiation of the "Has4Sample" callback.
    6878           5 :     return GWKRun(
    6879             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyFloat",
    6880             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float,
    6881           5 :                                                            GRA_Bilinear>);
    6882             : }
    6883             : 
    6884             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    6885             : 
    6886             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
    6887             : {  // double / GRA_Bilinear variant; only compiled when INSTANTIATE_FLOAT64_SSE2_IMPL is defined. Delegates to GWKRun().
    6888             :     return GWKRun(
    6889             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyDouble",
    6890             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double,
    6891             :                                                            GRA_Bilinear>);
    6892             : }
    6893             : #endif
    6894             : 
    6895           5 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6896             : {  // Delegates to GWKRun() with the GInt16 / GRA_Cubic instantiation of the "Has4Sample" callback.
    6897           5 :     return GWKRun(
    6898             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyShort",
    6899           5 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16, GRA_Cubic>);
    6900             : }
    6901             : 
    6902          12 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
    6903             : {  // Delegates to GWKRun() with the GUInt16 / GRA_Cubic instantiation of the "Has4Sample" callback.
    6904          12 :     return GWKRun(
    6905             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyUShort",
    6906          12 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16, GRA_Cubic>);
    6907             : }
    6908             : 
    6909           6 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6910             : {  // Delegates to GWKRun() with the GInt16 / GRA_CubicSpline instantiation of the no-masks callback.
    6911           6 :     return GWKRun(
    6912             :         poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyShort",
    6913           6 :         GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_CubicSpline>);
    6914             : }
    6915             : 
    6916           5 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
    6917             : {  // Delegates to GWKRun() with the GUInt16 / GRA_CubicSpline instantiation of the no-masks callback.
    6918           5 :     return GWKRun(
    6919             :         poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyUShort",
    6920           5 :         GWKResampleNoMasksOrDstDensityOnlyThread<GUInt16, GRA_CubicSpline>);
    6921             : }
    6922             : 
    6923          24 : static CPLErr GWKNearestShort(GDALWarpKernel *poWK)
    6924             : {  // Delegates to GWKRun(), passing GWKNearestThread<GInt16> as the callback, and returns GWKRun()'s CPLErr.
    6925          24 :     return GWKRun(poWK, "GWKNearestShort", GWKNearestThread<GInt16>);
    6926             : }
    6927             : 
    6928           0 : static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK)
    6929             : {  // Delegates to GWKRun(), passing GWKNearestThread<GUInt16> as the callback, and returns GWKRun()'s CPLErr.
    6930           0 :     return GWKRun(poWK, "GWKNearestUnsignedShort", GWKNearestThread<GUInt16>);
    6931             : }
    6932             : 
    6933          11 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
    6934             : {  // Delegates to GWKRun() with the float / GRA_NearestNeighbour instantiation of the no-masks callback.
    6935          11 :     return GWKRun(
    6936             :         poWK, "GWKNearestNoMasksOrDstDensityOnlyFloat",
    6937          11 :         GWKResampleNoMasksOrDstDensityOnlyThread<float, GRA_NearestNeighbour>);
    6938             : }
    6939             : 
    6940          37 : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK)
    6941             : {  // Delegates to GWKRun(), passing GWKNearestThread<float> as the callback, and returns GWKRun()'s CPLErr.
    6942          37 :     return GWKRun(poWK, "GWKNearestFloat", GWKNearestThread<float>);
    6943             : }
    6944             : 
    6945             : /************************************************************************/
    6946             : /*                           GWKAverageOrMode()                         */
    6947             : /*                                                                      */
    6948             : /************************************************************************/
    6949             : 
    6950             : static void GWKAverageOrModeThread(void *pData);
    6951             : 
    6952         130 : static CPLErr GWKAverageOrMode(GDALWarpKernel *poWK)
    6953             : {  // Delegates to GWKRun(), passing GWKAverageOrModeThread (which selects the algorithm from poWK->eResample) as the callback.
    6954         130 :     return GWKRun(poWK, "GWKAverageOrMode", GWKAverageOrModeThread);
    6955             : }
    6956             : 
    6957             : // Overall logic based on GWKGeneralCaseThread().
    6958         130 : static void GWKAverageOrModeThread(void *pData)
    6959             : {
    6960         130 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    6961         130 :     GDALWarpKernel *poWK = psJob->poWK;
    6962         130 :     const int iYMin = psJob->iYMin;
    6963         130 :     const int iYMax = psJob->iYMax;
    6964             :     const double dfMultFactorVerticalShiftPipeline =
    6965         130 :         poWK->bApplyVerticalShift
    6966         130 :             ? CPLAtof(CSLFetchNameValueDef(
    6967           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    6968             :                   "1.0"))
    6969         130 :             : 0.0;
    6970             : 
    6971         130 :     const int nDstXSize = poWK->nDstXSize;
    6972         130 :     const int nSrcXSize = poWK->nSrcXSize;
    6973         130 :     const int nSrcYSize = poWK->nSrcYSize;
    6974             : 
    6975             :     /* -------------------------------------------------------------------- */
    6976             :     /*      Find out which algorithm to use (small optim.)                  */
    6977             :     /* -------------------------------------------------------------------- */
    6978         130 :     int nAlgo = 0;
    6979             : 
    6980             :     // Only used for GRA_Mode
    6981         130 :     float *pafRealVals = nullptr;
    6982         130 :     float *pafCounts = nullptr;
    6983         130 :     int nBins = 0;
    6984         130 :     int nBinsOffset = 0;
    6985         130 :     const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
    6986             : 
    6987             :     // Only used with nAlgo == GWKAOM_Quant (GRA_Med, GRA_Q1, GRA_Q3).
    6988         130 :     float quant = 0.5;
    6989             : 
    6990             :     // To control array allocation only when data type is complex
    6991         130 :     const bool bIsComplex = GDALDataTypeIsComplex(poWK->eWorkingDataType) != 0;
    6992             : 
    6993         130 :     if (poWK->eResample == GRA_Average)
    6994             :     {
    6995          71 :         nAlgo = GWKAOM_Average;
    6996             :     }
    6997          59 :     else if (poWK->eResample == GRA_RMS)
    6998             :     {
    6999           9 :         nAlgo = GWKAOM_RMS;
    7000             :     }
    7001          50 :     else if (poWK->eResample == GRA_Mode)
    7002             :     {
    7003             :         // TODO check color table count > 256.
    7004          23 :         if (poWK->eWorkingDataType == GDT_Byte ||
    7005          17 :             poWK->eWorkingDataType == GDT_UInt16 ||
    7006          17 :             poWK->eWorkingDataType == GDT_Int16)
    7007             :         {
    7008          14 :             nAlgo = GWKAOM_Imode;
    7009             : 
    7010             :             // In the case of a paletted or non-paletted byte band,
    7011             :             // Input values are between 0 and 255.
    7012          14 :             if (poWK->eWorkingDataType == GDT_Byte)
    7013             :             {
    7014           6 :                 nBins = 256;
    7015             :             }
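    7016             :             // In the case of Int8, input values are between -128 and 127. NOTE(review): this branch is unreachable as written, because the enclosing condition does not test GDT_Int8.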
    7017           8 :             else if (poWK->eWorkingDataType == GDT_Int8)
    7018             :             {
    7019           0 :                 nBins = 256;
    7020           0 :                 nBinsOffset = 128;
    7021             :             }
    7022             :             // In the case of Int16, input values are between -32768 and 32767.
    7023           8 :             else if (poWK->eWorkingDataType == GDT_Int16)
    7024             :             {
    7025           8 :                 nBins = 65536;
    7026           8 :                 nBinsOffset = 32768;
    7027             :             }
    7028             :             // In the case of UInt16, input values are between 0 and 65535.
    7029           0 :             else if (poWK->eWorkingDataType == GDT_UInt16)
    7030             :             {
    7031           0 :                 nBins = 65536;
    7032             :             }
    7033             :             pafCounts =
    7034          14 :                 static_cast<float *>(VSI_MALLOC_VERBOSE(nBins * sizeof(float)));
    7035          14 :             if (pafCounts == nullptr)
    7036           0 :                 return;
    7037             :         }
    7038             :         else
    7039             :         {
    7040           9 :             nAlgo = GWKAOM_Fmode;
    7041             : 
    7042           9 :             if (nSrcXSize > 0 && nSrcYSize > 0)
    7043             :             {
    7044             :                 pafRealVals = static_cast<float *>(
    7045           9 :                     VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
    7046             :                 pafCounts = static_cast<float *>(
    7047           9 :                     VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
    7048           9 :                 if (pafRealVals == nullptr || pafCounts == nullptr)
    7049             :                 {
    7050           0 :                     VSIFree(pafRealVals);
    7051           0 :                     VSIFree(pafCounts);
    7052           0 :                     return;
    7053             :                 }
    7054             :             }
    7055             :         }
    7056             :     }
    7057          27 :     else if (poWK->eResample == GRA_Max)
    7058             :     {
    7059           6 :         nAlgo = GWKAOM_Max;
    7060             :     }
    7061          21 :     else if (poWK->eResample == GRA_Min)
    7062             :     {
    7063           5 :         nAlgo = GWKAOM_Min;
    7064             :     }
    7065          16 :     else if (poWK->eResample == GRA_Med)
    7066             :     {
    7067           6 :         nAlgo = GWKAOM_Quant;
    7068           6 :         quant = 0.5;
    7069             :     }
    7070          10 :     else if (poWK->eResample == GRA_Q1)
    7071             :     {
    7072           5 :         nAlgo = GWKAOM_Quant;
    7073           5 :         quant = 0.25;
    7074             :     }
    7075           5 :     else if (poWK->eResample == GRA_Q3)
    7076             :     {
    7077           5 :         nAlgo = GWKAOM_Quant;
    7078           5 :         quant = 0.75;
    7079             :     }
    7080             : #ifdef disabled
    7081             :     else if (poWK->eResample == GRA_Sum)
    7082             :     {
    7083             :         nAlgo = GWKAOM_Sum;
    7084             :     }
    7085             : #endif
    7086             :     else
    7087             :     {
    7088             :         // Other resample algorithms not permitted here.
    7089           0 :         CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread() ERROR, "
    7090             :                          "illegal resample");
    7091           0 :         return;
    7092             :     }
    7093             : 
    7094         130 :     CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread() using algo %d",
    7095             :              nAlgo);
    7096             : 
    7097             :     /* -------------------------------------------------------------------- */
    7098             :     /*      Allocate x,y,z coordinate arrays for transformation ... two     */
    7099             :     /*      scanlines worth of positions.                                   */
    7100             :     /* -------------------------------------------------------------------- */
    7101             : 
    7102             :     double *padfX =
    7103         130 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7104             :     double *padfY =
    7105         130 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7106             :     double *padfZ =
    7107         130 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7108             :     double *padfX2 =
    7109         130 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7110             :     double *padfY2 =
    7111         130 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7112             :     double *padfZ2 =
    7113         130 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7114         130 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    7115         130 :     int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    7116             : 
    7117         130 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    7118         130 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    7119         130 :     const double dfErrorThreshold = CPLAtof(
    7120         130 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    7121             : 
    7122             :     const double dfExcludedValuesThreshold =
    7123         130 :         CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
    7124             :                                      "EXCLUDED_VALUES_PCT_THRESHOLD", "50")) /
    7125         130 :         100.0;
    7126             :     const double dfNodataValuesThreshold =
    7127         130 :         CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
    7128             :                                      "NODATA_VALUES_PCT_THRESHOLD", "100")) /
    7129         130 :         100.0;
    7130             : 
    7131             :     const int nXMargin =
    7132         130 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
    7133             :     const int nYMargin =
    7134         130 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
    7135             : 
    7136             :     /* ==================================================================== */
    7137             :     /*      Loop over output lines.                                         */
    7138             :     /* ==================================================================== */
    7139        6627 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    7140             :     {
    7141             : 
    7142             :         /* --------------------------------------------------------------------
    7143             :          */
    7144             :         /*      Setup points to transform to source image space. */
    7145             :         /* --------------------------------------------------------------------
    7146             :          */
    7147     1669840 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    7148             :         {
    7149     1663340 :             padfX[iDstX] = iDstX + poWK->nDstXOff;
    7150     1663340 :             padfY[iDstX] = iDstY + poWK->nDstYOff;
    7151     1663340 :             padfZ[iDstX] = 0.0;
    7152     1663340 :             padfX2[iDstX] = iDstX + 1.0 + poWK->nDstXOff;
    7153     1663340 :             padfY2[iDstX] = iDstY + 1.0 + poWK->nDstYOff;
    7154     1663340 :             padfZ2[iDstX] = 0.0;
    7155             :         }
    7156             : 
    7157             :         /* --------------------------------------------------------------------
    7158             :          */
    7159             :         /*      Transform the points from destination pixel/line coordinates */
    7160             :         /*      to source pixel/line coordinates. */
    7161             :         /* --------------------------------------------------------------------
    7162             :          */
    7163        6497 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    7164             :                              padfY, padfZ, pabSuccess);
    7165        6497 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX2,
    7166             :                              padfY2, padfZ2, pabSuccess2);
    7167             : 
    7168        6497 :         if (dfSrcCoordPrecision > 0.0)
    7169             :         {
    7170           0 :             GWKRoundSourceCoordinates(
    7171             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    7172             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    7173           0 :                 poWK->nDstXOff, iDstY + poWK->nDstYOff);
    7174           0 :             GWKRoundSourceCoordinates(
    7175             :                 nDstXSize, padfX2, padfY2, padfZ2, pabSuccess2,
    7176             :                 dfSrcCoordPrecision, dfErrorThreshold, poWK->pfnTransformer,
    7177           0 :                 psJob->pTransformerArg, 1.0 + poWK->nDstXOff,
    7178           0 :                 iDstY + 1.0 + poWK->nDstYOff);
    7179             :         }
    7180             : 
    7181             :         /* ====================================================================
    7182             :          */
    7183             :         /*      Loop over pixels in output scanline. */
    7184             :         /* ====================================================================
    7185             :          */
    7186     1669840 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    7187             :         {
    7188     1663340 :             GPtrDiff_t iSrcOffset = 0;
    7189     1663340 :             double dfDensity = 1.0;
    7190     1663340 :             bool bHasFoundDensity = false;
    7191             : 
    7192     1663340 :             if (!pabSuccess[iDstX] || !pabSuccess2[iDstX])
    7193      311460 :                 continue;
    7194             : 
    7195             :             // Add some checks so that padfX[iDstX] - poWK->nSrcXOff is in
    7196             :             // reasonable range (https://github.com/OSGeo/gdal/issues/2365)
    7197     1663340 :             if (!(padfX[iDstX] - poWK->nSrcXOff >= -nXMargin &&
    7198     1663320 :                   padfX2[iDstX] - poWK->nSrcXOff >= -nXMargin &&
    7199     1663320 :                   padfY[iDstX] - poWK->nSrcYOff >= -nYMargin &&
    7200     1663300 :                   padfY2[iDstX] - poWK->nSrcYOff >= -nYMargin &&
    7201     1663300 :                   padfX[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
    7202     1663300 :                   padfX2[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
    7203     1663290 :                   padfY[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin &&
    7204     1663290 :                   padfY2[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin))
    7205             :             {
    7206          62 :                 continue;
    7207             :             }
    7208             : 
    7209     1663280 :             const GPtrDiff_t iDstOffset =
    7210     1663280 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    7211             : 
    7212             :             // Compute corners in source crs.
    7213             : 
    7214             :             // The transformation might not have preserved ordering of
    7215             :             // coordinates so do the necessary swapping (#5433).
    7216             :             // NOTE: this is really an approximate fix. To do something
    7217             :             // more precise we would for example need to compute the
    7218             :             // transformation of coordinates in the
    7219             :             // [iDstX,iDstY]x[iDstX+1,iDstY+1] square back to source
    7220             :             // coordinates, and take the bounding box of the resulting source
    7221             :             // coordinates.
    7222             : 
    7223     1663280 :             if (padfX[iDstX] > padfX2[iDstX])
    7224      268744 :                 std::swap(padfX[iDstX], padfX2[iDstX]);
    7225             : 
    7226             :             // Detect situations where the target pixel is close to the
    7227             :             // antimeridian and when padfX[iDstX] and padfX2[iDstX] are very
    7228             :             // close to the left-most and right-most columns of the source
    7229             :             // raster. The 2 value below was experimentally determined to
    7230             :             // avoid false-positives and false-negatives.
    7231             :             // Addresses https://github.com/OSGeo/gdal/issues/6478
    7232     1663280 :             bool bWrapOverX = false;
    7233     1663280 :             const int nThresholdWrapOverX = std::min(2, nSrcXSize / 10);
    7234     1663280 :             if (poWK->nSrcXOff == 0 &&
    7235     1663280 :                 padfX[iDstX] * poWK->dfXScale < nThresholdWrapOverX &&
    7236       14495 :                 (nSrcXSize - padfX2[iDstX]) * poWK->dfXScale <
    7237             :                     nThresholdWrapOverX)
    7238             :             {
    7239             :                 // Check there is a discontinuity by checking at mid-pixel.
    7240             :                 // NOTE: all this remains fragile. To confidently
    7241             :                 // detect antimeridian warping we should probably try to access
    7242             :                 // georeferenced coordinates, and not rely only on tests on
    7243             :                 // image space coordinates. But accessing georeferenced
    7244             :                 // coordinates from here is not trivial, and we would for example
    7245             :                 // have to handle both geographic, Mercator, etc.
    7246             :                 // Let's hope this heuristic is good enough for now.
    7247        1041 :                 double x = iDstX + 0.5 + poWK->nDstXOff;
    7248        1041 :                 double y = iDstY + poWK->nDstYOff;
    7249        1041 :                 double z = 0;
    7250        1041 :                 int bSuccess = FALSE;
    7251        1041 :                 poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1, &x, &y,
    7252             :                                      &z, &bSuccess);
    7253        1041 :                 if (bSuccess && x < padfX[iDstX])
    7254             :                 {
    7255        1008 :                     bWrapOverX = true;
    7256        1008 :                     std::swap(padfX[iDstX], padfX2[iDstX]);
    7257        1008 :                     padfX2[iDstX] += nSrcXSize;
    7258             :                 }
    7259             :             }
    7260             : 
    7261     1663280 :             const double dfXMin = padfX[iDstX] - poWK->nSrcXOff;
    7262     1663280 :             const double dfXMax = padfX2[iDstX] - poWK->nSrcXOff;
    7263     1663280 :             constexpr double EPS = 1e-10;
    7264             :             // Check that [dfXMin, dfXMax] intersect with [0,nSrcXSize] with a tolerance
    7265     1663280 :             if (!(dfXMax > -EPS && dfXMin < nSrcXSize + EPS))
    7266          72 :                 continue;
    7267     1663200 :             int iSrcXMin = static_cast<int>(std::max(floor(dfXMin + EPS), 0.0));
    7268     1663200 :             int iSrcXMax = static_cast<int>(
    7269     1663200 :                 std::min(ceil(dfXMax - EPS), static_cast<double>(INT_MAX)));
    7270     1663200 :             if (!bWrapOverX)
    7271     1662200 :                 iSrcXMax = std::min(iSrcXMax, nSrcXSize);
    7272     1663200 :             if (iSrcXMin == iSrcXMax && iSrcXMax < nSrcXSize)
    7273         472 :                 iSrcXMax++;
    7274             : 
    7275     1663200 :             if (padfY[iDstX] > padfY2[iDstX])
    7276      270117 :                 std::swap(padfY[iDstX], padfY2[iDstX]);
    7277     1663200 :             const double dfYMin = padfY[iDstX] - poWK->nSrcYOff;
    7278     1663200 :             const double dfYMax = padfY2[iDstX] - poWK->nSrcYOff;
    7279             :             // Check that [dfYMin, dfYMax] intersect with [0,nSrcYSize] with a tolerance
    7280     1663200 :             if (!(dfYMax > -EPS && dfYMin < nSrcYSize + EPS))
    7281          36 :                 continue;
    7282     1663170 :             int iSrcYMin = static_cast<int>(std::max(floor(dfYMin + EPS), 0.0));
    7283             :             int iSrcYMax =
    7284     1663170 :                 std::min(static_cast<int>(ceil(dfYMax - EPS)), nSrcYSize);
    7285     1663170 :             if (iSrcYMin == iSrcYMax && iSrcYMax < nSrcYSize)
    7286           0 :                 iSrcYMax++;
    7287             : 
    7288             : #define COMPUTE_WEIGHT_Y(iSrcY)                                                \
    7289             :     ((iSrcY == iSrcYMin)                                                       \
    7290             :          ? ((iSrcYMin + 1 == iSrcYMax) ? 1.0 : 1 - (dfYMin - iSrcYMin))        \
    7291             :      : (iSrcY + 1 == iSrcYMax) ? 1 - (iSrcYMax - dfYMax)                       \
    7292             :                                : 1.0)
    7293             : 
    7294             : #define COMPUTE_WEIGHT(iSrcX, dfWeightY)                                       \
    7295             :     ((iSrcX == iSrcXMin)       ? ((iSrcXMin + 1 == iSrcXMax)                   \
    7296             :                                       ? dfWeightY                              \
    7297             :                                       : dfWeightY * (1 - (dfXMin - iSrcXMin))) \
    7298             :      : (iSrcX + 1 == iSrcXMax) ? dfWeightY * (1 - (iSrcXMax - dfXMax))         \
    7299             :                                : dfWeightY)
    7300             : 
    7301     1663170 :             bool bDone = false;
    7302             : 
    7303             :             // Special Average mode where we process all bands together,
    7304             :             // to avoid averaging tuples that match an entry of m_aadfExcludedValues
    7305     2267240 :             if (nAlgo == GWKAOM_Average &&
    7306      604073 :                 (!poWK->m_aadfExcludedValues.empty() ||
    7307      393224 :                  dfNodataValuesThreshold < 1 - EPS) &&
    7308     2267240 :                 !poWK->bApplyVerticalShift && !bIsComplex)
    7309             :             {
    7310      393224 :                 double dfTotalWeightInvalid = 0.0;
    7311      393224 :                 double dfTotalWeightExcluded = 0.0;
    7312      393224 :                 double dfTotalWeightRegular = 0.0;
    7313      786448 :                 std::vector<double> adfValueReal(poWK->nBands, 0);
    7314      786448 :                 std::vector<double> adfValueAveraged(poWK->nBands, 0);
    7315             :                 std::vector<int> anCountExcludedValues(
    7316      393224 :                     poWK->m_aadfExcludedValues.size(), 0);
    7317             : 
    7318     1572890 :                 for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7319             :                 {
    7320     1179660 :                     const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7321     1179660 :                     iSrcOffset =
    7322     1179660 :                         iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7323     5111860 :                     for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7324             :                          iSrcX++, iSrcOffset++)
    7325             :                     {
    7326     3932190 :                         if (bWrapOverX)
    7327           0 :                             iSrcOffset =
    7328           0 :                                 (iSrcX % nSrcXSize) +
    7329           0 :                                 static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7330             : 
    7331     3932190 :                         const double dfWeight =
    7332     3932190 :                             COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7333     3932190 :                         if (dfWeight <= 0)
    7334           0 :                             continue;
    7335             : 
    7336     3932200 :                         if (poWK->panUnifiedSrcValid != nullptr &&
    7337          12 :                             !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    7338             :                         {
    7339           3 :                             dfTotalWeightInvalid += dfWeight;
    7340           3 :                             continue;
    7341             :                         }
    7342             : 
    7343     3932190 :                         bool bAllValid = true;
    7344     7274900 :                         for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7345             :                         {
    7346     6160660 :                             double dfBandDensity = 0;
    7347     6160660 :                             double dfValueImagTmp = 0;
    7348     9503370 :                             if (!(GWKGetPixelValue(
    7349             :                                       poWK, iBand, iSrcOffset, &dfBandDensity,
    7350     6160660 :                                       &adfValueReal[iBand], &dfValueImagTmp) &&
    7351     3342710 :                                   dfBandDensity > BAND_DENSITY_THRESHOLD))
    7352             :                             {
    7353     2817950 :                                 bAllValid = false;
    7354     2817950 :                                 break;
    7355             :                             }
    7356             :                         }
    7357             : 
    7358     3932190 :                         if (!bAllValid)
    7359             :                         {
    7360     2817950 :                             dfTotalWeightInvalid += dfWeight;
    7361     2817950 :                             continue;
    7362             :                         }
    7363             : 
    7364     1114240 :                         bool bExcludedValueFound = false;
    7365     2228350 :                         for (size_t i = 0;
    7366     2228350 :                              i < poWK->m_aadfExcludedValues.size(); ++i)
    7367             :                         {
    7368     1114130 :                             if (poWK->m_aadfExcludedValues[i] == adfValueReal)
    7369             :                             {
    7370          21 :                                 bExcludedValueFound = true;
    7371          21 :                                 ++anCountExcludedValues[i];
    7372          21 :                                 dfTotalWeightExcluded += dfWeight;
    7373          21 :                                 break;
    7374             :                             }
    7375             :                         }
    7376     1114240 :                         if (!bExcludedValueFound)
    7377             :                         {
    7378             :                             // Weighted incremental algorithm mean
    7379             :                             // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
    7380     1114220 :                             dfTotalWeightRegular += dfWeight;
    7381     4456870 :                             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7382             :                             {
    7383     3342650 :                                 adfValueAveraged[iBand] +=
    7384     6685300 :                                     (dfWeight / dfTotalWeightRegular) *
    7385     6685300 :                                     (adfValueReal[iBand] -
    7386     3342650 :                                      adfValueAveraged[iBand]);
    7387             :                             }
    7388             :                         }
    7389             :                     }
    7390             :                 }
    7391             : 
    7392      393224 :                 const double dfTotalWeight = dfTotalWeightInvalid +
    7393             :                                              dfTotalWeightExcluded +
    7394             :                                              dfTotalWeightRegular;
    7395      393224 :                 if (dfTotalWeightInvalid > 0 &&
    7396             :                     dfTotalWeightInvalid >=
    7397      311293 :                         dfNodataValuesThreshold * dfTotalWeight)
    7398             :                 {
    7399             :                     // Do nothing. Leave bHasFoundDensity set to false.
    7400             :                 }
    7401       81934 :                 else if (dfTotalWeightExcluded > 0 &&
    7402             :                          dfTotalWeightExcluded >=
    7403           6 :                              dfExcludedValuesThreshold * dfTotalWeight)
    7404             :                 {
    7405             :                     // Find the most represented excluded value tuple
    7406           3 :                     size_t iExcludedValue = 0;
    7407           3 :                     int nExcludedValueCount = 0;
    7408           6 :                     for (size_t i = 0; i < poWK->m_aadfExcludedValues.size();
    7409             :                          ++i)
    7410             :                     {
    7411           3 :                         if (anCountExcludedValues[i] > nExcludedValueCount)
    7412             :                         {
    7413           3 :                             iExcludedValue = i;
    7414           3 :                             nExcludedValueCount = anCountExcludedValues[i];
    7415             :                         }
    7416             :                     }
    7417             : 
    7418           3 :                     bHasFoundDensity = true;
    7419             : 
    7420          12 :                     for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7421             :                     {
    7422           9 :                         GWKSetPixelValue(
    7423             :                             poWK, iBand, iDstOffset, /* dfBandDensity = */ 1.0,
    7424           9 :                             poWK->m_aadfExcludedValues[iExcludedValue][iBand],
    7425             :                             0);
    7426           3 :                     }
    7427             :                 }
    7428       81931 :                 else if (dfTotalWeightRegular > 0)
    7429             :                 {
    7430       81931 :                     bHasFoundDensity = true;
    7431             : 
    7432      327720 :                     for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7433             :                     {
    7434      245789 :                         GWKSetPixelValue(poWK, iBand, iDstOffset,
    7435             :                                          /* dfBandDensity = */ 1.0,
    7436      245789 :                                          adfValueAveraged[iBand], 0);
    7437             :                     }
    7438             :                 }
    7439             : 
    7440             :                 // Skip below loop on bands
    7441      393224 :                 bDone = true;
    7442             :             }
    7443             : 
    7444             :             /* ====================================================================
    7445             :              */
    7446             :             /*      Loop processing each band. */
    7447             :             /* ====================================================================
    7448             :              */
    7449             : 
    7450     4439540 :             for (int iBand = 0; !bDone && iBand < poWK->nBands; iBand++)
    7451             :             {
    7452     2776380 :                 double dfBandDensity = 0.0;
    7453     2776380 :                 double dfValueReal = 0.0;
    7454     2776380 :                 double dfValueImag = 0.0;
    7455     2776380 :                 double dfValueRealTmp = 0.0;
    7456     2776380 :                 double dfValueImagTmp = 0.0;
    7457             : 
    7458             :                 /* --------------------------------------------------------------------
    7459             :                  */
    7460             :                 /*      Collect the source value. */
    7461             :                 /* --------------------------------------------------------------------
    7462             :                  */
    7463             : 
    7464             :                 // Loop over source lines and pixels - 3 possible algorithms.
    7465             : 
    7466             :                 // poWK->eResample == GRA_Average.
    7467     2776380 :                 if (nAlgo == GWKAOM_Average)
    7468             :                 {
    7469      300849 :                     double dfTotalWeight = 0.0;
    7470             : 
    7471             :                     // This code adapted from GDALDownsampleChunk32R_AverageT()
    7472             :                     // in gcore/overview.cpp.
    7473      631308 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7474             :                     {
    7475      330459 :                         const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7476      330459 :                         iSrcOffset = iSrcXMin +
    7477      330459 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7478      803200 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7479             :                              iSrcX++, iSrcOffset++)
    7480             :                         {
    7481      472741 :                             if (bWrapOverX)
    7482         630 :                                 iSrcOffset =
    7483         630 :                                     (iSrcX % nSrcXSize) +
    7484         630 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7485             : 
    7486      472745 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7487           4 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7488             :                                             iSrcOffset))
    7489             :                             {
    7490           1 :                                 continue;
    7491             :                             }
    7492             : 
    7493      472740 :                             if (GWKGetPixelValue(
    7494             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7495      945480 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7496      472740 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7497             :                             {
    7498      472740 :                                 const double dfWeight =
    7499      472740 :                                     COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7500      472740 :                                 if (dfWeight > 0)
    7501             :                                 {
    7502             :                                     // Weighted incremental algorithm mean
    7503             :                                     // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
    7504      472740 :                                     dfTotalWeight += dfWeight;
    7505      472740 :                                     dfValueReal +=
    7506      472740 :                                         (dfWeight / dfTotalWeight) *
    7507      472740 :                                         (dfValueRealTmp - dfValueReal);
    7508      472740 :                                     if (bIsComplex)
    7509             :                                     {
    7510         252 :                                         dfValueImag +=
    7511         252 :                                             (dfWeight / dfTotalWeight) *
    7512         252 :                                             (dfValueImagTmp - dfValueImag);
    7513             :                                     }
    7514             :                                 }
    7515             :                             }
    7516             :                         }
    7517             :                     }
    7518             : 
    7519      300849 :                     if (dfTotalWeight > 0)
    7520             :                     {
    7521      300849 :                         if (poWK->bApplyVerticalShift)
    7522             :                         {
    7523           0 :                             if (!std::isfinite(padfZ[iDstX]))
    7524           0 :                                 continue;
    7525             :                             // Subtract padfZ[] since the coordinate
    7526             :                             // transformation is from target to source
    7527           0 :                             dfValueReal =
    7528           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7529           0 :                                 padfZ[iDstX] *
    7530             :                                     dfMultFactorVerticalShiftPipeline;
    7531             :                         }
    7532             : 
    7533      300849 :                         dfBandDensity = 1;
    7534      300849 :                         bHasFoundDensity = true;
    7535             :                     }
    7536             :                 }  // GRA_Average.
    7537             :                 // poWK->eResample == GRA_RMS.
    7538     2776380 :                 if (nAlgo == GWKAOM_RMS)
    7539             :                 {
    7540      300416 :                     double dfTotalReal = 0.0;
    7541      300416 :                     double dfTotalImag = 0.0;
    7542      300416 :                     double dfTotalWeight = 0.0;
    7543             :                     // This code adapted from GDALDownsampleChunk32R_AverageT()
    7544             :                     // in gcore/overview.cpp.
    7545      630578 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7546             :                     {
    7547      330162 :                         const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7548      330162 :                         iSrcOffset = iSrcXMin +
    7549      330162 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7550      802723 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7551             :                              iSrcX++, iSrcOffset++)
    7552             :                         {
    7553      472561 :                             if (bWrapOverX)
    7554         630 :                                 iSrcOffset =
    7555         630 :                                     (iSrcX % nSrcXSize) +
    7556         630 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7557             : 
    7558      472561 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7559           0 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7560             :                                             iSrcOffset))
    7561             :                             {
    7562           0 :                                 continue;
    7563             :                             }
    7564             : 
    7565      472561 :                             if (GWKGetPixelValue(
    7566             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7567      945122 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7568      472561 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7569             :                             {
    7570      472561 :                                 const double dfWeight =
    7571      472561 :                                     COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7572      472561 :                                 dfTotalWeight += dfWeight;
    7573      472561 :                                 dfTotalReal +=
    7574      472561 :                                     dfValueRealTmp * dfValueRealTmp * dfWeight;
    7575      472561 :                                 if (bIsComplex)
    7576          48 :                                     dfTotalImag += dfValueImagTmp *
    7577          48 :                                                    dfValueImagTmp * dfWeight;
    7578             :                             }
    7579             :                         }
    7580             :                     }
    7581             : 
    7582      300416 :                     if (dfTotalWeight > 0)
    7583             :                     {
    7584      300416 :                         dfValueReal = sqrt(dfTotalReal / dfTotalWeight);
    7585             : 
    7586      300416 :                         if (poWK->bApplyVerticalShift)
    7587             :                         {
    7588           0 :                             if (!std::isfinite(padfZ[iDstX]))
    7589           0 :                                 continue;
    7590             :                             // Subtract padfZ[] since the coordinate
    7591             :                             // transformation is from target to source
    7592           0 :                             dfValueReal =
    7593           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7594           0 :                                 padfZ[iDstX] *
    7595             :                                     dfMultFactorVerticalShiftPipeline;
    7596             :                         }
    7597             : 
    7598      300416 :                         if (bIsComplex)
    7599          12 :                             dfValueImag = sqrt(dfTotalImag / dfTotalWeight);
    7600             : 
    7601      300416 :                         dfBandDensity = 1;
    7602      300416 :                         bHasFoundDensity = true;
    7603             :                     }
    7604             :                 }  // GRA_RMS.
    7605             : #ifdef disabled
    7606             :                 else if (nAlgo == GWKAOM_Sum)
    7607             :                 // poWK->eResample == GRA_Sum
    7608             :                 {
    7609             :                     double dfTotalReal = 0.0;
    7610             :                     double dfTotalImag = 0.0;
    7611             :                     bool bFoundValid = false;
    7612             : 
    7613             :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7614             :                     {
    7615             :                         const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7616             :                         iSrcOffset = iSrcXMin +
    7617             :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7618             :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7619             :                              iSrcX++, iSrcOffset++)
    7620             :                         {
    7621             :                             if (bWrapOverX)
    7622             :                                 iSrcOffset =
    7623             :                                     (iSrcX % nSrcXSize) +
    7624             :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7625             : 
    7626             :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7627             :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7628             :                                             iSrcOffset))
    7629             :                             {
    7630             :                                 continue;
    7631             :                             }
    7632             : 
    7633             :                             if (GWKGetPixelValue(
    7634             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7635             :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7636             :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7637             :                             {
    7638             :                                 const double dfWeight =
    7639             :                                     COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7640             :                                 bFoundValid = true;
    7641             :                                 dfTotalReal += dfValueRealTmp * dfWeight;
    7642             :                                 if (bIsComplex)
    7643             :                                 {
    7644             :                                     dfTotalImag += dfValueImagTmp * dfWeight;
    7645             :                                 }
    7646             :                             }
    7647             :                         }
    7648             :                     }
    7649             : 
    7650             :                     if (bFoundValid)
    7651             :                     {
    7652             :                         dfValueReal = dfTotalReal;
    7653             : 
    7654             :                         if (poWK->bApplyVerticalShift)
    7655             :                         {
    7656             :                             if (!std::isfinite(padfZ[iDstX]))
    7657             :                                 continue;
    7658             :                             // Subtract padfZ[] since the coordinate
    7659             :                             // transformation is from target to source
    7660             :                             dfValueReal =
    7661             :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7662             :                                 padfZ[iDstX] *
    7663             :                                     dfMultFactorVerticalShiftPipeline;
    7664             :                         }
    7665             : 
    7666             :                         if (bIsComplex)
    7667             :                         {
    7668             :                             dfValueImag = dfTotalImag;
    7669             :                         }
    7670             :                         dfBandDensity = 1;
    7671             :                         bHasFoundDensity = true;
    7672             :                     }
    7673             :                 }  // GRA_Sum.
    7674             : #endif
    7675     2475960 :                 else if (nAlgo == GWKAOM_Imode || nAlgo == GWKAOM_Fmode)
    7676             :                 // poWK->eResample == GRA_Mode
    7677             :                 {
    7678             :                     // This code adapted from GDALDownsampleChunk32R_Mode() in
    7679             :                     // gcore/overview.cpp.
    7680      500026 :                     if (nAlgo == GWKAOM_Fmode)  // int32 or float.
    7681             :                     {
    7682             :                         // Does it make sense to run a
    7683             :                         // majority filter on floating point data? But, here it
    7684             :                         // is for the sake of compatibility. It won't look
    7685             :                         // right on RGB images by the nature of the filter.
    7686        3407 :                         nBins = 0;
    7687        3407 :                         int iModeIndex = -1;
    7688             : 
    7689       10228 :                         for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7690             :                         {
    7691        6821 :                             const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7692        6821 :                             iSrcOffset =
    7693        6821 :                                 iSrcXMin +
    7694        6821 :                                 static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7695       20484 :                             for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7696             :                                  iSrcX++, iSrcOffset++)
    7697             :                             {
    7698       13663 :                                 if (bWrapOverX)
    7699           0 :                                     iSrcOffset =
    7700           0 :                                         (iSrcX % nSrcXSize) +
    7701           0 :                                         static_cast<GPtrDiff_t>(iSrcY) *
    7702           0 :                                             nSrcXSize;
    7703             : 
    7704       13663 :                                 if (poWK->panUnifiedSrcValid != nullptr &&
    7705           0 :                                     !CPLMaskGet(poWK->panUnifiedSrcValid,
    7706             :                                                 iSrcOffset))
    7707           0 :                                     continue;
    7708             : 
    7709       13663 :                                 if (GWKGetPixelValue(
    7710             :                                         poWK, iBand, iSrcOffset, &dfBandDensity,
    7711       27326 :                                         &dfValueRealTmp, &dfValueImagTmp) &&
    7712       13663 :                                     dfBandDensity > BAND_DENSITY_THRESHOLD)
    7713             :                                 {
    7714       13663 :                                     const float fVal =
    7715       13663 :                                         static_cast<float>(dfValueRealTmp);
    7716       13663 :                                     const double dfWeight =
    7717       13663 :                                         COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7718             : 
    7719             :                                     // Check array for existing entry.
    7720       13663 :                                     int i = 0;
    7721       29135 :                                     for (i = 0; i < nBins; ++i)
    7722             :                                     {
    7723       17768 :                                         if (pafRealVals[i] == fVal)
    7724             :                                         {
    7725             : 
    7726        2296 :                                             pafCounts[i] +=
    7727        2296 :                                                 static_cast<float>(dfWeight);
    7728        2296 :                                             bool bValIsMaxCount =
    7729        2296 :                                                 (pafCounts[i] >
    7730        2296 :                                                  pafCounts[iModeIndex]);
    7731             : 
    7732        2296 :                                             if (!bValIsMaxCount &&
    7733        1492 :                                                 pafCounts[i] ==
    7734        1492 :                                                     pafCounts[iModeIndex])
    7735             :                                             {
    7736        1487 :                                                 switch (eTieStrategy)
    7737             :                                                 {
    7738        1474 :                                                     case GWKTS_First:
    7739        1474 :                                                         break;
    7740           6 :                                                     case GWKTS_Min:
    7741           6 :                                                         bValIsMaxCount =
    7742             :                                                             fVal <
    7743             :                                                             pafRealVals
    7744           6 :                                                                 [iModeIndex];
    7745           6 :                                                         break;
    7746           7 :                                                     case GWKTS_Max:
    7747           7 :                                                         bValIsMaxCount =
    7748             :                                                             fVal >
    7749             :                                                             pafRealVals
    7750           7 :                                                                 [iModeIndex];
    7751           7 :                                                         break;
    7752             :                                                 }
    7753             :                                             }
    7754             : 
    7755        2296 :                                             if (bValIsMaxCount)
    7756             :                                             {
    7757         807 :                                                 iModeIndex = i;
    7758             :                                             }
    7759             : 
    7760        2296 :                                             break;
    7761             :                                         }
    7762             :                                     }
    7763             : 
    7764             :                                     // Add to arr if entry not already there.
    7765       13663 :                                     if (i == nBins)
    7766             :                                     {
    7767       11367 :                                         pafRealVals[i] = fVal;
    7768       11367 :                                         pafCounts[i] =
    7769       11367 :                                             static_cast<float>(dfWeight);
    7770             : 
    7771       11367 :                                         if (iModeIndex < 0)
    7772        3407 :                                             iModeIndex = i;
    7773             : 
    7774       11367 :                                         ++nBins;
    7775             :                                     }
    7776             :                                 }
    7777             :                             }
    7778             :                         }
    7779             : 
    7780        3407 :                         if (iModeIndex != -1)
    7781             :                         {
    7782        3407 :                             dfValueReal = pafRealVals[iModeIndex];
    7783             : 
    7784        3407 :                             if (poWK->bApplyVerticalShift)
    7785             :                             {
    7786           0 :                                 if (!std::isfinite(padfZ[iDstX]))
    7787           0 :                                     continue;
    7788             :                                 // Subtract padfZ[] since the coordinate
    7789             :                                 // transformation is from target to source
    7790           0 :                                 dfValueReal =
    7791           0 :                                     dfValueReal *
    7792           0 :                                         poWK->dfMultFactorVerticalShift -
    7793           0 :                                     padfZ[iDstX] *
    7794             :                                         dfMultFactorVerticalShiftPipeline;
    7795             :                             }
    7796             : 
    7797        3407 :                             dfBandDensity = 1;
    7798        3407 :                             bHasFoundDensity = true;
    7799             :                         }
    7800             :                     }
    7801             :                     else  // byte or int16.
    7802             :                     {
    7803      496619 :                         float fMaxCount = 0.0f;
    7804      496619 :                         int nMode = -1;
    7805      496619 :                         bool bHasSourceValues = false;
    7806             : 
    7807      496619 :                         memset(pafCounts, 0, nBins * sizeof(float));
    7808             : 
    7809     1612550 :                         for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7810             :                         {
    7811     1115930 :                             const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7812     1115930 :                             iSrcOffset =
    7813     1115930 :                                 iSrcXMin +
    7814     1115930 :                                 static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7815     4733150 :                             for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7816             :                                  iSrcX++, iSrcOffset++)
    7817             :                             {
    7818     3617220 :                                 if (bWrapOverX)
    7819         630 :                                     iSrcOffset =
    7820         630 :                                         (iSrcX % nSrcXSize) +
    7821         630 :                                         static_cast<GPtrDiff_t>(iSrcY) *
    7822         630 :                                             nSrcXSize;
    7823             : 
    7824     3617220 :                                 if (poWK->panUnifiedSrcValid != nullptr &&
    7825           0 :                                     !CPLMaskGet(poWK->panUnifiedSrcValid,
    7826             :                                                 iSrcOffset))
    7827           0 :                                     continue;
    7828             : 
    7829     3617220 :                                 if (GWKGetPixelValue(
    7830             :                                         poWK, iBand, iSrcOffset, &dfBandDensity,
    7831     7234430 :                                         &dfValueRealTmp, &dfValueImagTmp) &&
    7832     3617220 :                                     dfBandDensity > BAND_DENSITY_THRESHOLD)
    7833             :                                 {
    7834     3617220 :                                     bHasSourceValues = true;
    7835     3617220 :                                     const int nVal =
    7836     3617220 :                                         static_cast<int>(dfValueRealTmp);
    7837     3617220 :                                     const int iBin = nVal + nBinsOffset;
    7838     3617220 :                                     const double dfWeight =
    7839     3617220 :                                         COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7840             : 
    7841             :                                     // Sum the density.
    7842     3617220 :                                     pafCounts[iBin] +=
    7843     3617220 :                                         static_cast<float>(dfWeight);
    7844             :                                     // Is it the most common value so far?
    7845     3617220 :                                     bool bUpdateMode =
    7846     3617220 :                                         pafCounts[iBin] > fMaxCount;
    7847     3617220 :                                     if (!bUpdateMode &&
    7848      778312 :                                         pafCounts[iBin] == fMaxCount)
    7849             :                                     {
    7850      218624 :                                         switch (eTieStrategy)
    7851             :                                         {
    7852      218616 :                                             case GWKTS_First:
    7853      218616 :                                                 break;
    7854           4 :                                             case GWKTS_Min:
    7855           4 :                                                 bUpdateMode = nVal < nMode;
    7856           4 :                                                 break;
    7857           4 :                                             case GWKTS_Max:
    7858           4 :                                                 bUpdateMode = nVal > nMode;
    7859           4 :                                                 break;
    7860             :                                         }
    7861             :                                     }
    7862     3617220 :                                     if (bUpdateMode)
    7863             :                                     {
    7864     2838910 :                                         nMode = nVal;
    7865     2838910 :                                         fMaxCount = pafCounts[iBin];
    7866             :                                     }
    7867             :                                 }
    7868             :                             }
    7869             :                         }
    7870             : 
    7871      496619 :                         if (bHasSourceValues)
    7872             :                         {
    7873      496619 :                             dfValueReal = nMode;
    7874             : 
    7875      496619 :                             if (poWK->bApplyVerticalShift)
    7876             :                             {
    7877           0 :                                 if (!std::isfinite(padfZ[iDstX]))
    7878           0 :                                     continue;
    7879             :                                 // Subtract padfZ[] since the coordinate
    7880             :                                 // transformation is from target to source
    7881           0 :                                 dfValueReal =
    7882           0 :                                     dfValueReal *
    7883           0 :                                         poWK->dfMultFactorVerticalShift -
    7884           0 :                                     padfZ[iDstX] *
    7885             :                                         dfMultFactorVerticalShiftPipeline;
    7886             :                             }
    7887             : 
    7888      496619 :                             dfBandDensity = 1;
    7889      496619 :                             bHasFoundDensity = true;
    7890             :                         }
    7891      500026 :                     }
    7892             :                 }  // GRA_Mode.
    7893     1975930 :                 else if (nAlgo == GWKAOM_Max)
    7894             :                 // poWK->eResample == GRA_Max.
    7895             :                 {
    7896      335037 :                     bool bFoundValid = false;
    7897      335037 :                     double dfTotalReal = cpl::NumericLimits<double>::lowest();
    7898             :                     // This code adapted from nAlgo 1 method, GRA_Average.
    7899     1288010 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7900             :                     {
    7901      952975 :                         iSrcOffset = iSrcXMin +
    7902      952975 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7903     4406540 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7904             :                              iSrcX++, iSrcOffset++)
    7905             :                         {
    7906     3453560 :                             if (bWrapOverX)
    7907         630 :                                 iSrcOffset =
    7908         630 :                                     (iSrcX % nSrcXSize) +
    7909         630 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7910             : 
    7911     3456370 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7912        2809 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7913             :                                             iSrcOffset))
    7914             :                             {
    7915        2446 :                                 continue;
    7916             :                             }
    7917             : 
    7918             :                             // Returns pixel value if it is not no data.
    7919     3451120 :                             if (GWKGetPixelValue(
    7920             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7921     6902230 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7922     3451120 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7923             :                             {
    7924     3451120 :                                 bFoundValid = true;
    7925     3451120 :                                 if (dfTotalReal < dfValueRealTmp)
    7926             :                                 {
    7927      442642 :                                     dfTotalReal = dfValueRealTmp;
    7928             :                                 }
    7929             :                             }
    7930             :                         }
    7931             :                     }
    7932             : 
    7933      335037 :                     if (bFoundValid)
    7934             :                     {
    7935      335037 :                         dfValueReal = dfTotalReal;
    7936             : 
    7937      335037 :                         if (poWK->bApplyVerticalShift)
    7938             :                         {
    7939           0 :                             if (!std::isfinite(padfZ[iDstX]))
    7940           0 :                                 continue;
    7941             :                             // Subtract padfZ[] since the coordinate
    7942             :                             // transformation is from target to source
    7943           0 :                             dfValueReal =
    7944           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7945           0 :                                 padfZ[iDstX] *
    7946             :                                     dfMultFactorVerticalShiftPipeline;
    7947             :                         }
    7948             : 
    7949      335037 :                         dfBandDensity = 1;
    7950      335037 :                         bHasFoundDensity = true;
    7951             :                     }
    7952             :                 }  // GRA_Max.
    7953     1640900 :                 else if (nAlgo == GWKAOM_Min)
    7954             :                 // poWK->eResample == GRA_Min.
    7955             :                 {
    7956      335012 :                     bool bFoundValid = false;
    7957      335012 :                     double dfTotalReal = cpl::NumericLimits<double>::max();
    7958             :                     // This code adapted from nAlgo 1 method, GRA_Average.
    7959     1287720 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7960             :                     {
    7961      952710 :                         iSrcOffset = iSrcXMin +
    7962      952710 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7963     4403460 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7964             :                              iSrcX++, iSrcOffset++)
    7965             :                         {
    7966     3450750 :                             if (bWrapOverX)
    7967         630 :                                 iSrcOffset =
    7968         630 :                                     (iSrcX % nSrcXSize) +
    7969         630 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7970             : 
    7971     3450750 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7972           0 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7973             :                                             iSrcOffset))
    7974             :                             {
    7975           0 :                                 continue;
    7976             :                             }
    7977             : 
    7978             :                             // Returns pixel value if it is not no data.
    7979     3450750 :                             if (GWKGetPixelValue(
    7980             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7981     6901500 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7982     3450750 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7983             :                             {
    7984     3450750 :                                 bFoundValid = true;
    7985     3450750 :                                 if (dfTotalReal > dfValueRealTmp)
    7986             :                                 {
    7987      443069 :                                     dfTotalReal = dfValueRealTmp;
    7988             :                                 }
    7989             :                             }
    7990             :                         }
    7991             :                     }
    7992             : 
    7993      335012 :                     if (bFoundValid)
    7994             :                     {
    7995      335012 :                         dfValueReal = dfTotalReal;
    7996             : 
    7997      335012 :                         if (poWK->bApplyVerticalShift)
    7998             :                         {
    7999           0 :                             if (!std::isfinite(padfZ[iDstX]))
    8000           0 :                                 continue;
    8001             :                             // Subtract padfZ[] since the coordinate
    8002             :                             // transformation is from target to source
    8003           0 :                             dfValueReal =
    8004           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    8005           0 :                                 padfZ[iDstX] *
    8006             :                                     dfMultFactorVerticalShiftPipeline;
    8007             :                         }
    8008             : 
    8009      335012 :                         dfBandDensity = 1;
    8010      335012 :                         bHasFoundDensity = true;
    8011             :                     }
    8012             :                 }  // GRA_Min.
    8013     1305880 :                 else if (nAlgo == GWKAOM_Quant)
    8014             :                 // poWK->eResample == GRA_Med | GRA_Q1 | GRA_Q3.
    8015             :                 {
    8016     1005040 :                     bool bFoundValid = false;
    8017     1005040 :                     std::vector<double> dfRealValuesTmp;
    8018             : 
    8019             :                     // This code adapted from nAlgo 1 method, GRA_Average.
    8020     3863170 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    8021             :                     {
    8022     2858130 :                         iSrcOffset = iSrcXMin +
    8023     2858130 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8024    13210400 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    8025             :                              iSrcX++, iSrcOffset++)
    8026             :                         {
    8027    10352300 :                             if (bWrapOverX)
    8028        1890 :                                 iSrcOffset =
    8029        1890 :                                     (iSrcX % nSrcXSize) +
    8030        1890 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8031             : 
    8032    10352300 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    8033           0 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    8034             :                                             iSrcOffset))
    8035             :                             {
    8036           0 :                                 continue;
    8037             :                             }
    8038             : 
    8039             :                             // Returns pixel value if it is not no data.
    8040    10352300 :                             if (GWKGetPixelValue(
    8041             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    8042    20704500 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    8043    10352300 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    8044             :                             {
    8045    10352300 :                                 bFoundValid = true;
    8046    10352300 :                                 dfRealValuesTmp.push_back(dfValueRealTmp);
    8047             :                             }
    8048             :                         }
    8049             :                     }
    8050             : 
    8051     1005040 :                     if (bFoundValid)
    8052             :                     {
    8053     1005040 :                         std::sort(dfRealValuesTmp.begin(),
    8054             :                                   dfRealValuesTmp.end());
    8055             :                         int quantIdx = static_cast<int>(
    8056     1005040 :                             std::ceil(quant * dfRealValuesTmp.size() - 1));
    8057     1005040 :                         dfValueReal = dfRealValuesTmp[quantIdx];
    8058             : 
    8059     1005040 :                         if (poWK->bApplyVerticalShift)
    8060             :                         {
    8061           0 :                             if (!std::isfinite(padfZ[iDstX]))
    8062           0 :                                 continue;
    8063             :                             // Subtract padfZ[] since the coordinate
    8064             :                             // transformation is from target to source
    8065           0 :                             dfValueReal =
    8066           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    8067           0 :                                 padfZ[iDstX] *
    8068             :                                     dfMultFactorVerticalShiftPipeline;
    8069             :                         }
    8070             : 
    8071     1005040 :                         dfBandDensity = 1;
    8072     1005040 :                         bHasFoundDensity = true;
    8073     1005040 :                         dfRealValuesTmp.clear();
    8074             :                     }
    8075             :                 }  // Quantile.
    8076             : 
    8077             :                 /* --------------------------------------------------------------------
    8078             :                  */
    8079             :                 /*      We have a computed value from the source.  Now apply it
    8080             :                  * to      */
    8081             :                 /*      the destination pixel. */
    8082             :                 /* --------------------------------------------------------------------
    8083             :                  */
    8084     2776380 :                 if (bHasFoundDensity)
    8085             :                 {
    8086             :                     // TODO: Should we compute dfBandDensity in fct of
    8087             :                     // nCount/nCount2, or use as a threshold to set the dest
    8088             :                     // value?
    8089             :                     // dfBandDensity = (float) nCount / nCount2;
    8090             :                     // if( (float) nCount / nCount2 > 0.1 )
    8091             :                     // or fix gdalwarp crop_to_cutline to crop partially
    8092             :                     // overlapping pixels.
    8093     2776380 :                     GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    8094             :                                      dfValueReal, dfValueImag);
    8095             :                 }
    8096             :             }
    8097             : 
    8098     1663170 :             if (!bHasFoundDensity)
    8099      311290 :                 continue;
    8100             : 
    8101             :             /* --------------------------------------------------------------------
    8102             :              */
    8103             :             /*      Update destination density/validity masks. */
    8104             :             /* --------------------------------------------------------------------
    8105             :              */
    8106     1351880 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    8107             : 
    8108     1351880 :             if (poWK->panDstValid != nullptr)
    8109             :             {
    8110          74 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    8111             :             }
    8112             :         } /* Next iDstX */
    8113             : 
    8114             :         /* --------------------------------------------------------------------
    8115             :          */
    8116             :         /*      Report progress to the user, and optionally cancel out. */
    8117             :         /* --------------------------------------------------------------------
    8118             :          */
    8119        6497 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    8120           0 :             break;
    8121             :     }
    8122             : 
    8123             :     /* -------------------------------------------------------------------- */
    8124             :     /*      Cleanup and return.                                             */
    8125             :     /* -------------------------------------------------------------------- */
    8126         130 :     CPLFree(padfX);
    8127         130 :     CPLFree(padfY);
    8128         130 :     CPLFree(padfZ);
    8129         130 :     CPLFree(padfX2);
    8130         130 :     CPLFree(padfY2);
    8131         130 :     CPLFree(padfZ2);
    8132         130 :     CPLFree(pabSuccess);
    8133         130 :     CPLFree(pabSuccess2);
    8134         130 :     VSIFree(pafCounts);
    8135         130 :     VSIFree(pafRealVals);
    8136             : }
    8137             : 
    8138             : /************************************************************************/
    8139             : /*                         getOrientation()                             */
    8140             : /************************************************************************/
    8141             : 
    8142             : typedef std::pair<double, double> XYPair;
    8143             : 
    8144             : // Returns 1 whether (p1,p2,p3) is clockwise oriented,
    8145             : // -1 if it is counter-clockwise oriented,
    8146             : // or 0 if it is colinear.
    8147     2355910 : static int getOrientation(const XYPair &p1, const XYPair &p2, const XYPair &p3)
    8148             : {
    8149     2355910 :     const double p1x = p1.first;
    8150     2355910 :     const double p1y = p1.second;
    8151     2355910 :     const double p2x = p2.first;
    8152     2355910 :     const double p2y = p2.second;
    8153     2355910 :     const double p3x = p3.first;
    8154     2355910 :     const double p3y = p3.second;
    8155     2355910 :     const double val = (p2y - p1y) * (p3x - p2x) - (p2x - p1x) * (p3y - p2y);
    8156     2355910 :     if (std::abs(val) < 1e-20)
    8157        2690 :         return 0;
    8158     2353220 :     else if (val > 0)
    8159           0 :         return 1;
    8160             :     else
    8161     2353220 :         return -1;
    8162             : }
    8163             : 
    8164             : /************************************************************************/
    8165             : /*                          isConvex()                                  */
    8166             : /************************************************************************/
    8167             : 
    8168             : typedef std::vector<XYPair> XYPoly;
    8169             : 
    8170             : // poly must be closed
    8171      785302 : static bool isConvex(const XYPoly &poly)
    8172             : {
    8173      785302 :     const size_t n = poly.size();
    8174      785302 :     size_t i = 0;
    8175      785302 :     int last_orientation = getOrientation(poly[i], poly[i + 1], poly[i + 2]);
    8176      785302 :     ++i;
    8177     2355910 :     for (; i < n - 2; ++i)
    8178             :     {
    8179             :         const int orientation =
    8180     1570600 :             getOrientation(poly[i], poly[i + 1], poly[i + 2]);
    8181     1570600 :         if (orientation != 0)
    8182             :         {
    8183     1567910 :             if (last_orientation == 0)
    8184           0 :                 last_orientation = orientation;
    8185     1567910 :             else if (orientation != last_orientation)
    8186           0 :                 return false;
    8187             :         }
    8188             :     }
    8189      785302 :     return true;
    8190             : }
    8191             : 
    8192             : /************************************************************************/
    8193             : /*                     pointIntersectsConvexPoly()                      */
    8194             : /************************************************************************/
    8195             : 
    8196             : // Returns whether xy intersects poly, that must be closed and convex.
    8197     6049100 : static bool pointIntersectsConvexPoly(const XYPair &xy, const XYPoly &poly)
    8198             : {
    8199     6049100 :     const size_t n = poly.size();
    8200     6049100 :     double dx1 = xy.first - poly[0].first;
    8201     6049100 :     double dy1 = xy.second - poly[0].second;
    8202     6049100 :     double dx2 = poly[1].first - poly[0].first;
    8203     6049100 :     double dy2 = poly[1].second - poly[0].second;
    8204     6049100 :     double prevCrossProduct = dx1 * dy2 - dx2 * dy1;
    8205             : 
    8206             :     // Check if the point remains on the same side (left/right) of all edges
    8207    14556400 :     for (size_t i = 2; i < n; i++)
    8208             :     {
    8209    12793100 :         dx1 = xy.first - poly[i - 1].first;
    8210    12793100 :         dy1 = xy.second - poly[i - 1].second;
    8211             : 
    8212    12793100 :         dx2 = poly[i].first - poly[i - 1].first;
    8213    12793100 :         dy2 = poly[i].second - poly[i - 1].second;
    8214             : 
    8215    12793100 :         double crossProduct = dx1 * dy2 - dx2 * dy1;
    8216    12793100 :         if (std::abs(prevCrossProduct) < 1e-20)
    8217      725558 :             prevCrossProduct = crossProduct;
    8218    12067500 :         else if (prevCrossProduct * crossProduct < 0)
    8219     4285760 :             return false;
    8220             :     }
    8221             : 
    8222     1763340 :     return true;
    8223             : }
    8224             : 
    8225             : /************************************************************************/
    8226             : /*                     getIntersection()                                */
    8227             : /************************************************************************/
    8228             : 
    8229             : /* Returns intersection of [p1,p2] with [p3,p4], if
    8230             :  * it is a single point, and the 2 segments are not colinear.
    8231             :  */
    8232    11811000 : static bool getIntersection(const XYPair &p1, const XYPair &p2,
    8233             :                             const XYPair &p3, const XYPair &p4, XYPair &xy)
    8234             : {
    8235    11811000 :     const double x1 = p1.first;
    8236    11811000 :     const double y1 = p1.second;
    8237    11811000 :     const double x2 = p2.first;
    8238    11811000 :     const double y2 = p2.second;
    8239    11811000 :     const double x3 = p3.first;
    8240    11811000 :     const double y3 = p3.second;
    8241    11811000 :     const double x4 = p4.first;
    8242    11811000 :     const double y4 = p4.second;
    8243    11811000 :     const double t_num = (x1 - x3) * (y3 - y4) - (y1 - y3) * (x3 - x4);
    8244    11811000 :     const double denom = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4);
    8245    11811000 :     if (t_num * denom < 0 || std::abs(t_num) > std::abs(denom) || denom == 0)
    8246     9260780 :         return false;
    8247             : 
    8248     2550260 :     const double u_num = (x1 - x3) * (y1 - y2) - (y1 - y3) * (x1 - x2);
    8249     2550260 :     if (u_num * denom < 0 || std::abs(u_num) > std::abs(denom))
    8250      973924 :         return false;
    8251             : 
    8252     1576340 :     const double t = t_num / denom;
    8253     1576340 :     xy.first = x1 + t * (x2 - x1);
    8254     1576340 :     xy.second = y1 + t * (y2 - y1);
    8255     1576340 :     return true;
    8256             : }
    8257             : 
    8258             : /************************************************************************/
    8259             : /*                     getConvexPolyIntersection()                      */
    8260             : /************************************************************************/
    8261             : 
    8262             : // poly1 and poly2 must be closed and convex.
    8263             : // The returned intersection will not necessarily be closed.
    8264      785302 : static void getConvexPolyIntersection(const XYPoly &poly1, const XYPoly &poly2,
    8265             :                                       XYPoly &intersection)
    8266             : {
    8267      785302 :     intersection.clear();
    8268             : 
    8269             :     // Add all points of poly1 inside poly2
    8270     3926510 :     for (size_t i = 0; i < poly1.size() - 1; ++i)
    8271             :     {
    8272     3141210 :         if (pointIntersectsConvexPoly(poly1[i], poly2))
    8273     1187430 :             intersection.push_back(poly1[i]);
    8274             :     }
    8275      785302 :     if (intersection.size() == poly1.size() - 1)
    8276             :     {
    8277             :         // poly1 is inside poly2
    8278      119100 :         return;
    8279             :     }
    8280             : 
    8281             :     // Add all points of poly2 inside poly1
    8282     3634860 :     for (size_t i = 0; i < poly2.size() - 1; ++i)
    8283             :     {
    8284     2907890 :         if (pointIntersectsConvexPoly(poly2[i], poly1))
    8285      575904 :             intersection.push_back(poly2[i]);
    8286             :     }
    8287             : 
    8288             :     // Compute the intersection of all edges of both polygons
    8289      726972 :     XYPair xy;
    8290     3634860 :     for (size_t i1 = 0; i1 < poly1.size() - 1; ++i1)
    8291             :     {
    8292    14539400 :         for (size_t i2 = 0; i2 < poly2.size() - 1; ++i2)
    8293             :         {
    8294    11631600 :             if (getIntersection(poly1[i1], poly1[i1 + 1], poly2[i2],
    8295    11631600 :                                 poly2[i2 + 1], xy))
    8296             :             {
    8297     1576230 :                 intersection.push_back(xy);
    8298             :             }
    8299             :         }
    8300             :     }
    8301             : 
    8302      726972 :     if (intersection.empty())
    8303       60770 :         return;
    8304             : 
    8305             :     // Find lowest-left point in intersection set
    8306      666202 :     double lowest_x = cpl::NumericLimits<double>::max();
    8307      666202 :     double lowest_y = cpl::NumericLimits<double>::max();
    8308     3772450 :     for (const auto &pair : intersection)
    8309             :     {
    8310     3106240 :         const double x = pair.first;
    8311     3106240 :         const double y = pair.second;
    8312     3106240 :         if (y < lowest_y || (y == lowest_y && x < lowest_x))
    8313             :         {
    8314     1096040 :             lowest_x = x;
    8315     1096040 :             lowest_y = y;
    8316             :         }
    8317             :     }
    8318             : 
    8319     5737980 :     const auto sortFunc = [&](const XYPair &p1, const XYPair &p2)
    8320             :     {
    8321     5737980 :         const double p1x_diff = p1.first - lowest_x;
    8322     5737980 :         const double p1y_diff = p1.second - lowest_y;
    8323     5737980 :         const double p2x_diff = p2.first - lowest_x;
    8324     5737980 :         const double p2y_diff = p2.second - lowest_y;
    8325     5737980 :         if (p2y_diff == 0.0 && p1y_diff == 0.0)
    8326             :         {
    8327     2655420 :             if (p1x_diff >= 0)
    8328             :             {
    8329     2655420 :                 if (p2x_diff >= 0)
    8330     2655420 :                     return p1.first < p2.first;
    8331           0 :                 return true;
    8332             :             }
    8333             :             else
    8334             :             {
    8335           0 :                 if (p2x_diff >= 0)
    8336           0 :                     return false;
    8337           0 :                 return p1.first < p2.first;
    8338             :             }
    8339             :         }
    8340             : 
    8341     3082560 :         if (p2x_diff == 0.0 && p1x_diff == 0.0)
    8342     1046960 :             return p1.second < p2.second;
    8343             : 
    8344             :         double tan_p1;
    8345     2035600 :         if (p1x_diff == 0.0)
    8346      464622 :             tan_p1 = p1y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
    8347             :         else
    8348     1570980 :             tan_p1 = p1y_diff / p1x_diff;
    8349             : 
    8350             :         double tan_p2;
    8351     2035600 :         if (p2x_diff == 0.0)
    8352      839515 :             tan_p2 = p2y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
    8353             :         else
    8354     1196080 :             tan_p2 = p2y_diff / p2x_diff;
    8355             : 
    8356     2035600 :         if (tan_p1 >= 0)
    8357             :         {
    8358     1904790 :             if (tan_p2 >= 0)
    8359     1881590 :                 return tan_p1 < tan_p2;
    8360             :             else
    8361       23199 :                 return true;
    8362             :         }
    8363             :         else
    8364             :         {
    8365      130806 :             if (tan_p2 >= 0)
    8366      103900 :                 return false;
    8367             :             else
    8368       26906 :                 return tan_p1 < tan_p2;
    8369             :         }
    8370      666202 :     };
    8371             : 
    8372             :     // Sort points by increasing atan2(y-lowest_y, x-lowest_x) to form a convex
    8373             :     // hull
    8374      666202 :     std::sort(intersection.begin(), intersection.end(), sortFunc);
    8375             : 
    8376             :     // Remove duplicated points
    8377      666202 :     size_t j = 1;
    8378     3106240 :     for (size_t i = 1; i < intersection.size(); ++i)
    8379             :     {
    8380     2440040 :         if (intersection[i] != intersection[i - 1])
    8381             :         {
    8382     1452560 :             if (j < i)
    8383      545275 :                 intersection[j] = intersection[i];
    8384     1452560 :             ++j;
    8385             :         }
    8386             :     }
    8387      666202 :     intersection.resize(j);
    8388             : }
    8389             : 
    8390             : /************************************************************************/
    8391             : /*                            getArea()                                 */
    8392             : /************************************************************************/
    8393             : 
    8394             : // Returns the unsigned area enclosed by the vertices of poly.
                       //
                       // The value is 0.5 * |sum over i of x[i] * (y[i + 1] - y[i - 1])|, with
                       // the indices taken cyclically (a form of the shoelace formula). The
                       // wrap-around terms for the first and last vertices are written out
                       // explicitly before and after the loop.
                       //
                       // poly may or may not be closed: when the last vertex repeats the first
                       // one, the extra term only completes the term of vertex 0, so the sum is
                       // unchanged. The absolute value makes the result independent of the
                       // winding order.
                       //
                       // poly[0], poly[1], poly[n - 1] and poly[n - 2] are read without any
                       // runtime check, so poly must hold at least 2 points (see the disabled
                       // assertion in the body).
    8395      558521 : static double getArea(const XYPoly &poly)
    8396             : {
    8397             :     // CPLAssert(poly.size() >= 2);
    8398      558521 :     const size_t nPointCount = poly.size();
                           // Term for vertex 0: its predecessor wraps around to the last vertex.
    8399             :     double dfAreaSum =
    8400      558521 :         poly[0].first * (poly[1].second - poly[nPointCount - 1].second);
    8401             : 
                           // Terms for the interior vertices 1 .. nPointCount - 2.
    8402     1765140 :     for (size_t i = 1; i < nPointCount - 1; i++)
    8403             :     {
    8404     1206610 :         dfAreaSum += poly[i].first * (poly[i + 1].second - poly[i - 1].second);
    8405             :     }
    8406             : 
                           // Term for the last vertex: its successor wraps around to vertex 0.
    8407      558521 :     dfAreaSum += poly[nPointCount - 1].first *
    8408      558521 :                  (poly[0].second - poly[nPointCount - 2].second);
    8409             : 
                           // The sign of the sum depends on the winding order; only its magnitude
                           // is returned.
    8410      558521 :     return 0.5 * std::fabs(dfAreaSum);
    8411             : }
    8412             : 
    8413             : /************************************************************************/
    8414             : /*                           GWKSumPreserving()                         */
    8415             : /************************************************************************/
    8416             : 
    8417             : static void GWKSumPreservingThread(void *pData);
                       // Forward declaration: the worker is defined after GWKSumPreserving() but
                       // must be nameable there to be passed as a callback. Its pData argument
                       // is cast to a GWKJobStruct * by the worker.
    8418             : 
                       // Entry point of the sum-preserving warp for the kernel poWK.
                       //
                       // Forwards poWK to GWKRun() together with the label "GWKSumPreserving"
                       // and the worker function GWKSumPreservingThread, and returns GWKRun()'s
                       // CPLErr result unchanged.
                       // NOTE(review): how GWKRun() schedules the worker (job splitting,
                       // threading, progress reporting) is not visible from this block; confirm
                       // against its definition.
    8419          18 : static CPLErr GWKSumPreserving(GDALWarpKernel *poWK)
    8420             : {
    8421          18 :     return GWKRun(poWK, "GWKSumPreserving", GWKSumPreservingThread);
    8422             : }
    8423             : 
    8424          18 : static void GWKSumPreservingThread(void *pData)
    8425             : {
    8426          18 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    8427          18 :     GDALWarpKernel *poWK = psJob->poWK;
    8428          18 :     const int iYMin = psJob->iYMin;
    8429          18 :     const int iYMax = psJob->iYMax;
    8430             :     const bool bIsAffineNoRotation =
    8431          18 :         GDALTransformIsAffineNoRotation(poWK->pfnTransformer,
    8432          26 :                                         poWK->pTransformerArg) &&
    8433             :         // for debug/testing purposes
    8434           8 :         CPLTestBool(
    8435          18 :             CPLGetConfigOption("GDAL_WARP_USE_AFFINE_OPTIMIZATION", "YES"));
    8436             : 
    8437          18 :     const int nDstXSize = poWK->nDstXSize;
    8438          18 :     const int nSrcXSize = poWK->nSrcXSize;
    8439          18 :     const int nSrcYSize = poWK->nSrcYSize;
    8440             : 
    8441          36 :     std::vector<double> adfX0(nSrcXSize + 1);
    8442          36 :     std::vector<double> adfY0(nSrcXSize + 1);
    8443          36 :     std::vector<double> adfZ0(nSrcXSize + 1);
    8444          36 :     std::vector<double> adfX1(nSrcXSize + 1);
    8445          36 :     std::vector<double> adfY1(nSrcXSize + 1);
    8446          36 :     std::vector<double> adfZ1(nSrcXSize + 1);
    8447          36 :     std::vector<int> abSuccess0(nSrcXSize + 1);
    8448          36 :     std::vector<int> abSuccess1(nSrcXSize + 1);
    8449             : 
    8450             :     CPLRectObj sGlobalBounds;
    8451          18 :     sGlobalBounds.minx = -2 * poWK->dfXScale;
    8452          18 :     sGlobalBounds.miny = iYMin - 2 * poWK->dfYScale;
    8453          18 :     sGlobalBounds.maxx = nDstXSize + 2 * poWK->dfXScale;
    8454          18 :     sGlobalBounds.maxy = iYMax + 2 * poWK->dfYScale;
    8455          18 :     CPLQuadTree *hQuadTree = CPLQuadTreeCreate(&sGlobalBounds, nullptr);
    8456             : 
    8457             :     struct SourcePixel
    8458             :     {
    8459             :         int iSrcX;
    8460             :         int iSrcY;
    8461             : 
    8462             :         // Coordinates of source pixel in target pixel coordinates
    8463             :         double dfDstX0;
    8464             :         double dfDstY0;
    8465             :         double dfDstX1;
    8466             :         double dfDstY1;
    8467             :         double dfDstX2;
    8468             :         double dfDstY2;
    8469             :         double dfDstX3;
    8470             :         double dfDstY3;
    8471             : 
    8472             :         // Source pixel total area (might be larger than the one described
    8473             :         // by above coordinates, if the pixel was crossing the antimeridian
    8474             :         // and split)
    8475             :         double dfArea;
    8476             :     };
    8477             : 
    8478          36 :     std::vector<SourcePixel> sourcePixels;
    8479             : 
    8480          36 :     XYPoly discontinuityLeft(5);
    8481          36 :     XYPoly discontinuityRight(5);
    8482             : 
    8483             :     /* ==================================================================== */
    8484             :     /*      First pass: transform the 4 corners of each potential           */
    8485             :     /*      contributing source pixel to target pixel coordinates.          */
    8486             :     /* ==================================================================== */
    8487             : 
    8488             :     // Special case for top line
    8489             :     {
    8490          18 :         int iY = 0;
    8491        1130 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8492             :         {
    8493        1112 :             adfX1[iX] = iX + poWK->nSrcXOff;
    8494        1112 :             adfY1[iX] = iY + poWK->nSrcYOff;
    8495        1112 :             adfZ1[iX] = 0;
    8496             :         }
    8497             : 
    8498          18 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
    8499             :                              adfX1.data(), adfY1.data(), adfZ1.data(),
    8500             :                              abSuccess1.data());
    8501             : 
    8502        1130 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8503             :         {
    8504        1112 :             if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
    8505           0 :                 abSuccess1[iX] = FALSE;
    8506             :             else
    8507             :             {
    8508        1112 :                 adfX1[iX] -= poWK->nDstXOff;
    8509        1112 :                 adfY1[iX] -= poWK->nDstYOff;
    8510             :             }
    8511             :         }
    8512             :     }
    8513             : 
    8514      413412 :     const auto getInsideXSign = [poWK, nDstXSize](double dfX)
    8515             :     {
    8516      413412 :         return dfX - poWK->nDstXOff >= -2 * poWK->dfXScale &&
    8517      205344 :                        dfX - poWK->nDstXOff <= nDstXSize + 2 * poWK->dfXScale
    8518      413412 :                    ? 1
    8519      208068 :                    : -1;
    8520          18 :     };
    8521             : 
    8522             :     const auto FindDiscontinuity =
    8523          80 :         [poWK, psJob, getInsideXSign](
    8524             :             double dfXLeft, double dfXRight, double dfY,
    8525             :             int XLeftReprojectedInsideSign, double &dfXMidReprojectedLeft,
    8526         800 :             double &dfXMidReprojectedRight, double &dfYMidReprojected)
    8527             :     {
    8528         880 :         for (int i = 0; i < 10 && dfXRight - dfXLeft > 1e-8; ++i)
    8529             :         {
    8530         800 :             double dfXMid = (dfXLeft + dfXRight) / 2;
    8531         800 :             double dfXMidReprojected = dfXMid;
    8532         800 :             dfYMidReprojected = dfY;
    8533         800 :             double dfZ = 0;
    8534         800 :             int nSuccess = 0;
    8535         800 :             poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1,
    8536             :                                  &dfXMidReprojected, &dfYMidReprojected, &dfZ,
    8537             :                                  &nSuccess);
    8538         800 :             if (XLeftReprojectedInsideSign != getInsideXSign(dfXMidReprojected))
    8539             :             {
    8540         456 :                 dfXRight = dfXMid;
    8541         456 :                 dfXMidReprojectedRight = dfXMidReprojected;
    8542             :             }
    8543             :             else
    8544             :             {
    8545         344 :                 dfXLeft = dfXMid;
    8546         344 :                 dfXMidReprojectedLeft = dfXMidReprojected;
    8547             :             }
    8548             :         }
    8549          80 :     };
    8550             : 
    8551         566 :     for (int iY = 0; iY < nSrcYSize; ++iY)
    8552             :     {
    8553         548 :         std::swap(adfX0, adfX1);
    8554         548 :         std::swap(adfY0, adfY1);
    8555         548 :         std::swap(adfZ0, adfZ1);
    8556         548 :         std::swap(abSuccess0, abSuccess1);
    8557             : 
    8558      104512 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8559             :         {
    8560      103964 :             adfX1[iX] = iX + poWK->nSrcXOff;
    8561      103964 :             adfY1[iX] = iY + 1 + poWK->nSrcYOff;
    8562      103964 :             adfZ1[iX] = 0;
    8563             :         }
    8564             : 
    8565         548 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
    8566             :                              adfX1.data(), adfY1.data(), adfZ1.data(),
    8567             :                              abSuccess1.data());
    8568             : 
    8569      104512 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8570             :         {
    8571      103964 :             if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
    8572           0 :                 abSuccess1[iX] = FALSE;
    8573             :             else
    8574             :             {
    8575      103964 :                 adfX1[iX] -= poWK->nDstXOff;
    8576      103964 :                 adfY1[iX] -= poWK->nDstYOff;
    8577             :             }
    8578             :         }
    8579             : 
    8580      103964 :         for (int iX = 0; iX < nSrcXSize; ++iX)
    8581             :         {
    8582      206832 :             if (abSuccess0[iX] && abSuccess0[iX + 1] && abSuccess1[iX] &&
    8583      103416 :                 abSuccess1[iX + 1])
    8584             :             {
    8585             :                 /* --------------------------------------------------------------------
    8586             :                  */
    8587             :                 /*      Do not try to apply transparent source pixels to the
    8588             :                  * destination.*/
    8589             :                 /* --------------------------------------------------------------------
    8590             :                  */
    8591      103416 :                 const auto iSrcOffset =
    8592      103416 :                     iX + static_cast<GPtrDiff_t>(iY) * nSrcXSize;
    8593      105816 :                 if (poWK->panUnifiedSrcValid != nullptr &&
    8594        2400 :                     !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    8595             :                 {
    8596       10971 :                     continue;
    8597             :                 }
    8598             : 
    8599      103410 :                 if (poWK->pafUnifiedSrcDensity != nullptr)
    8600             :                 {
    8601           0 :                     if (poWK->pafUnifiedSrcDensity[iSrcOffset] <
    8602             :                         SRC_DENSITY_THRESHOLD)
    8603           0 :                         continue;
    8604             :                 }
    8605             : 
    8606             :                 SourcePixel sp;
    8607      103410 :                 sp.dfArea = 0;
    8608      103410 :                 sp.dfDstX0 = adfX0[iX];
    8609      103410 :                 sp.dfDstY0 = adfY0[iX];
    8610      103410 :                 sp.dfDstX1 = adfX0[iX + 1];
    8611      103410 :                 sp.dfDstY1 = adfY0[iX + 1];
    8612      103410 :                 sp.dfDstX2 = adfX1[iX + 1];
    8613      103410 :                 sp.dfDstY2 = adfY1[iX + 1];
    8614      103410 :                 sp.dfDstX3 = adfX1[iX];
    8615      103410 :                 sp.dfDstY3 = adfY1[iX];
    8616             : 
    8617             :                 // Detect pixel that likely cross the anti-meridian and
    8618             :                 // introduce a discontinuity when reprojected.
    8619             : 
    8620      103410 :                 if (getInsideXSign(adfX0[iX]) !=
    8621      103506 :                         getInsideXSign(adfX0[iX + 1]) &&
    8622         164 :                     getInsideXSign(adfX0[iX]) == getInsideXSign(adfX1[iX]) &&
    8623          68 :                     getInsideXSign(adfX0[iX + 1]) ==
    8624      103574 :                         getInsideXSign(adfX1[iX + 1]) &&
    8625          40 :                     (adfY1[iX] - adfY0[iX]) * (adfY1[iX + 1] - adfY0[iX + 1]) >
    8626             :                         0)
    8627             :                 {
    8628          40 :                     double dfXMidReprojectedLeftTop = 0;
    8629          40 :                     double dfXMidReprojectedRightTop = 0;
    8630          40 :                     double dfYMidReprojectedTop = 0;
    8631          40 :                     FindDiscontinuity(
    8632          40 :                         iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
    8633          80 :                         iY + poWK->nSrcYOff, getInsideXSign(adfX0[iX]),
    8634             :                         dfXMidReprojectedLeftTop, dfXMidReprojectedRightTop,
    8635             :                         dfYMidReprojectedTop);
    8636          40 :                     double dfXMidReprojectedLeftBottom = 0;
    8637          40 :                     double dfXMidReprojectedRightBottom = 0;
    8638          40 :                     double dfYMidReprojectedBottom = 0;
    8639          40 :                     FindDiscontinuity(
    8640          40 :                         iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
    8641          80 :                         iY + poWK->nSrcYOff + 1, getInsideXSign(adfX1[iX]),
    8642             :                         dfXMidReprojectedLeftBottom,
    8643             :                         dfXMidReprojectedRightBottom, dfYMidReprojectedBottom);
    8644             : 
    8645          40 :                     discontinuityLeft[0] = XYPair(adfX0[iX], adfY0[iX]);
    8646          40 :                     discontinuityLeft[1] =
    8647          80 :                         XYPair(dfXMidReprojectedLeftTop, dfYMidReprojectedTop);
    8648          40 :                     discontinuityLeft[2] = XYPair(dfXMidReprojectedLeftBottom,
    8649          40 :                                                   dfYMidReprojectedBottom);
    8650          40 :                     discontinuityLeft[3] = XYPair(adfX1[iX], adfY1[iX]);
    8651          40 :                     discontinuityLeft[4] = XYPair(adfX0[iX], adfY0[iX]);
    8652             : 
    8653          40 :                     discontinuityRight[0] =
    8654          80 :                         XYPair(adfX0[iX + 1], adfY0[iX + 1]);
    8655          40 :                     discontinuityRight[1] =
    8656          80 :                         XYPair(dfXMidReprojectedRightTop, dfYMidReprojectedTop);
    8657          40 :                     discontinuityRight[2] = XYPair(dfXMidReprojectedRightBottom,
    8658          40 :                                                    dfYMidReprojectedBottom);
    8659          40 :                     discontinuityRight[3] =
    8660          80 :                         XYPair(adfX1[iX + 1], adfY1[iX + 1]);
    8661          40 :                     discontinuityRight[4] =
    8662          80 :                         XYPair(adfX0[iX + 1], adfY0[iX + 1]);
    8663             : 
    8664          40 :                     sp.dfArea = getArea(discontinuityLeft) +
    8665          40 :                                 getArea(discontinuityRight);
    8666          40 :                     if (getInsideXSign(adfX0[iX]) >= 1)
    8667             :                     {
    8668          20 :                         sp.dfDstX1 = dfXMidReprojectedLeftTop;
    8669          20 :                         sp.dfDstY1 = dfYMidReprojectedTop;
    8670          20 :                         sp.dfDstX2 = dfXMidReprojectedLeftBottom;
    8671          20 :                         sp.dfDstY2 = dfYMidReprojectedBottom;
    8672             :                     }
    8673             :                     else
    8674             :                     {
    8675          20 :                         sp.dfDstX0 = dfXMidReprojectedRightTop;
    8676          20 :                         sp.dfDstY0 = dfYMidReprojectedTop;
    8677          20 :                         sp.dfDstX3 = dfXMidReprojectedRightBottom;
    8678          20 :                         sp.dfDstY3 = dfYMidReprojectedBottom;
    8679             :                     }
    8680             :                 }
    8681             : 
    8682             :                 // Bounding box of source pixel (expressed in target pixel
    8683             :                 // coordinates)
    8684             :                 CPLRectObj sRect;
    8685      103410 :                 sRect.minx = std::min(std::min(sp.dfDstX0, sp.dfDstX1),
    8686      103410 :                                       std::min(sp.dfDstX2, sp.dfDstX3));
    8687      103410 :                 sRect.miny = std::min(std::min(sp.dfDstY0, sp.dfDstY1),
    8688      103410 :                                       std::min(sp.dfDstY2, sp.dfDstY3));
    8689      103410 :                 sRect.maxx = std::max(std::max(sp.dfDstX0, sp.dfDstX1),
    8690      103410 :                                       std::max(sp.dfDstX2, sp.dfDstX3));
    8691      103410 :                 sRect.maxy = std::max(std::max(sp.dfDstY0, sp.dfDstY1),
    8692      103410 :                                       std::max(sp.dfDstY2, sp.dfDstY3));
    8693      103410 :                 if (!(sRect.minx < nDstXSize && sRect.maxx > 0 &&
    8694      101350 :                       sRect.miny < iYMax && sRect.maxy > iYMin))
    8695             :                 {
    8696       10852 :                     continue;
    8697             :                 }
    8698             : 
    8699       92558 :                 sp.iSrcX = iX;
    8700       92558 :                 sp.iSrcY = iY;
    8701             : 
    8702       92558 :                 if (!bIsAffineNoRotation)
    8703             :                 {
    8704             :                     // Check polygon validity (no self-crossing)
    8705       89745 :                     XYPair xy;
    8706       89745 :                     if (getIntersection(XYPair(sp.dfDstX0, sp.dfDstY0),
    8707       89745 :                                         XYPair(sp.dfDstX1, sp.dfDstY1),
    8708       89745 :                                         XYPair(sp.dfDstX2, sp.dfDstY2),
    8709      269235 :                                         XYPair(sp.dfDstX3, sp.dfDstY3), xy) ||
    8710       89745 :                         getIntersection(XYPair(sp.dfDstX1, sp.dfDstY1),
    8711       89745 :                                         XYPair(sp.dfDstX2, sp.dfDstY2),
    8712       89745 :                                         XYPair(sp.dfDstX0, sp.dfDstY0),
    8713      179490 :                                         XYPair(sp.dfDstX3, sp.dfDstY3), xy))
    8714             :                     {
    8715         113 :                         continue;
    8716             :                     }
    8717             :                 }
    8718             : 
    8719       92445 :                 CPLQuadTreeInsertWithBounds(
    8720             :                     hQuadTree,
    8721             :                     reinterpret_cast<void *>(
    8722       92445 :                         static_cast<uintptr_t>(sourcePixels.size())),
    8723             :                     &sRect);
    8724             : 
    8725       92445 :                 sourcePixels.push_back(sp);
    8726             :             }
    8727             :         }
    8728             :     }
    8729             : 
    8730          36 :     std::vector<double> adfRealValue(poWK->nBands);
    8731          36 :     std::vector<double> adfImagValue(poWK->nBands);
    8732          36 :     std::vector<double> adfBandDensity(poWK->nBands);
    8733          36 :     std::vector<double> adfWeight(poWK->nBands);
    8734             : 
    8735             : #ifdef CHECK_SUM_WITH_GEOS
    8736             :     auto hGEOSContext = OGRGeometry::createGEOSContext();
    8737             :     auto seq1 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
    8738             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 0, 0.0, 0.0);
    8739             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 1, 1.0, 0.0);
    8740             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 2, 1.0, 1.0);
    8741             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 3, 0.0, 1.0);
    8742             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 4, 0.0, 0.0);
    8743             :     auto hLR1 = GEOSGeom_createLinearRing_r(hGEOSContext, seq1);
    8744             :     auto hP1 = GEOSGeom_createPolygon_r(hGEOSContext, hLR1, nullptr, 0);
    8745             : 
    8746             :     auto seq2 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
    8747             :     auto hLR2 = GEOSGeom_createLinearRing_r(hGEOSContext, seq2);
    8748             :     auto hP2 = GEOSGeom_createPolygon_r(hGEOSContext, hLR2, nullptr, 0);
    8749             : #endif
    8750             : 
    8751             :     const XYPoly xy1{
    8752          36 :         {0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}};
    8753          36 :     XYPoly xy2(5);
    8754          36 :     XYPoly xy2_triangle(4);
    8755          36 :     XYPoly intersection;
    8756             : 
    8757             :     /* ==================================================================== */
    8758             :     /*      Loop over output lines.                                         */
    8759             :     /* ==================================================================== */
    8760         891 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    8761             :     {
    8762             :         CPLRectObj sRect;
    8763         873 :         sRect.miny = iDstY;
    8764         873 :         sRect.maxy = iDstY + 1;
    8765             : 
    8766             :         /* ====================================================================
    8767             :          */
    8768             :         /*      Loop over pixels in output scanline. */
    8769             :         /* ====================================================================
    8770             :          */
    8771      221042 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    8772             :         {
    8773      220169 :             sRect.minx = iDstX;
    8774      220169 :             sRect.maxx = iDstX + 1;
    8775      220169 :             int nSourcePixels = 0;
    8776             :             void **pahSourcePixel =
    8777      220169 :                 CPLQuadTreeSearch(hQuadTree, &sRect, &nSourcePixels);
    8778      220169 :             if (nSourcePixels == 0)
    8779             :             {
    8780        1258 :                 CPLFree(pahSourcePixel);
    8781        1262 :                 continue;
    8782             :             }
    8783             : 
    8784      218911 :             std::fill(adfRealValue.begin(), adfRealValue.end(), 0);
    8785      218911 :             std::fill(adfImagValue.begin(), adfImagValue.end(), 0);
    8786      218911 :             std::fill(adfBandDensity.begin(), adfBandDensity.end(), 0);
    8787      218911 :             std::fill(adfWeight.begin(), adfWeight.end(), 0);
    8788      218911 :             double dfDensity = 0;
    8789      218911 :             double dfTotalWeight = 0;
    8790             : 
    8791             :             /* ====================================================================
    8792             :              */
    8793             :             /*          Iterate over each contributing source pixel to add its
    8794             :              */
    8795             :             /*          value weighed by the ratio of the area of its
    8796             :              * intersection  */
    8797             :             /*          with the target pixel divided by the area of the source
    8798             :              */
    8799             :             /*          pixel. */
    8800             :             /* ====================================================================
    8801             :              */
    8802     1020520 :             for (int i = 0; i < nSourcePixels; ++i)
    8803             :             {
    8804      801614 :                 const int iSourcePixel = static_cast<int>(
    8805      801614 :                     reinterpret_cast<uintptr_t>(pahSourcePixel[i]));
    8806      801614 :                 auto &sp = sourcePixels[iSourcePixel];
    8807             : 
    8808      801614 :                 double dfWeight = 0.0;
    8809      801614 :                 if (bIsAffineNoRotation)
    8810             :                 {
    8811             :                     // Optimization since the source pixel is a rectangle in
    8812             :                     // target pixel coordinates
    8813       16312 :                     double dfSrcMinX = std::min(sp.dfDstX0, sp.dfDstX2);
    8814       16312 :                     double dfSrcMaxX = std::max(sp.dfDstX0, sp.dfDstX2);
    8815       16312 :                     double dfSrcMinY = std::min(sp.dfDstY0, sp.dfDstY2);
    8816       16312 :                     double dfSrcMaxY = std::max(sp.dfDstY0, sp.dfDstY2);
    8817       16312 :                     double dfIntersMinX = std::max<double>(dfSrcMinX, iDstX);
    8818       16312 :                     double dfIntersMaxX = std::min(dfSrcMaxX, iDstX + 1.0);
    8819       16312 :                     double dfIntersMinY = std::max<double>(dfSrcMinY, iDstY);
    8820       16312 :                     double dfIntersMaxY = std::min(dfSrcMaxY, iDstY + 1.0);
    8821       16312 :                     dfWeight =
    8822       16312 :                         ((dfIntersMaxX - dfIntersMinX) *
    8823       16312 :                          (dfIntersMaxY - dfIntersMinY)) /
    8824       16312 :                         ((dfSrcMaxX - dfSrcMinX) * (dfSrcMaxY - dfSrcMinY));
    8825             :                 }
    8826             :                 else
    8827             :                 {
    8828             :                     // Compute the polygon of the source pixel in target pixel
    8829             :                     // coordinates, and shifted to the target pixel (unit square
    8830             :                     // coordinates)
    8831             : 
    8832      785302 :                     xy2[0] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
    8833      785302 :                     xy2[1] = {sp.dfDstX1 - iDstX, sp.dfDstY1 - iDstY};
    8834      785302 :                     xy2[2] = {sp.dfDstX2 - iDstX, sp.dfDstY2 - iDstY};
    8835      785302 :                     xy2[3] = {sp.dfDstX3 - iDstX, sp.dfDstY3 - iDstY};
    8836      785302 :                     xy2[4] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
    8837             : 
    8838      785302 :                     if (isConvex(xy2))
    8839             :                     {
    8840      785302 :                         getConvexPolyIntersection(xy1, xy2, intersection);
    8841      785302 :                         if (intersection.size() >= 3)
    8842             :                         {
    8843      468849 :                             dfWeight = getArea(intersection);
    8844             :                         }
    8845             :                     }
    8846             :                     else
    8847             :                     {
    8848             :                         // Split xy2 into 2 triangles.
    8849           0 :                         xy2_triangle[0] = xy2[0];
    8850           0 :                         xy2_triangle[1] = xy2[1];
    8851           0 :                         xy2_triangle[2] = xy2[2];
    8852           0 :                         xy2_triangle[3] = xy2[0];
    8853           0 :                         getConvexPolyIntersection(xy1, xy2_triangle,
    8854             :                                                   intersection);
    8855           0 :                         if (intersection.size() >= 3)
    8856             :                         {
    8857           0 :                             dfWeight = getArea(intersection);
    8858             :                         }
    8859             : 
    8860           0 :                         xy2_triangle[1] = xy2[2];
    8861           0 :                         xy2_triangle[2] = xy2[3];
    8862           0 :                         getConvexPolyIntersection(xy1, xy2_triangle,
    8863             :                                                   intersection);
    8864           0 :                         if (intersection.size() >= 3)
    8865             :                         {
    8866           0 :                             dfWeight += getArea(intersection);
    8867             :                         }
    8868             :                     }
    8869      785302 :                     if (dfWeight > 0.0)
    8870             :                     {
    8871      468828 :                         if (sp.dfArea == 0)
    8872       89592 :                             sp.dfArea = getArea(xy2);
    8873      468828 :                         dfWeight /= sp.dfArea;
    8874             :                     }
    8875             : 
    8876             : #ifdef CHECK_SUM_WITH_GEOS
    8877             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 0,
    8878             :                                          sp.dfDstX0 - iDstX,
    8879             :                                          sp.dfDstY0 - iDstY);
    8880             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 1,
    8881             :                                          sp.dfDstX1 - iDstX,
    8882             :                                          sp.dfDstY1 - iDstY);
    8883             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 2,
    8884             :                                          sp.dfDstX2 - iDstX,
    8885             :                                          sp.dfDstY2 - iDstY);
    8886             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 3,
    8887             :                                          sp.dfDstX3 - iDstX,
    8888             :                                          sp.dfDstY3 - iDstY);
    8889             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 4,
    8890             :                                          sp.dfDstX0 - iDstX,
    8891             :                                          sp.dfDstY0 - iDstY);
    8892             : 
    8893             :                     double dfWeightGEOS = 0.0;
    8894             :                     auto hIntersection =
    8895             :                         GEOSIntersection_r(hGEOSContext, hP1, hP2);
    8896             :                     if (hIntersection)
    8897             :                     {
    8898             :                         double dfIntersArea = 0.0;
    8899             :                         if (GEOSArea_r(hGEOSContext, hIntersection,
    8900             :                                        &dfIntersArea) &&
    8901             :                             dfIntersArea > 0)
    8902             :                         {
    8903             :                             double dfSourceArea = 0.0;
    8904             :                             if (GEOSArea_r(hGEOSContext, hP2, &dfSourceArea))
    8905             :                             {
    8906             :                                 dfWeightGEOS = dfIntersArea / dfSourceArea;
    8907             :                             }
    8908             :                         }
    8909             :                         GEOSGeom_destroy_r(hGEOSContext, hIntersection);
    8910             :                     }
    8911             :                     if (fabs(dfWeight - dfWeightGEOS) > 1e-5 * dfWeightGEOS)
    8912             :                     {
    8913             :                         /* ok */ printf("dfWeight=%f dfWeightGEOS=%f\n",
    8914             :                                         dfWeight, dfWeightGEOS);
    8915             :                         printf("xy2: ");  // ok
    8916             :                         for (const auto &xy : xy2)
    8917             :                             printf("[%f, %f], ", xy.first, xy.second);  // ok
    8918             :                         printf("\n");                                   // ok
    8919             :                         printf("intersection: ");                       // ok
    8920             :                         for (const auto &xy : intersection)
    8921             :                             printf("[%f, %f], ", xy.first, xy.second);  // ok
    8922             :                         printf("\n");                                   // ok
    8923             :                     }
    8924             : #endif
    8925             :                 }
    8926      801614 :                 if (dfWeight > 0.0)
    8927             :                 {
    8928      474099 :                     const GPtrDiff_t iSrcOffset =
    8929      474099 :                         sp.iSrcX +
    8930      474099 :                         static_cast<GPtrDiff_t>(sp.iSrcY) * nSrcXSize;
    8931      474099 :                     dfTotalWeight += dfWeight;
    8932             : 
    8933      474099 :                     if (poWK->pafUnifiedSrcDensity != nullptr)
    8934             :                     {
    8935           0 :                         dfDensity +=
    8936           0 :                             dfWeight * poWK->pafUnifiedSrcDensity[iSrcOffset];
    8937             :                     }
    8938             :                     else
    8939             :                     {
    8940      474099 :                         dfDensity += dfWeight;
    8941             :                     }
    8942             : 
    8943     1818720 :                     for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    8944             :                     {
    8945             :                         // Returns pixel value if it is not no data.
    8946             :                         double dfBandDensity;
    8947             :                         double dfRealValue;
    8948             :                         double dfImagValue;
    8949     2689240 :                         if (!(GWKGetPixelValue(poWK, iBand, iSrcOffset,
    8950             :                                                &dfBandDensity, &dfRealValue,
    8951             :                                                &dfImagValue) &&
    8952     1344620 :                               dfBandDensity > BAND_DENSITY_THRESHOLD))
    8953             :                         {
    8954           0 :                             continue;
    8955             :                         }
    8956             : 
    8957     1344620 :                         adfRealValue[iBand] += dfRealValue * dfWeight;
    8958     1344620 :                         adfImagValue[iBand] += dfImagValue * dfWeight;
    8959     1344620 :                         adfBandDensity[iBand] += dfBandDensity * dfWeight;
    8960     1344620 :                         adfWeight[iBand] += dfWeight;
    8961             :                     }
    8962             :                 }
    8963             :             }
    8964             : 
    8965      218911 :             CPLFree(pahSourcePixel);
    8966             : 
    8967             :             /* --------------------------------------------------------------------
    8968             :              */
    8969             :             /*          Update destination pixel value. */
    8970             :             /* --------------------------------------------------------------------
    8971             :              */
    8972      218911 :             bool bHasFoundDensity = false;
    8973      218911 :             const GPtrDiff_t iDstOffset =
    8974      218911 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    8975      827822 :             for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    8976             :             {
    8977      608911 :                 if (adfWeight[iBand] > 0)
    8978             :                 {
    8979             :                     const double dfBandDensity =
    8980      608907 :                         adfBandDensity[iBand] / adfWeight[iBand];
    8981      608907 :                     if (dfBandDensity > BAND_DENSITY_THRESHOLD)
    8982             :                     {
    8983      608907 :                         bHasFoundDensity = true;
    8984      608907 :                         GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    8985      608907 :                                          adfRealValue[iBand],
    8986      608907 :                                          adfImagValue[iBand]);
    8987             :                     }
    8988             :                 }
    8989             :             }
    8990             : 
    8991      218911 :             if (!bHasFoundDensity)
    8992           4 :                 continue;
    8993             : 
    8994             :             /* --------------------------------------------------------------------
    8995             :              */
    8996             :             /*          Update destination density/validity masks. */
    8997             :             /* --------------------------------------------------------------------
    8998             :              */
    8999      218907 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity / dfTotalWeight);
    9000             : 
    9001      218907 :             if (poWK->panDstValid != nullptr)
    9002             :             {
    9003       11750 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    9004             :             }
    9005             :         }
    9006             : 
    9007             :         /* --------------------------------------------------------------------
    9008             :          */
    9009             :         /*      Report progress to the user, and optionally cancel out. */
    9010             :         /* --------------------------------------------------------------------
    9011             :          */
    9012         873 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    9013           0 :             break;
    9014             :     }
    9015             : 
    9016             : #ifdef CHECK_SUM_WITH_GEOS
    9017             :     GEOSGeom_destroy_r(hGEOSContext, hP1);
    9018             :     GEOSGeom_destroy_r(hGEOSContext, hP2);
    9019             :     OGRGeometry::freeGEOSContext(hGEOSContext);
    9020             : #endif
    9021          18 :     CPLQuadTreeDestroy(hQuadTree);
    9022          18 : }

Generated by: LCOV version 1.14