LCOV - code coverage report
Current view: top level - alg - gdalwarpkernel.cpp (source / functions) Hit Total Coverage
Test: gdal_filtered.info Lines: 2800 3513 79.7 %
Date: 2024-05-04 12:52:34 Functions: 143 151 94.7 %

          Line data    Source code
       1             : /******************************************************************************
       2             :  *
       3             :  * Project:  High Performance Image Reprojector
       4             :  * Purpose:  Implementation of the GDALWarpKernel class.  Implements the actual
       5             :  *           image warping for a "chunk" of input and output imagery already
       6             :  *           loaded into memory.
       7             :  * Author:   Frank Warmerdam, warmerdam@pobox.com
       8             :  *
       9             :  ******************************************************************************
      10             :  * Copyright (c) 2003, Frank Warmerdam <warmerdam@pobox.com>
      11             :  * Copyright (c) 2008-2013, Even Rouault <even dot rouault at spatialys.com>
      12             :  *
      13             :  * Permission is hereby granted, free of charge, to any person obtaining a
      14             :  * copy of this software and associated documentation files (the "Software"),
      15             :  * to deal in the Software without restriction, including without limitation
      16             :  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      17             :  * and/or sell copies of the Software, and to permit persons to whom the
      18             :  * Software is furnished to do so, subject to the following conditions:
      19             :  *
      20             :  * The above copyright notice and this permission notice shall be included
      21             :  * in all copies or substantial portions of the Software.
      22             :  *
      23             :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
      24             :  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      25             :  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
      26             :  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
      27             :  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
      28             :  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
      29             :  * DEALINGS IN THE SOFTWARE.
      30             :  ****************************************************************************/
      31             : 
#include "cpl_port.h"
#include "gdalwarper.h"

#include <cfloat>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <cstring>

#include <algorithm>
#include <chrono>
#include <condition_variable>
#include <limits>
#include <map>
#include <memory>
#include <mutex>
#include <new>
#include <utility>
#include <vector>

#include "cpl_atomic_ops.h"
#include "cpl_conv.h"
#include "cpl_error.h"
#include "cpl_mask.h"
#include "cpl_multiproc.h"
#include "cpl_progress.h"
#include "cpl_string.h"
#include "cpl_vsi.h"
#include "cpl_worker_thread_pool.h"
#include "cpl_quad_tree.h"
#include "gdal.h"
#include "gdal_alg.h"
#include "gdal_alg_priv.h"
#include "gdal_thread_pool.h"
#include "gdalwarpkernel_opencl.h"
      63             : 
      64             : // #define CHECK_SUM_WITH_GEOS
      65             : #ifdef CHECK_SUM_WITH_GEOS
      66             : #include "ogr_geometry.h"
      67             : #include "ogr_geos.h"
      68             : #endif
      69             : 
      70             : // We restrict to 64bit processors because they are guaranteed to have SSE2.
      71             : // Could possibly be used too on 32bit, but we would need to check at runtime.
      72             : #if defined(__x86_64) || defined(_M_X64)
      73             : #include "gdalsse_priv.h"
      74             : 
      75             : #if __SSE4_1__
      76             : #include <smmintrin.h>
      77             : #endif
      78             : 
      79             : #if __SSE3__
      80             : #include <pmmintrin.h>
      81             : #endif
      82             : 
      83             : #endif
      84             : 
// Density thresholds. Their use sites are outside this excerpt; presumably
// densities at or below these values are treated as "no contribution" --
// TODO confirm against the callers.
constexpr double BAND_DENSITY_THRESHOLD = 0.0000000001;
constexpr float SRC_DENSITY_THRESHOLD = 0.000000001f;
      87             : 
      88             : // #define INSTANTIATE_FLOAT64_SSE2_IMPL
      89             : 
// Filter radius of each resampling algorithm, indexed by the numeric value
// of GDALResampleAlg. GWKGetFilterRadius() reads this table and checks at
// compile time that it holds exactly GRA_LAST_VALUE + 1 entries, so a new
// resampling algorithm needs a new row here.
static const int anGWKFilterRadius[] = {
    0,  // Nearest neighbour
    1,  // Bilinear
    2,  // Cubic Convolution (Catmull-Rom)
    2,  // Cubic B-Spline
    3,  // Lanczos windowed sinc
    0,  // Average
    0,  // Mode
    0,  // Reserved GRA_Gauss=7
    0,  // Max
    0,  // Min
    0,  // Med
    0,  // Q1
    0,  // Q3
    0,  // Sum
    0,  // RMS
};
     107             : 
// Single-value filter functions; their definitions are not part of this
// excerpt.
static double GWKBilinear(double dfX);
static double GWKCubic(double dfX);
static double GWKBSpline(double dfX);
static double GWKLanczosSinc(double dfX);

// Filter function of each resampling algorithm, indexed by the numeric value
// of GDALResampleAlg (same layout as anGWKFilterRadius). A nullptr entry
// means no function is registered for that algorithm. The table size is
// checked at compile time in GWKGetFilterFunc().
static const FilterFuncType apfGWKFilter[] = {
    nullptr,         // Nearest neighbour
    GWKBilinear,     // Bilinear
    GWKCubic,        // Cubic Convolution (Catmull-Rom)
    GWKBSpline,      // Cubic B-Spline
    GWKLanczosSinc,  // Lanczos windowed sinc
    nullptr,         // Average
    nullptr,         // Mode
    nullptr,         // Reserved GRA_Gauss=7
    nullptr,         // Max
    nullptr,         // Min
    nullptr,         // Med
    nullptr,         // Q1
    nullptr,         // Q3
    nullptr,         // Sum
    nullptr,         // RMS
};
     130             : 
// Variants of the filter functions that take a pointer to an array of values
// (four of them, judging by the names -- the definitions are not part of
// this excerpt).
// TODO(schwehr): Can we make these functions have a const * const arg?
static double GWKBilinear4Values(double *padfVals);
static double GWKCubic4Values(double *padfVals);
static double GWKBSpline4Values(double *padfVals);
static double GWKLanczosSinc4Values(double *padfVals);

// "4 values" filter function of each resampling algorithm, indexed by the
// numeric value of GDALResampleAlg. A nullptr entry means no function is
// registered for that algorithm. The table size is checked at compile time
// in GWKGetFilterFunc4Values().
static const FilterFunc4ValuesType apfGWKFilter4Values[] = {
    nullptr,                // Nearest neighbour
    GWKBilinear4Values,     // Bilinear
    GWKCubic4Values,        // Cubic Convolution (Catmull-Rom)
    GWKBSpline4Values,      // Cubic B-Spline
    GWKLanczosSinc4Values,  // Lanczos windowed sinc
    nullptr,                // Average
    nullptr,                // Mode
    nullptr,                // Reserved GRA_Gauss=7
    nullptr,                // Max
    nullptr,                // Min
    nullptr,                // Med
    nullptr,                // Q1
    nullptr,                // Q3
    nullptr,                // Sum
    nullptr,                // RMS
};
     154             : 
     155        9500 : int GWKGetFilterRadius(GDALResampleAlg eResampleAlg)
     156             : {
     157             :     static_assert(CPL_ARRAYSIZE(anGWKFilterRadius) == GRA_LAST_VALUE + 1,
     158             :                   "Bad size of anGWKFilterRadius");
     159        9500 :     return anGWKFilterRadius[eResampleAlg];
     160             : }
     161             : 
     162        3603 : FilterFuncType GWKGetFilterFunc(GDALResampleAlg eResampleAlg)
     163             : {
     164             :     static_assert(CPL_ARRAYSIZE(apfGWKFilter) == GRA_LAST_VALUE + 1,
     165             :                   "Bad size of apfGWKFilter");
     166        3603 :     return apfGWKFilter[eResampleAlg];
     167             : }
     168             : 
     169        3600 : FilterFunc4ValuesType GWKGetFilterFunc4Values(GDALResampleAlg eResampleAlg)
     170             : {
     171             :     static_assert(CPL_ARRAYSIZE(apfGWKFilter4Values) == GRA_LAST_VALUE + 1,
     172             :                   "Bad size of apfGWKFilter4Values");
     173        3600 :     return apfGWKFilter4Values[eResampleAlg];
     174             : }
     175             : 
// Forward declarations of the file-local warping routines. Each takes the
// kernel to process and returns a CPLErr. Their definitions are not part of
// this excerpt; the names suggest one specialisation per resampling
// algorithm / data type / masking combination -- confirm against the
// definitions.

// Only declared when GDAL is built with OpenCL support.
#ifdef HAVE_OPENCL
static CPLErr GWKOpenCLCase(GDALWarpKernel *);
#endif

static CPLErr GWKGeneralCase(GDALWarpKernel *);
static CPLErr GWKRealCase(GDALWarpKernel *poWK);
static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
// The Float64 variants are only declared when INSTANTIATE_FLOAT64_SSE2_IMPL
// is defined (its #define is commented out near the top of the file).
#ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
#endif
static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
static CPLErr GWKNearestByte(GDALWarpKernel *poWK);
static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
#ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
#endif
static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
static CPLErr GWKNearestShort(GDALWarpKernel *poWK);
static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
static CPLErr GWKNearestFloat(GDALWarpKernel *poWK);
static CPLErr GWKAverageOrMode(GDALWarpKernel *);
static CPLErr GWKSumPreserving(GDALWarpKernel *);
static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
     207             : 
     208             : /************************************************************************/
     209             : /*                           GWKJobStruct                               */
     210             : /************************************************************************/
     211             : 
     212             : struct GWKJobStruct
     213             : {
     214             :     std::mutex &mutex;
     215             :     std::condition_variable &cv;
     216             :     int &counter;
     217             :     bool &stopFlag;
     218             :     GDALWarpKernel *poWK;
     219             :     int iYMin;
     220             :     int iYMax;
     221             :     int (*pfnProgress)(GWKJobStruct *psJob);
     222             :     void *pTransformerArg;
     223             :     void (*pfnFunc)(
     224             :         void *);  // used by GWKRun() to assign the proper pTransformerArg
     225             : 
     226        2051 :     GWKJobStruct(std::mutex &mutex_, std::condition_variable &cv_,
     227             :                  int &counter_, bool &stopFlag_)
     228        2051 :         : mutex(mutex_), cv(cv_), counter(counter_), stopFlag(stopFlag_),
     229             :           poWK(nullptr), iYMin(0), iYMax(0), pfnProgress(nullptr),
     230        2051 :           pTransformerArg(nullptr), pfnFunc(nullptr)
     231             :     {
     232        2051 :     }
     233             : };
     234             : 
     235             : struct GWKThreadData
     236             : {
     237             :     std::unique_ptr<CPLJobQueue> poJobQueue{};
     238             :     std::unique_ptr<std::vector<GWKJobStruct>> threadJobs{};
     239             :     int nMaxThreads{0};
     240             :     int counter{0};
     241             :     bool stopFlag{false};
     242             :     std::mutex mutex{};
     243             :     std::condition_variable cv{};
     244             :     bool bTransformerArgInputAssignedToThread{false};
     245             :     void *pTransformerArgInput{
     246             :         nullptr};  // owned by calling layer. Not to be destroyed
     247             :     std::map<GIntBig, void *> mapThreadToTransformerArg{};
     248             :     int nTotalThreadCountForThisRun = 0;
     249             :     int nCurThreadCountForThisRun = 0;
     250             : };
     251             : 
     252             : /************************************************************************/
     253             : /*                        GWKProgressThread()                           */
     254             : /************************************************************************/
     255             : 
     256             : // Return TRUE if the computation must be interrupted.
     257          18 : static int GWKProgressThread(GWKJobStruct *psJob)
     258             : {
     259          18 :     bool stop = false;
     260             :     {
     261          18 :         std::lock_guard<std::mutex> lock(psJob->mutex);
     262          18 :         psJob->counter++;
     263          18 :         stop = psJob->stopFlag;
     264             :     }
     265          18 :     psJob->cv.notify_one();
     266             : 
     267          18 :     return stop;
     268             : }
     269             : 
     270             : /************************************************************************/
     271             : /*                      GWKProgressMonoThread()                         */
     272             : /************************************************************************/
     273             : 
     274             : // Return TRUE if the computation must be interrupted.
     275      197317 : static int GWKProgressMonoThread(GWKJobStruct *psJob)
     276             : {
     277      197317 :     GDALWarpKernel *poWK = psJob->poWK;
     278             :     // coverity[missing_lock]
     279      197317 :     if (!poWK->pfnProgress(
     280      197317 :             poWK->dfProgressBase +
     281      197317 :                 poWK->dfProgressScale *
     282      197317 :                     (++psJob->counter / static_cast<double>(psJob->iYMax)),
     283             :             "", poWK->pProgress))
     284             :     {
     285           1 :         CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
     286           1 :         psJob->stopFlag = true;
     287           1 :         return TRUE;
     288             :     }
     289      197316 :     return FALSE;
     290             : }
     291             : 
     292             : /************************************************************************/
     293             : /*                       GWKGenericMonoThread()                         */
     294             : /************************************************************************/
     295             : 
     296        2046 : static CPLErr GWKGenericMonoThread(GDALWarpKernel *poWK,
     297             :                                    void (*pfnFunc)(void *pUserData))
     298             : {
     299        2046 :     GWKThreadData td;
     300             : 
     301             :     // NOTE: the mutex is not used.
     302        2046 :     GWKJobStruct job(td.mutex, td.cv, td.counter, td.stopFlag);
     303        2046 :     job.poWK = poWK;
     304        2046 :     job.iYMin = 0;
     305        2046 :     job.iYMax = poWK->nDstYSize;
     306        2046 :     job.pfnProgress = GWKProgressMonoThread;
     307        2046 :     job.pTransformerArg = poWK->pTransformerArg;
     308        2046 :     pfnFunc(&job);
     309             : 
     310        4092 :     return td.stopFlag ? CE_Failure : CE_None;
     311             : }
     312             : 
     313             : /************************************************************************/
     314             : /*                          GWKThreadsCreate()                          */
     315             : /************************************************************************/
     316             : 
     317        1345 : void *GWKThreadsCreate(char **papszWarpOptions,
     318             :                        GDALTransformerFunc /* pfnTransformer */,
     319             :                        void *pTransformerArg)
     320             : {
     321             :     const char *pszWarpThreads =
     322        1345 :         CSLFetchNameValue(papszWarpOptions, "NUM_THREADS");
     323        1345 :     if (pszWarpThreads == nullptr)
     324        1345 :         pszWarpThreads = CPLGetConfigOption("GDAL_NUM_THREADS", "1");
     325             : 
     326        1345 :     int nThreads = 0;
     327        1345 :     if (EQUAL(pszWarpThreads, "ALL_CPUS"))
     328           3 :         nThreads = CPLGetNumCPUs();
     329             :     else
     330        1342 :         nThreads = atoi(pszWarpThreads);
     331        1345 :     if (nThreads <= 1)
     332        1340 :         nThreads = 0;
     333        1345 :     if (nThreads > 128)
     334           0 :         nThreads = 128;
     335             : 
     336        1345 :     GWKThreadData *psThreadData = new GWKThreadData();
     337             :     auto poThreadPool =
     338        1345 :         nThreads > 0 ? GDALGetGlobalThreadPool(nThreads) : nullptr;
     339        1345 :     if (nThreads && poThreadPool)
     340             :     {
     341           5 :         psThreadData->nMaxThreads = nThreads;
     342           5 :         psThreadData->threadJobs.reset(new std::vector<GWKJobStruct>(
     343             :             nThreads,
     344           5 :             GWKJobStruct(psThreadData->mutex, psThreadData->cv,
     345          10 :                          psThreadData->counter, psThreadData->stopFlag)));
     346             : 
     347           5 :         psThreadData->poJobQueue = poThreadPool->CreateJobQueue();
     348           5 :         psThreadData->pTransformerArgInput = pTransformerArg;
     349             :     }
     350             : 
     351        1345 :     return psThreadData;
     352             : }
     353             : 
     354             : /************************************************************************/
     355             : /*                             GWKThreadsEnd()                          */
     356             : /************************************************************************/
     357             : 
     358        1345 : void GWKThreadsEnd(void *psThreadDataIn)
     359             : {
     360        1345 :     if (psThreadDataIn == nullptr)
     361           0 :         return;
     362             : 
     363        1345 :     GWKThreadData *psThreadData = static_cast<GWKThreadData *>(psThreadDataIn);
     364        1345 :     if (psThreadData->poJobQueue)
     365             :     {
     366             :         // cppcheck-suppress constVariableReference
     367          15 :         for (auto &pair : psThreadData->mapThreadToTransformerArg)
     368             :         {
     369          10 :             CPLAssert(pair.second != psThreadData->pTransformerArgInput);
     370          10 :             GDALDestroyTransformer(pair.second);
     371             :         }
     372           5 :         psThreadData->poJobQueue.reset();
     373             :     }
     374        1345 :     delete psThreadData;
     375             : }
     376             : 
     377             : /************************************************************************/
     378             : /*                         ThreadFuncAdapter()                          */
     379             : /************************************************************************/
     380             : 
     381          15 : static void ThreadFuncAdapter(void *pData)
     382             : {
     383          15 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
     384          15 :     GWKThreadData *psThreadData =
     385          15 :         static_cast<GWKThreadData *>(psJob->poWK->psThreadData);
     386             : 
     387             :     // Look if we have already a per-thread transformer
     388          15 :     void *pTransformerArg = nullptr;
     389          15 :     const GIntBig nThreadId = CPLGetPID();
     390             : 
     391             :     {
     392          30 :         std::lock_guard<std::mutex> lock(psThreadData->mutex);
     393          15 :         ++psThreadData->nCurThreadCountForThisRun;
     394             : 
     395          15 :         auto oIter = psThreadData->mapThreadToTransformerArg.find(nThreadId);
     396          15 :         if (oIter != psThreadData->mapThreadToTransformerArg.end())
     397             :         {
     398           0 :             pTransformerArg = oIter->second;
     399             :         }
     400          15 :         else if (!psThreadData->bTransformerArgInputAssignedToThread &&
     401          15 :                  psThreadData->nCurThreadCountForThisRun ==
     402          15 :                      psThreadData->nTotalThreadCountForThisRun)
     403             :         {
     404             :             // If we are the last thread to be started, temporarily borrow the
     405             :             // original transformer
     406           5 :             psThreadData->bTransformerArgInputAssignedToThread = true;
     407           5 :             pTransformerArg = psThreadData->pTransformerArgInput;
     408           5 :             psThreadData->mapThreadToTransformerArg[nThreadId] =
     409             :                 pTransformerArg;
     410             :         }
     411             : 
     412          15 :         if (pTransformerArg == nullptr)
     413             :         {
     414          10 :             CPLAssert(psThreadData->pTransformerArgInput != nullptr);
     415          10 :             CPLAssert(!psThreadData->bTransformerArgInputAssignedToThread);
     416             :         }
     417             :     }
     418             : 
     419             :     // If no transformer assigned to current thread, instantiate one
     420          15 :     if (pTransformerArg == nullptr)
     421             :     {
     422             :         // This somehow assumes that GDALCloneTransformer() is thread-safe
     423             :         // which should normally be the case.
     424             :         pTransformerArg =
     425          10 :             GDALCloneTransformer(psThreadData->pTransformerArgInput);
     426             : 
     427             :         // Lock for the stop flag and the transformer map.
     428          10 :         std::lock_guard<std::mutex> lock(psThreadData->mutex);
     429          10 :         if (!pTransformerArg)
     430             :         {
     431           0 :             psJob->stopFlag = true;
     432           0 :             return;
     433             :         }
     434          10 :         psThreadData->mapThreadToTransformerArg[nThreadId] = pTransformerArg;
     435             :     }
     436             : 
     437          15 :     psJob->pTransformerArg = pTransformerArg;
     438          15 :     psJob->pfnFunc(pData);
     439             : 
     440             :     // Give back original transformer, if borrowed.
     441             :     {
     442          30 :         std::lock_guard<std::mutex> lock(psThreadData->mutex);
     443          15 :         if (psThreadData->bTransformerArgInputAssignedToThread &&
     444           7 :             pTransformerArg == psThreadData->pTransformerArgInput)
     445             :         {
     446             :             psThreadData->mapThreadToTransformerArg.erase(
     447           5 :                 psThreadData->mapThreadToTransformerArg.find(nThreadId));
     448           5 :             psThreadData->bTransformerArgInputAssignedToThread = false;
     449             :         }
     450             :     }
     451             : }
     452             : 
     453             : /************************************************************************/
     454             : /*                                GWKRun()                              */
     455             : /************************************************************************/
     456             : 
     457        2051 : static CPLErr GWKRun(GDALWarpKernel *poWK, const char *pszFuncName,
     458             :                      void (*pfnFunc)(void *pUserData))
     459             : 
     460             : {
     461        2051 :     const int nDstYSize = poWK->nDstYSize;
     462             : 
     463        2051 :     CPLDebug("GDAL",
     464             :              "GDALWarpKernel()::%s() "
     465             :              "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d",
     466             :              pszFuncName, poWK->nSrcXOff, poWK->nSrcYOff, poWK->nSrcXSize,
     467             :              poWK->nSrcYSize, poWK->nDstXOff, poWK->nDstYOff, poWK->nDstXSize,
     468             :              poWK->nDstYSize);
     469             : 
     470        2051 :     if (!poWK->pfnProgress(poWK->dfProgressBase, "", poWK->pProgress))
     471             :     {
     472           0 :         CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
     473           0 :         return CE_Failure;
     474             :     }
     475             : 
     476        2051 :     GWKThreadData *psThreadData =
     477             :         static_cast<GWKThreadData *>(poWK->psThreadData);
     478        2051 :     if (psThreadData == nullptr || psThreadData->poJobQueue == nullptr)
     479             :     {
     480        2046 :         return GWKGenericMonoThread(poWK, pfnFunc);
     481             :     }
     482             : 
     483           5 :     int nThreads = std::min(psThreadData->nMaxThreads, nDstYSize / 2);
     484             :     // Config option mostly useful for tests to be able to test multithreading
     485             :     // with small rasters
     486             :     const int nWarpChunkSize =
     487           5 :         atoi(CPLGetConfigOption("WARP_THREAD_CHUNK_SIZE", "65536"));
     488           5 :     if (nWarpChunkSize > 0)
     489             :     {
     490           3 :         GIntBig nChunks =
     491           3 :             static_cast<GIntBig>(nDstYSize) * poWK->nDstXSize / nWarpChunkSize;
     492           3 :         if (nThreads > nChunks)
     493           1 :             nThreads = static_cast<int>(nChunks);
     494             :     }
     495           5 :     if (nThreads <= 0)
     496           1 :         nThreads = 1;
     497             : 
     498           5 :     CPLDebug("WARP", "Using %d threads", nThreads);
     499             : 
     500           5 :     auto &jobs = *psThreadData->threadJobs;
     501           5 :     CPLAssert(static_cast<int>(jobs.size()) >= nThreads);
     502             :     // Fill-in job structures.
     503          20 :     for (int i = 0; i < nThreads; ++i)
     504             :     {
     505          15 :         auto &job = jobs[i];
     506          15 :         job.poWK = poWK;
     507          15 :         job.iYMin =
     508          15 :             static_cast<int>(static_cast<int64_t>(i) * nDstYSize / nThreads);
     509          15 :         job.iYMax = static_cast<int>(static_cast<int64_t>(i + 1) * nDstYSize /
     510          15 :                                      nThreads);
     511          15 :         if (poWK->pfnProgress != GDALDummyProgress)
     512           1 :             job.pfnProgress = GWKProgressThread;
     513          15 :         job.pfnFunc = pfnFunc;
     514             :     }
     515             : 
     516             :     {
     517          10 :         std::unique_lock<std::mutex> lock(psThreadData->mutex);
     518             : 
     519           5 :         psThreadData->nTotalThreadCountForThisRun = nThreads;
     520             :         // coverity[missing_lock]
     521           5 :         psThreadData->nCurThreadCountForThisRun = 0;
     522             : 
     523             :         // Start jobs.
     524          20 :         for (int i = 0; i < nThreads; ++i)
     525             :         {
     526          15 :             auto &job = jobs[i];
     527          15 :             psThreadData->poJobQueue->SubmitJob(ThreadFuncAdapter,
     528             :                                                 static_cast<void *>(&job));
     529             :         }
     530             : 
     531             :         /* --------------------------------------------------------------------
     532             :          */
     533             :         /*      Report progress. */
     534             :         /* --------------------------------------------------------------------
     535             :          */
     536           5 :         if (poWK->pfnProgress != GDALDummyProgress)
     537             :         {
     538           1 :             while (psThreadData->counter < nDstYSize)
     539             :             {
     540           1 :                 psThreadData->cv.wait(lock);
     541           1 :                 if (!poWK->pfnProgress(poWK->dfProgressBase +
     542           1 :                                            poWK->dfProgressScale *
     543           1 :                                                (psThreadData->counter /
     544           1 :                                                 static_cast<double>(nDstYSize)),
     545             :                                        "", poWK->pProgress))
     546             :                 {
     547           1 :                     CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
     548           1 :                     psThreadData->stopFlag = true;
     549           1 :                     break;
     550             :                 }
     551             :             }
     552             :         }
     553             :     }
     554             : 
     555             :     /* -------------------------------------------------------------------- */
     556             :     /*      Wait for all jobs to complete.                                  */
     557             :     /* -------------------------------------------------------------------- */
     558           5 :     psThreadData->poJobQueue->WaitCompletion();
     559             : 
     560           5 :     return psThreadData->stopFlag ? CE_Failure : CE_None;
     561             : }
     562             : 
     563             : /************************************************************************/
     564             : /* ==================================================================== */
     565             : /*                            GDALWarpKernel                            */
     566             : /* ==================================================================== */
     567             : /************************************************************************/
     568             : 
     569             : /**
     570             :  * \class GDALWarpKernel "gdalwarper.h"
     571             :  *
     572             :  * Low level image warping class.
     573             :  *
     574             :  * This class is responsible for low level image warping for one
     575             :  * "chunk" of imagery.  The class is essentially a structure with all
     576             :  * data members public - primarily so that new special-case functions
     577             :  * can be added without changing the class declaration.
     578             :  *
     579             :  * Applications are normally intended to interact with warping facilities
     580             :  * through the GDALWarpOperation class, though the GDALWarpKernel can in
     581             :  * theory be used directly if great care is taken in setting up the
     582             :  * control data.
     583             :  *
     584             :  * <h3>Design Issues</h3>
     585             :  *
     586             :  * The intention is that PerformWarp() would analyze the setup in terms
     587             :  * of the datatype, resampling type, and validity/density mask usage and
     588             :  * pick one of many specific implementations of the warping algorithm over
     589             :  * a continuum of optimization vs. generality.  At one end there will be a
     590             :  * reference general purpose implementation of the algorithm that supports
     591             :  * any data type (working internally in double precision complex), all three
     592             :  * resampling types, and any or all of the validity/density masks.  At the
     593             :  * other end would be highly optimized algorithms for common cases like
     594             :  * nearest neighbour resampling on GDT_Byte data with no masks.
     595             :  *
     596             :  * The full set of optimized versions has not been decided but we should
     597             :  * expect to have at least:
     598             :  *  - One for each resampling algorithm for 8bit data with no masks.
     599             :  *  - One for each resampling algorithm for float data with no masks.
     600             :  *  - One for each resampling algorithm for float data with any/all masks
     601             :  *    (essentially the generic case for just float data).
     602             :  *  - One for each resampling algorithm for 8bit data with support for
     603             :  *    input validity masks (per band or per pixel).  This handles the common
     604             :  *    case of nodata masking.
     605             :  *  - One for each resampling algorithm for float data with support for
     606             :  *    input validity masks (per band or per pixel).  This handles the common
     607             :  *    case of nodata masking.
     608             :  *
     609             :  * Some of the specializations would operate on all bands in one pass
     610             :  * (especially the ones without masking would do this), while others might
     611             :  * process each band individually to reduce code complexity.
     612             :  *
     613             :  * <h3>Masking Semantics</h3>
     614             :  *
     615             :  * A detailed explanation of the semantics of the validity and density masks,
     616             :  * and their effects on resampling kernels is needed here.
     617             :  */
     618             : 
     619             : /************************************************************************/
     620             : /*                     GDALWarpKernel Data Members                      */
     621             : /************************************************************************/
     622             : 
     623             : /**
     624             :  * \var GDALResampleAlg GDALWarpKernel::eResample;
     625             :  *
     626             :  * Resampling algorithm.
     627             :  *
     628             :  * The resampling algorithm to use.  One of GRA_NearestNeighbour, GRA_Bilinear,
     629             :  * GRA_Cubic, GRA_CubicSpline, GRA_Lanczos, GRA_Average, GRA_RMS,
     630             :  * GRA_Mode or GRA_Sum.
     631             :  *
     632             :  * This field is required. GRA_NearestNeighbour may be used as a default
     633             :  * value.
     634             :  */
     635             : 
     636             : /**
     637             :  * \var GDALDataType GDALWarpKernel::eWorkingDataType;
     638             :  *
     639             :  * Working pixel data type.
     640             :  *
     641             :  * The datatype of pixels in the source image (papabySrcImage) and
     642             :  * destination image (papabyDstImage) buffers.  Note that operations on
     643             :  * some data types (such as GDT_Byte) may be much better optimized than other
     644             :  * less common cases.
     645             :  *
     646             :  * This field is required.  It may not be GDT_Unknown.
     647             :  */
     648             : 
     649             : /**
     650             :  * \var int GDALWarpKernel::nBands;
     651             :  *
     652             :  * Number of bands.
     653             :  *
     654             :  * The number of bands (layers) of imagery being warped.  Determines the
     655             :  * number of entries in the papabySrcImage, papanBandSrcValid,
     656             :  * and papabyDstImage arrays.
     657             :  *
     658             :  * This field is required.
     659             :  */
     660             : 
     661             : /**
     662             :  * \var int GDALWarpKernel::nSrcXSize;
     663             :  *
     664             :  * Source image width in pixels.
     665             :  *
     666             :  * This field is required.
     667             :  */
     668             : 
     669             : /**
     670             :  * \var int GDALWarpKernel::nSrcYSize;
     671             :  *
     672             :  * Source image height in pixels.
     673             :  *
     674             :  * This field is required.
     675             :  */
     676             : 
     677             : /**
     678             :  * \var double GDALWarpKernel::dfSrcXExtraSize;
     679             :  *
     680             :  * Number of pixels included in nSrcXSize that are present on the edges of
     681             :  * the area of interest to take into account the width of the kernel.
     682             :  *
     683             :  * This field is required.
     684             :  */
     685             : 
     686             : /**
     687             :  * \var double GDALWarpKernel::dfSrcYExtraSize;
     688             :  *
     689             :  * Number of pixels included in nSrcYSize that are present on the edges of
     690             :  * the area of interest to take into account the height of the kernel.
     691             :  *
     692             :  * This field is required.
     693             :  */
     694             : 
     695             : /**
     696             :  * \var GByte **GDALWarpKernel::papabySrcImage;
     697             :  *
     698             :  * Array of source image band data.
     699             :  *
     700             :  * This is an array (of size GDALWarpKernel::nBands) of pointers
     701             :  * to image data.  Each individual band of image data is organized as a single
     702             :  * block of image data in left to right, then bottom to top order.  The actual
     703             :  * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
     704             :  *
     705             :  * To access the pixel value for the (x=3, y=4) pixel (zero based) of
     706             :  * the second band with eWorkingDataType set to GDT_Float32 use code like
     707             :  * this:
     708             :  *
     709             :  * \code
     710             :  *   float dfPixelValue;
     711             :  *   int   nBand = 2-1;  // Band indexes are zero based.
     712             :  *   int   nPixel = 3; // Zero based.
     713             :  *   int   nLine = 4;  // Zero based.
     714             :  *
     715             :  *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
     716             :  *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
     717             :  *   assert( nBand >= 0 && nBand < poKern->nBands );
     718             :  *   dfPixelValue = ((float *) poKern->papabySrcImage[nBand])
     719             :  *                                  [nPixel + nLine * poKern->nSrcXSize];
     720             :  * \endcode
     721             :  *
     722             :  * This field is required.
     723             :  */
     724             : 
     725             : /**
     726             :  * \var GUInt32 **GDALWarpKernel::papanBandSrcValid;
     727             :  *
     728             :  * Per band validity mask for source pixels.
     729             :  *
     730             :  * Array of pixel validity mask layers for each source band.   Each of
     731             :  * the mask layers is the same size (in pixels) as the source image with
     732             :  * one bit per pixel.  Note that it is legal (and common) for this to be
     733             :  * NULL indicating that none of the pixels are invalidated, or for some
     734             :  * band validity masks to be NULL in which case all pixels of the band are
     735             :  * valid.  The following code can be used to test the validity of a particular
     736             :  * pixel.
     737             :  *
     738             :  * \code
     739             :  *   int   bIsValid = TRUE;
     740             :  *   int   nBand = 2-1;  // Band indexes are zero based.
     741             :  *   int   nPixel = 3; // Zero based.
     742             :  *   int   nLine = 4;  // Zero based.
     743             :  *
     744             :  *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
     745             :  *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
     746             :  *   assert( nBand >= 0 && nBand < poKern->nBands );
     747             :  *
     748             :  *   if( poKern->papanBandSrcValid != NULL
     749             :  *       && poKern->papanBandSrcValid[nBand] != NULL )
     750             :  *   {
     751             :  *       GUInt32 *panBandMask = poKern->papanBandSrcValid[nBand];
     752             :  *       int    iPixelOffset = nPixel + nLine * poKern->nSrcXSize;
     753             :  *
     754             :  *       bIsValid = CPLMaskGet(panBandMask, iPixelOffset);
     755             :  *   }
     756             :  * \endcode
     757             :  */
     758             : 
     759             : /**
     760             :  * \var GUInt32 *GDALWarpKernel::panUnifiedSrcValid;
     761             :  *
     762             :  * Per pixel validity mask for source pixels.
     763             :  *
     764             :  * A single validity mask layer that applies to the pixels of all source
     765             :  * bands.  It is accessed similarly to papanBandSrcValid, but without the
     766             :  * extra level of band indirection.
     767             :  *
     768             :  * This pointer may be NULL indicating that all pixels are valid.
     769             :  *
     770             :  * Note that if both panUnifiedSrcValid, and papanBandSrcValid are available,
     771             :  * the pixel isn't considered to be valid unless both arrays indicate it is
     772             :  * valid.
     773             :  */
     774             : 
     775             : /**
     776             :  * \var float *GDALWarpKernel::pafUnifiedSrcDensity;
     777             :  *
     778             :  * Per pixel density mask for source pixels.
     779             :  *
     780             :  * A single density mask layer that applies to the pixels of all source
     781             :  * bands.  It contains values between 0.0 and 1.0 indicating the degree to
     782             :  * which this pixel should be allowed to contribute to the output result.
     783             :  *
     784             :  * This pointer may be NULL indicating that all pixels have a density of 1.0.
     785             :  *
     786             :  * The density for a pixel may be accessed like this:
     787             :  *
     788             :  * \code
     789             :  *   float fDensity = 1.0;
     790             :  *   int nPixel = 3;  // Zero based.
     791             :  *   int nLine = 4;   // Zero based.
     792             :  *
     793             :  *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
     794             :  *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
     795             :  *   if( poKern->pafUnifiedSrcDensity != NULL )
     796             :  *     fDensity = poKern->pafUnifiedSrcDensity
     797             :  *                                  [nPixel + nLine * poKern->nSrcXSize];
     798             :  * \endcode
     799             :  */
     800             : 
     801             : /**
     802             :  * \var int GDALWarpKernel::nDstXSize;
     803             :  *
     804             :  * Width of destination image in pixels.
     805             :  *
     806             :  * This field is required.
     807             :  */
     808             : 
     809             : /**
     810             :  * \var int GDALWarpKernel::nDstYSize;
     811             :  *
     812             :  * Height of destination image in pixels.
     813             :  *
     814             :  * This field is required.
     815             :  */
     816             : 
     817             : /**
     818             :  * \var GByte **GDALWarpKernel::papabyDstImage;
     819             :  *
     820             :  * Array of destination image band data.
     821             :  *
     822             :  * This is an array (of size GDALWarpKernel::nBands) of pointers
     823             :  * to image data.  Each individual band of image data is organized as a single
     824             :  * block of image data in left to right, then bottom to top order.  The actual
     825             :  * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
     826             :  *
     827             :  * To access the pixel value for the (x=3, y=4) pixel (zero based) of
     828             :  * the second band with eWorkingDataType set to GDT_Float32 use code like
     829             :  * this:
     830             :  *
     831             :  * \code
     832             :  *   float dfPixelValue;
     833             :  *   int   nBand = 2-1;  // Band indexes are zero based.
     834             :  *   int   nPixel = 3; // Zero based.
     835             :  *   int   nLine = 4;  // Zero based.
     836             :  *
     837             :  *   assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
     838             :  *   assert( nLine >= 0 && nLine < poKern->nDstYSize );
     839             :  *   assert( nBand >= 0 && nBand < poKern->nBands );
     840             :  *   dfPixelValue = ((float *) poKern->papabyDstImage[nBand])
     841             :  *                                  [nPixel + nLine * poKern->nDstXSize];
     842             :  * \endcode
     843             :  *
     844             :  * This field is required.
     845             :  */
     846             : 
     847             : /**
     848             :  * \var GUInt32 *GDALWarpKernel::panDstValid;
     849             :  *
     850             :  * Per pixel validity mask for destination pixels.
     851             :  *
     852             :  * A single validity mask layer that applies to the pixels of all destination
     853             :  * bands.  It is accessed similarly to panUnifiedSrcValid, but based
     854             :  * on the size of the destination image.
     855             :  *
     856             :  * This pointer may be NULL indicating that all pixels are valid.
     857             :  */
     858             : 
     859             : /**
     860             :  * \var float *GDALWarpKernel::pafDstDensity;
     861             :  *
     862             :  * Per pixel density mask for destination pixels.
     863             :  *
     864             :  * A single density mask layer that applies to the pixels of all destination
     865             :  * bands.  It contains values between 0.0 and 1.0.
     866             :  *
     867             :  * This pointer may be NULL indicating that all pixels have a density of 1.0.
     868             :  *
     869             :  * The density for a pixel may be accessed like this:
     870             :  *
     871             :  * \code
     872             :  *   float fDensity = 1.0;
     873             :  *   int   nPixel = 3; // Zero based.
     874             :  *   int   nLine = 4;  // Zero based.
     875             :  *
     876             :  *   assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
     877             :  *   assert( nLine >= 0 && nLine < poKern->nDstYSize );
     878             :  *   if( poKern->pafDstDensity != NULL )
     879             :  *     fDensity = poKern->pafDstDensity[nPixel + nLine * poKern->nDstXSize];
     880             :  * \endcode
     881             :  */
     882             : 
     883             : /**
     884             :  * \var int GDALWarpKernel::nSrcXOff;
     885             :  *
     886             :  * X offset to source pixel coordinates for transformation.
     887             :  *
     888             :  * See pfnTransformer.
     889             :  *
     890             :  * This field is required.
     891             :  */
     892             : 
     893             : /**
     894             :  * \var int GDALWarpKernel::nSrcYOff;
     895             :  *
     896             :  * Y offset to source pixel coordinates for transformation.
     897             :  *
     898             :  * See pfnTransformer.
     899             :  *
     900             :  * This field is required.
     901             :  */
     902             : 
     903             : /**
     904             :  * \var int GDALWarpKernel::nDstXOff;
     905             :  *
     906             :  * X offset to destination pixel coordinates for transformation.
     907             :  *
     908             :  * See pfnTransformer.
     909             :  *
     910             :  * This field is required.
     911             :  */
     912             : 
     913             : /**
     914             :  * \var int GDALWarpKernel::nDstYOff;
     915             :  *
     916             :  * Y offset to destination pixel coordinates for transformation.
     917             :  *
     918             :  * See pfnTransformer.
     919             :  *
     920             :  * This field is required.
     921             :  */
     922             : 
     923             : /**
     924             :  * \var GDALTransformerFunc GDALWarpKernel::pfnTransformer;
     925             :  *
     926             :  * Source/destination location transformer.
     927             :  *
     928             :  * The function to call to transform coordinates between source image
     929             :  * pixel/line coordinates and destination image pixel/line coordinates.
     930             :  * See GDALTransformerFunc() for details of the semantics of this function.
     931             :  *
     932             :  * The GDALWarpKernel algorithm will only ever use this transformer in
     933             :  * "destination to source" mode (bDstToSrc=TRUE), and will always pass
     934             :  * partial or complete scanlines of points in the destination image as
     935             :  * input.  This means, among other things, that it is safe to use the
     936             :  * approximating transform GDALApproxTransform() as the transformation
     937             :  * function.
     938             :  *
     939             :  * Source and destination images may be subsets of a larger overall image.
     940             :  * The transformation algorithms will expect and return pixel/line coordinates
     941             :  * in terms of this larger image, so coordinates need to be offset by
     942             :  * the offsets specified in nSrcXOff, nSrcYOff, nDstXOff, and nDstYOff before
     943             :  * passing to pfnTransformer, and after return from it.
     944             :  *
     945             :  * The GDALWarpKernel::pTransformerArg value will be passed as the callback
     946             :  * data to this function when it is called.
     947             :  *
     948             :  * This field is required.
     949             :  */
     950             : 
     951             : /**
     952             :  * \var void *GDALWarpKernel::pTransformerArg;
     953             :  *
     954             :  * Callback data for pfnTransformer.
     955             :  *
     956             :  * This field may be NULL if not required for the pfnTransformer being used.
     957             :  */
     958             : 
     959             : /**
     960             :  * \var GDALProgressFunc GDALWarpKernel::pfnProgress;
     961             :  *
     962             :  * The function to call to report progress of the algorithm, and to check
     963             :  * for a requested termination of the operation.  It operates according to
     964             :  * GDALProgressFunc() semantics.
     965             :  *
     966             :  * Generally speaking the progress function will be invoked for each
     967             :  * scanline of the destination buffer that has been processed.
     968             :  *
     969             :  * This field may be NULL (internally set to GDALDummyProgress()).
     970             :  */
     971             : 
     972             : /**
     973             :  * \var void *GDALWarpKernel::pProgress;
     974             :  *
     975             :  * Callback data for pfnProgress.
     976             :  *
     977             :  * This field may be NULL if not required for the pfnProgress being used.
     978             :  */
     979             : 
     980             : /************************************************************************/
     981             : /*                           GDALWarpKernel()                           */
     982             : /************************************************************************/
     983             : // Default constructor: pointers are null, counts/sizes/offsets are 0, X/Y scales are 1.0, resampling is GRA_NearestNeighbour, working type is GDT_Unknown, progress callback is GDALDummyProgress (base 0.0, scale 1.0).
     984        2357 : GDALWarpKernel::GDALWarpKernel()
     985             :     : papszWarpOptions(nullptr), eResample(GRA_NearestNeighbour),
     986             :       eWorkingDataType(GDT_Unknown), nBands(0), nSrcXSize(0), nSrcYSize(0),
     987             :       dfSrcXExtraSize(0.0), dfSrcYExtraSize(0.0), papabySrcImage(nullptr),
     988             :       papanBandSrcValid(nullptr), panUnifiedSrcValid(nullptr),
     989             :       pafUnifiedSrcDensity(nullptr), nDstXSize(0), nDstYSize(0),
     990             :       papabyDstImage(nullptr), panDstValid(nullptr), pafDstDensity(nullptr),
     991             :       dfXScale(1.0), dfYScale(1.0), dfXFilter(0.0), dfYFilter(0.0), nXRadius(0),
     992             :       nYRadius(0), nFiltInitX(0), nFiltInitY(0), nSrcXOff(0), nSrcYOff(0),
     993             :       nDstXOff(0), nDstYOff(0), pfnTransformer(nullptr),
     994             :       pTransformerArg(nullptr), pfnProgress(GDALDummyProgress),
     995             :       pProgress(nullptr), dfProgressBase(0.0), dfProgressScale(1.0),
     996        2357 :       padfDstNoDataReal(nullptr), psThreadData(nullptr)
     997             : {
     998        2357 : }
     999             : 
    1000             : /************************************************************************/
    1001             : /*                          ~GDALWarpKernel()                           */
    1002             : /************************************************************************/
    1003             : // Destructor with an empty body: no explicit cleanup is performed here.
    1004        2357 : GDALWarpKernel::~GDALWarpKernel()
    1005             : {
    1006        2357 : }
    1007             : 
    1008             : /************************************************************************/
    1009             : /*                            PerformWarp()                             */
    1010             : /************************************************************************/
    1011             : 
    1012             : /**
    1013             :  * \fn CPLErr GDALWarpKernel::PerformWarp();
    1014             :  *
    1015             :  * This method performs the warp described in the GDALWarpKernel.
    1016             :  *
    1017             :  * @return CE_None on success or CE_Failure if an error occurs.
    1018             :  */
    1019             : 
    1020        2355 : CPLErr GDALWarpKernel::PerformWarp()
    1021             : 
    1022             : {
    1023        2355 :     const CPLErr eErr = Validate();
    1024             : 
    1025        2355 :     if (eErr != CE_None)
    1026           1 :         return eErr;
    1027             : 
    1028             :     // See #2445 and #3079.
    1029        2354 :     if (nSrcXSize <= 0 || nSrcYSize <= 0)
    1030             :     {
    1031         303 :         if (!pfnProgress(dfProgressBase + dfProgressScale, "", pProgress))
    1032             :         {
    1033           0 :             CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
    1034           0 :             return CE_Failure;
    1035             :         }
    1036         303 :         return CE_None;
    1037             :     }
    1038             : 
    1039             :     /* -------------------------------------------------------------------- */
    1040             :     /*      Pre-calculate resampling scales and window sizes for filtering. */
    1041             :     /* -------------------------------------------------------------------- */
    1042             : 
    1043        2051 :     dfXScale = static_cast<double>(nDstXSize) / (nSrcXSize - dfSrcXExtraSize);
    1044        2051 :     dfYScale = static_cast<double>(nDstYSize) / (nSrcYSize - dfSrcYExtraSize);
    1045        2051 :     if (nSrcXSize >= nDstXSize && nSrcXSize <= nDstXSize + dfSrcXExtraSize)
    1046        1242 :         dfXScale = 1.0;
    1047        2051 :     if (nSrcYSize >= nDstYSize && nSrcYSize <= nDstYSize + dfSrcYExtraSize)
    1048        1001 :         dfYScale = 1.0;
    1049        2051 :     if (dfXScale < 1.0)
    1050             :     {
    1051         526 :         double dfXReciprocalScale = 1.0 / dfXScale;
    1052         526 :         const int nXReciprocalScale =
    1053         526 :             static_cast<int>(dfXReciprocalScale + 0.5);
    1054         526 :         if (fabs(dfXReciprocalScale - nXReciprocalScale) < 0.05)
    1055         414 :             dfXScale = 1.0 / nXReciprocalScale;
    1056             :     }
    1057        2051 :     if (dfYScale < 1.0)
    1058             :     {
    1059         490 :         double dfYReciprocalScale = 1.0 / dfYScale;
    1060         490 :         const int nYReciprocalScale =
    1061         490 :             static_cast<int>(dfYReciprocalScale + 0.5);
    1062         490 :         if (fabs(dfYReciprocalScale - nYReciprocalScale) < 0.05)
    1063         344 :             dfYScale = 1.0 / nYReciprocalScale;
    1064             :     }
    1065             : 
    1066             :     // XSCALE and YSCALE undocumented for now. Can help in some cases.
    1067             :     // Best would probably be a per-pixel scale computation.
    1068        2051 :     const char *pszXScale = CSLFetchNameValue(papszWarpOptions, "XSCALE");
    1069        2051 :     if (pszXScale != nullptr && !EQUAL(pszXScale, "FROM_GRID_SAMPLING"))
    1070           0 :         dfXScale = CPLAtof(pszXScale);
    1071        2051 :     const char *pszYScale = CSLFetchNameValue(papszWarpOptions, "YSCALE");
    1072        2051 :     if (pszYScale != nullptr)
    1073           0 :         dfYScale = CPLAtof(pszYScale);
    1074             : 
    1075             :     // If the xscale is significantly lower than the yscale, this is highly
    1076             :     // suspicious of a situation of wrapping a very large virtual file in
    1077             :     // geographic coordinates with left and right parts being close to the
    1078             :     // antimeridian. In that situation, the xscale computed by the above method
    1079             :     // is completely wrong. Prefer doing an average of a few sample points
    1080             :     // instead
    1081        2051 :     if ((dfYScale / dfXScale > 100 ||
    1082           0 :          (pszXScale != nullptr && EQUAL(pszXScale, "FROM_GRID_SAMPLING"))))
    1083             :     {
    1084             :         // Sample points along a grid
    1085           4 :         const int nPointsX = std::min(10, nDstXSize);
    1086           4 :         const int nPointsY = std::min(10, nDstYSize);
    1087           4 :         const int nPoints = 3 * nPointsX * nPointsY;
    1088           8 :         std::vector<double> padfX;
    1089           8 :         std::vector<double> padfY;
    1090           8 :         std::vector<double> padfZ(nPoints);
    1091           8 :         std::vector<int> pabSuccess(nPoints);
    1092          44 :         for (int iY = 0; iY < nPointsY; iY++)
    1093             :         {
    1094         440 :             for (int iX = 0; iX < nPointsX; iX++)
    1095             :             {
    1096         400 :                 const double dfX =
    1097             :                     nPointsX == 1
    1098         400 :                         ? 0.0
    1099         400 :                         : static_cast<double>(iX) * nDstXSize / (nPointsX - 1);
    1100         400 :                 const double dfY =
    1101             :                     nPointsY == 1
    1102         400 :                         ? 0.0
    1103         400 :                         : static_cast<double>(iY) * nDstYSize / (nPointsY - 1);
    1104             : 
    1105             :                 // Reproject each destination sample point and its neighbours
    1106             :                 // at (x+1,y) and (x,y+1), so as to get the local scale.
    1107         400 :                 padfX.push_back(dfX);
    1108         400 :                 padfY.push_back(dfY);
    1109             : 
    1110         400 :                 padfX.push_back((iX == nPointsX - 1) ? dfX - 1 : dfX + 1);
    1111         400 :                 padfY.push_back(dfY);
    1112             : 
    1113         400 :                 padfX.push_back(dfX);
    1114         400 :                 padfY.push_back((iY == nPointsY - 1) ? dfY - 1 : dfY + 1);
    1115             :             }
    1116             :         }
    1117           4 :         pfnTransformer(pTransformerArg, TRUE, nPoints, &padfX[0], &padfY[0],
    1118           4 :                        &padfZ[0], &pabSuccess[0]);
    1119             : 
    1120             :         // Compute the xscale at each sampling point
    1121           8 :         std::vector<double> adfXScales;
    1122         404 :         for (int i = 0; i < nPoints; i += 3)
    1123             :         {
    1124         400 :             if (pabSuccess[i] && pabSuccess[i + 1] && pabSuccess[i + 2])
    1125             :             {
    1126             :                 const double dfPointXScale =
    1127         400 :                     1.0 / std::max(std::abs(padfX[i + 1] - padfX[i]),
    1128         800 :                                    std::abs(padfX[i + 2] - padfX[i]));
    1129         400 :                 adfXScales.push_back(dfPointXScale);
    1130             :             }
    1131             :         }
    1132             : 
    1133             :         // Sort by increasing xscale
    1134           4 :         std::sort(adfXScales.begin(), adfXScales.end());
    1135             : 
    1136           4 :         if (!adfXScales.empty())
    1137             :         {
    1138             :             // Compute the average of scales, but eliminate outliers small
    1139             :             // scales, if some samples are just along the discontinuity.
    1140           4 :             const double dfMaxPointXScale = adfXScales.back();
    1141           4 :             double dfSumPointXScale = 0;
    1142           4 :             int nCountPointScale = 0;
    1143         404 :             for (double dfPointXScale : adfXScales)
    1144             :             {
    1145         400 :                 if (dfPointXScale > dfMaxPointXScale / 10)
    1146             :                 {
    1147         398 :                     dfSumPointXScale += dfPointXScale;
    1148         398 :                     nCountPointScale++;
    1149             :                 }
    1150             :             }
    1151           4 :             if (nCountPointScale > 0)  // should always be true
    1152             :             {
    1153           4 :                 const double dfXScaleFromSampling =
    1154           4 :                     dfSumPointXScale / nCountPointScale;
    1155             : #if DEBUG_VERBOSE
    1156             :                 CPLDebug("WARP", "Correcting dfXScale from %f to %f", dfXScale,
    1157             :                          dfXScaleFromSampling);
    1158             : #endif
    1159           4 :                 dfXScale = dfXScaleFromSampling;
    1160             :             }
    1161             :         }
    1162             :     }
    1163             : 
    1164             : #if DEBUG_VERBOSE
    1165             :     CPLDebug("WARP", "dfXScale = %f, dfYScale = %f", dfXScale, dfYScale);
    1166             : #endif
    1167             : 
    1168        2051 :     const int bUse4SamplesFormula = dfXScale >= 0.95 && dfYScale >= 0.95;
    1169             : 
    1170             :     // Safety check for callers that would use GDALWarpKernel without using
    1171             :     // GDALWarpOperation.
    1172        1978 :     if ((eResample == GRA_CubicSpline || eResample == GRA_Lanczos ||
    1173        1913 :          ((eResample == GRA_Cubic || eResample == GRA_Bilinear) &&
    1174        4102 :           !bUse4SamplesFormula)) &&
    1175         400 :         atoi(CSLFetchNameValueDef(papszWarpOptions, "EXTRA_ELTS", "0")) !=
    1176             :             WARP_EXTRA_ELTS)
    1177             :     {
    1178           0 :         CPLError(CE_Failure, CPLE_AppDefined,
    1179             :                  "Source arrays must have WARP_EXTRA_ELTS extra elements at "
    1180             :                  "their end. "
    1181             :                  "See GDALWarpKernel class definition. If this condition is "
    1182             :                  "fulfilled, define a EXTRA_ELTS=%d warp options",
    1183             :                  WARP_EXTRA_ELTS);
    1184           0 :         return CE_Failure;
    1185             :     }
    1186             : 
    1187        2051 :     dfXFilter = anGWKFilterRadius[eResample];
    1188        2051 :     dfYFilter = anGWKFilterRadius[eResample];
    1189             : 
    1190        2051 :     nXRadius = dfXScale < 1.0 ? static_cast<int>(ceil(dfXFilter / dfXScale))
    1191        1606 :                               : static_cast<int>(dfXFilter);
    1192        2051 :     nYRadius = dfYScale < 1.0 ? static_cast<int>(ceil(dfYFilter / dfYScale))
    1193        1580 :                               : static_cast<int>(dfYFilter);
    1194             : 
    1195             :     // Filter window offset depends on the parity of the kernel radius.
    1196        2051 :     nFiltInitX = ((anGWKFilterRadius[eResample] + 1) % 2) - nXRadius;
    1197        2051 :     nFiltInitY = ((anGWKFilterRadius[eResample] + 1) % 2) - nYRadius;
    1198             : 
    1199        2051 :     bApplyVerticalShift =
    1200        2051 :         CPLFetchBool(papszWarpOptions, "APPLY_VERTICAL_SHIFT", false);
    1201        2051 :     dfMultFactorVerticalShift = CPLAtof(CSLFetchNameValueDef(
    1202        2051 :         papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT", "1.0"));
    1203             : 
    1204             :     /* -------------------------------------------------------------------- */
    1205             :     /*      Set up resampling functions.                                    */
    1206             :     /* -------------------------------------------------------------------- */
    1207        2051 :     if (CPLFetchBool(papszWarpOptions, "USE_GENERAL_CASE", false))
    1208          12 :         return GWKGeneralCase(this);
    1209             : 
    1210             : #if defined(HAVE_OPENCL)
    1211         605 :     if ((eWorkingDataType == GDT_Byte || eWorkingDataType == GDT_CInt16 ||
    1212         433 :          eWorkingDataType == GDT_UInt16 || eWorkingDataType == GDT_Int16 ||
    1213         286 :          eWorkingDataType == GDT_CFloat32 || eWorkingDataType == GDT_Float32) &&
    1214        1901 :         (eResample == GRA_Bilinear || eResample == GRA_Cubic ||
    1215        1419 :          eResample == GRA_CubicSpline || eResample == GRA_Lanczos) &&
    1216        4655 :         !bApplyVerticalShift &&
    1217             :         // OpenCL warping gives different results than the ones expected by autotest,
    1218             :         // so disable it by default even if found.
    1219        1154 :         CPLTestBool(
    1220         577 :             CSLFetchNameValueDef(papszWarpOptions, "USE_OPENCL",
    1221             :                                  CPLGetConfigOption("GDAL_USE_OPENCL", "NO"))))
    1222             :     {
    1223           0 :         if (pafUnifiedSrcDensity != nullptr)
    1224             :         {
    1225             :             // If pafUnifiedSrcDensity is only set to 1.0, then we can
    1226             :             // discard it.
    1227           0 :             bool bFoundNotOne = false;
    1228           0 :             for (GPtrDiff_t j = 0;
    1229           0 :                  j < static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize; j++)
    1230             :             {
    1231           0 :                 if (pafUnifiedSrcDensity[j] != 1.0)
    1232             :                 {
    1233           0 :                     bFoundNotOne = true;
    1234           0 :                     break;
    1235             :                 }
    1236             :             }
    1237           0 :             if (!bFoundNotOne)
    1238             :             {
    1239           0 :                 CPLFree(pafUnifiedSrcDensity);
    1240           0 :                 pafUnifiedSrcDensity = nullptr;
    1241             :             }
    1242             :         }
    1243             : 
    1244           0 :         if (pafUnifiedSrcDensity != nullptr)
    1245             :         {
    1246             :             // Typically if there's a cutline or an alpha band
    1247             :             static bool bHasWarned = false;
    1248           0 :             if (!bHasWarned)
    1249             :             {
    1250           0 :                 bHasWarned = true;
    1251           0 :                 CPLDebug("WARP", "pafUnifiedSrcDensity is not null, "
    1252             :                                  "hence OpenCL warper cannot be used");
    1253             :             }
    1254             :         }
    1255             :         else
    1256             :         {
    1257           0 :             const CPLErr eResult = GWKOpenCLCase(this);
    1258             : 
    1259             :             // CE_Warning tells us a suitable OpenCL environment was not available
    1260             :             // so we fall through to other CPU based methods.
    1261           0 :             if (eResult != CE_Warning)
    1262           0 :                 return eResult;
    1263             :         }
    1264             :     }
    1265             : #endif  // defined HAVE_OPENCL
    1266             : 
    1267        2039 :     const bool bNoMasksOrDstDensityOnly =
    1268        2035 :         papanBandSrcValid == nullptr && panUnifiedSrcValid == nullptr &&
    1269        4074 :         pafUnifiedSrcDensity == nullptr && panDstValid == nullptr;
    1270             : 
    1271        2039 :     if (eWorkingDataType == GDT_Byte && eResample == GRA_NearestNeighbour &&
    1272             :         bNoMasksOrDstDensityOnly)
    1273         849 :         return GWKNearestNoMasksOrDstDensityOnlyByte(this);
    1274             : 
    1275        1190 :     if (eWorkingDataType == GDT_Byte && eResample == GRA_Bilinear &&
    1276             :         bNoMasksOrDstDensityOnly)
    1277         128 :         return GWKBilinearNoMasksOrDstDensityOnlyByte(this);
    1278             : 
    1279        1062 :     if (eWorkingDataType == GDT_Byte && eResample == GRA_Cubic &&
    1280             :         bNoMasksOrDstDensityOnly)
    1281          72 :         return GWKCubicNoMasksOrDstDensityOnlyByte(this);
    1282             : 
    1283         990 :     if (eWorkingDataType == GDT_Byte && eResample == GRA_CubicSpline &&
    1284             :         bNoMasksOrDstDensityOnly)
    1285          16 :         return GWKCubicSplineNoMasksOrDstDensityOnlyByte(this);
    1286             : 
    1287         974 :     if (eWorkingDataType == GDT_Byte && eResample == GRA_NearestNeighbour)
    1288         269 :         return GWKNearestByte(this);
    1289             : 
    1290         705 :     if ((eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16) &&
    1291         147 :         eResample == GRA_NearestNeighbour && bNoMasksOrDstDensityOnly)
    1292          24 :         return GWKNearestNoMasksOrDstDensityOnlyShort(this);
    1293             : 
    1294         681 :     if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Cubic &&
    1295             :         bNoMasksOrDstDensityOnly)
    1296           8 :         return GWKCubicNoMasksOrDstDensityOnlyShort(this);
    1297             : 
    1298         673 :     if ((eWorkingDataType == GDT_Int16) && eResample == GRA_CubicSpline &&
    1299             :         bNoMasksOrDstDensityOnly)
    1300           9 :         return GWKCubicSplineNoMasksOrDstDensityOnlyShort(this);
    1301             : 
    1302         664 :     if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Bilinear &&
    1303             :         bNoMasksOrDstDensityOnly)
    1304          21 :         return GWKBilinearNoMasksOrDstDensityOnlyShort(this);
    1305             : 
    1306         643 :     if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Cubic &&
    1307             :         bNoMasksOrDstDensityOnly)
    1308          14 :         return GWKCubicNoMasksOrDstDensityOnlyUShort(this);
    1309             : 
    1310         629 :     if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_CubicSpline &&
    1311             :         bNoMasksOrDstDensityOnly)
    1312           8 :         return GWKCubicSplineNoMasksOrDstDensityOnlyUShort(this);
    1313             : 
    1314         621 :     if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Bilinear &&
    1315             :         bNoMasksOrDstDensityOnly)
    1316           9 :         return GWKBilinearNoMasksOrDstDensityOnlyUShort(this);
    1317             : 
    1318         612 :     if ((eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16) &&
    1319          54 :         eResample == GRA_NearestNeighbour)
    1320          22 :         return GWKNearestShort(this);
    1321             : 
    1322         590 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour &&
    1323             :         bNoMasksOrDstDensityOnly)
    1324          14 :         return GWKNearestNoMasksOrDstDensityOnlyFloat(this);
    1325             : 
    1326         576 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour)
    1327          31 :         return GWKNearestFloat(this);
    1328             : 
    1329         545 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_Bilinear &&
    1330             :         bNoMasksOrDstDensityOnly)
    1331           5 :         return GWKBilinearNoMasksOrDstDensityOnlyFloat(this);
    1332             : 
    1333         540 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_Cubic &&
    1334             :         bNoMasksOrDstDensityOnly)
    1335          39 :         return GWKCubicNoMasksOrDstDensityOnlyFloat(this);
    1336             : 
    1337             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    1338             :     if (eWorkingDataType == GDT_Float64 && eResample == GRA_Bilinear &&
    1339             :         bNoMasksOrDstDensityOnly)
    1340             :         return GWKBilinearNoMasksOrDstDensityOnlyDouble(this);
    1341             : 
    1342             :     if (eWorkingDataType == GDT_Float64 && eResample == GRA_Cubic &&
    1343             :         bNoMasksOrDstDensityOnly)
    1344             :         return GWKCubicNoMasksOrDstDensityOnlyDouble(this);
    1345             : #endif
    1346             : 
    1347         501 :     if (eResample == GRA_Average)
    1348          70 :         return GWKAverageOrMode(this);
    1349             : 
    1350         431 :     if (eResample == GRA_RMS)
    1351           9 :         return GWKAverageOrMode(this);
    1352             : 
    1353         422 :     if (eResample == GRA_Mode)
    1354          11 :         return GWKAverageOrMode(this);
    1355             : 
    1356         411 :     if (eResample == GRA_Max)
    1357           6 :         return GWKAverageOrMode(this);
    1358             : 
    1359         405 :     if (eResample == GRA_Min)
    1360           5 :         return GWKAverageOrMode(this);
    1361             : 
    1362         400 :     if (eResample == GRA_Med)
    1363           6 :         return GWKAverageOrMode(this);
    1364             : 
    1365         394 :     if (eResample == GRA_Q1)
    1366           5 :         return GWKAverageOrMode(this);
    1367             : 
    1368         389 :     if (eResample == GRA_Q3)
    1369           5 :         return GWKAverageOrMode(this);
    1370             : 
    1371         384 :     if (eResample == GRA_Sum)
    1372          18 :         return GWKSumPreserving(this);
    1373             : 
    1374         366 :     if (!GDALDataTypeIsComplex(eWorkingDataType))
    1375             :     {
    1376         135 :         return GWKRealCase(this);
    1377             :     }
    1378             : 
    1379         231 :     return GWKGeneralCase(this);
    1380             : }
    1381             : 
    1382             : /************************************************************************/
    1383             : /*                              Validate()                              */
    1384             : /************************************************************************/
    1385             : 
    1386             : /**
    1387             :  * \fn CPLErr GDALWarpKernel::Validate()
    1388             :  *
    1389             :  * Check the settings in the GDALWarpKernel, and issue a CPLError()
    1390             :  * (and return CE_Failure) if the configuration is considered to be
    1391             :  * invalid for some reason.
    1392             :  *
    1393             :  * Two settings are checked: eResample must index an entry of
    1394             :  * anGWKFilterRadius, and the EXCLUDED_VALUES warp option, when set,
                        * must tokenize (on "(", "," and ")") into a multiple of nBands
                        * values. Each group of nBands values is converted with CPLAtof()
                        * and appended to m_aadfExcludedValues.
                        *
                        * No defaulting of pfnProgress is done by this implementation.
                        *
                        * NOTE(review): m_aadfExcludedValues is not cleared in this method, so
                        * a second call appends the tuples again; and nBands is used as a
                        * modulo divisor, so it is assumed to be non-zero when
                        * EXCLUDED_VALUES is set -- confirm callers guarantee both.
    1395             :  *
    1396             :  * @return CE_None on success or CE_Failure if an error is detected.
    1397             :  */
    1398             : 
    1399        2355 : CPLErr GDALWarpKernel::Validate()
    1400             : 
    1401             : {
                           // Reject resampling codes that cannot index anGWKFilterRadius.
    1402        2355 :     if (static_cast<size_t>(eResample) >=
    1403             :         (sizeof(anGWKFilterRadius) / sizeof(anGWKFilterRadius[0])))
    1404             :     {
    1405           0 :         CPLError(CE_Failure, CPLE_AppDefined,
    1406             :                  "Unsupported resampling method %d.",
    1407           0 :                  static_cast<int>(eResample));
    1408           0 :         return CE_Failure;
    1409             :     }
    1410             : 
    1411             :     // Tuples of values (e.g. "<R>,<G>,<B>" or "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>)") that must
    1412             :     // be ignored as contributing source pixels during resampling. Only taken into account by
    1413             :     // Average currently
    1414             :     const char *pszExcludedValues =
    1415        2355 :         CSLFetchNameValue(papszWarpOptions, "EXCLUDED_VALUES");
    1416        2355 :     if (pszExcludedValues)
    1417             :     {
                               // Parentheses and commas are all treated as separators, so the
                               // token list is flat; tuple boundaries are recovered by counting.
    1418             :         const CPLStringList aosTokens(
    1419          12 :             CSLTokenizeString2(pszExcludedValues, "(,)", 0));
    1420          12 :         if ((aosTokens.size() % nBands) != 0)
    1421             :         {
    1422           1 :             CPLError(CE_Failure, CPLE_AppDefined,
    1423             :                      "EXCLUDED_VALUES should contain one or several tuples of "
    1424             :                      "%d values formatted like <R>,<G>,<B> or "
    1425             :                      "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>) if there are multiple "
    1426             :                      "tuples",
    1427             :                      nBands);
    1428           1 :             return CE_Failure;
    1429             :         }
                               // Accumulate nBands consecutive values, then flush them as one
                               // tuple into m_aadfExcludedValues.
    1430          22 :         std::vector<double> adfTuple;
    1431          36 :         for (int i = 0; i < aosTokens.size(); ++i)
    1432             :         {
    1433          25 :             adfTuple.push_back(CPLAtof(aosTokens[i]));
    1434          25 :             if (((i + 1) % nBands) == 0)
    1435             :             {
    1436          11 :                 m_aadfExcludedValues.push_back(adfTuple);
    1437          11 :                 adfTuple.clear();
    1438             :             }
    1439             :         }
    1440             :     }
    1441             : 
    1442        2354 :     return CE_None;
    1443             : }
    1444             : 
    1445             : /************************************************************************/
    1446             : /*                         GWKOverlayDensity()                          */
    1447             : /*                                                                      */
    1448             : /*      Compute the final density for the destination pixel.  This      */
    1449             : /*      is a function of the overlay density (passed in) and the        */
    1450             : /*      original density.                                               */
    1451             : /************************************************************************/
    1452             : 
    1453     7399030 : static void GWKOverlayDensity(const GDALWarpKernel *poWK, GPtrDiff_t iDstOffset,
    1454             :                               double dfDensity)
    1455             : {
                           // Nothing to update when the overlay density is below 0.0001 or when
                           // the kernel has no destination density array.
    1456     7399030 :     if (dfDensity < 0.0001 || poWK->pafDstDensity == nullptr)
    1457     6208160 :         return;
    1458             : 
                           // New density = 1 - (1 - overlay) * (1 - existing), written back in
                           // place as a float.
    1459     1190880 :     poWK->pafDstDensity[iDstOffset] = static_cast<float>(
    1460     1190880 :         1.0 - (1.0 - dfDensity) * (1.0 - poWK->pafDstDensity[iDstOffset]));
    1461             : }
    1462             : 
    1463             : /************************************************************************/
    1464             : /*                          GWKRoundValueT()                            */
    1465             : /************************************************************************/
    1466             : 
    1467             : template <class T, bool is_signed> struct sGWKRoundValueT
    1468             : {
                           // Primary template: eval() is only declared here; the definitions
                           // come from the two partial specializations on is_signed.
    1469             :     static T eval(double);
    1470             : };
    1471             : 
    1472             : template <class T> struct sGWKRoundValueT<T, true> /* signed */
    1473             : {
                           // Round to nearest by flooring dfValue + 0.5, so exact halves go
                           // towards +infinity (e.g. -2.5 gives -2), then cast to T.
    1474     2312700 :     static T eval(double dfValue)
    1475             :     {
    1476     2312700 :         return static_cast<T>(floor(dfValue + 0.5));
    1477             :     }
    1478             : };
    1479             : 
    1480             : template <class T> struct sGWKRoundValueT<T, false> /* unsigned */
    1481             : {
                           // Add 0.5 and let the cast truncate; no floor() call is made.
                           // NOTE(review): assumes dfValue is non-negative and within the
                           // range of T -- nothing here checks it.
    1482    12553472 :     static T eval(double dfValue)
    1483             :     {
    1484    12553472 :         return static_cast<T>(dfValue + 0.5);
    1485             :     }
    1486             : };
    1487             : 
    1488    14838672 : template <class T> static T GWKRoundValueT(double dfValue)
    1489             : {
                           // Pick the signed or unsigned rounding helper at compile time from
                           // std::numeric_limits<T>::is_signed.
    1490    14838672 :     return sGWKRoundValueT<T, std::numeric_limits<T>::is_signed>::eval(dfValue);
    1491             : }
    1492             : 
    1493      269075 : template <> float GWKRoundValueT<float>(double dfValue)
    1494             : {
                           // float specialization: no rounding step, just a narrowing cast.
    1495      269075 :     return static_cast<float>(dfValue);
    1496             : }
    1497             : 
    1498             : #ifdef notused
    1499             : template <> double GWKRoundValueT<double>(double dfValue)
    1500             : {
                           // double specialization: the value is returned unchanged.
    1501             :     return dfValue;
    1502             : }
    1503             : #endif
    1504             : 
    1505             : /************************************************************************/
    1506             : /*                            GWKClampValueT()                          */
    1507             : /************************************************************************/
    1508             : 
    1509    10118579 : template <class T> static CPL_INLINE T GWKClampValueT(double dfValue)
    1510             : {
                           // Saturate to the limits reported by std::numeric_limits<T>; values
                           // inside the range are rounded with GWKRoundValueT<T>.
                           // NOTE(review): a NaN input fails both comparisons and is passed on
                           // to GWKRoundValueT<T> -- confirm callers never supply NaN.
    1511    10118579 :     if (dfValue < std::numeric_limits<T>::min())
    1512        3762 :         return std::numeric_limits<T>::min();
    1513    10130949 :     else if (dfValue > std::numeric_limits<T>::max())
    1514       17314 :         return std::numeric_limits<T>::max();
    1515             :     else
    1516    10115399 :         return GWKRoundValueT<T>(dfValue);
    1517             : }
    1518             : 
    1519      718913 : template <> float GWKClampValueT<float>(double dfValue)
    1520             : {
                           // float specialization: no range test is applied, only a narrowing
                           // cast.
    1521      718913 :     return static_cast<float>(dfValue);
    1522             : }
    1523             : 
    1524             : #ifdef notused
    1525             : template <> double GWKClampValueT<double>(double dfValue)
    1526             : {
                           // double specialization: the value is returned unchanged.
    1527             :     return dfValue;
    1528             : }
    1529             : #endif
    1530             : 
    1531             : /************************************************************************/
    1532             : /*                         GWKSetPixelValueRealT()                      */
    1533             : /************************************************************************/
    1534             : 
    1535             : template <class T>
    1536      159076 : static bool GWKSetPixelValueRealT(const GDALWarpKernel *poWK, int iBand,
    1537             :                                   GPtrDiff_t iDstOffset, double dfDensity,
    1538             :                                   T value)
    1539             : {
                           // Writes `value` into band iBand of the destination image at
                           // iDstOffset, blending it with the existing destination pixel when
                           // dfDensity is below 0.9999. Every return path yields true.
    1540      159076 :     T *pDst = reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
    1541             : 
    1542             :     /* -------------------------------------------------------------------- */
    1543             :     /*      If the source density is less than 100% we need to fetch the    */
    1544             :     /*      existing destination value, and mix it with the source to       */
    1545             :     /*      get the new "to apply" value.  Also compute composite           */
    1546             :     /*      density.                                                        */
    1547             :     /*                                                                      */
    1548             :     /*      We avoid mixing if density is very near one or risk mixing      */
    1549             :     /*      in very extreme nodata values and causing odd results (#1610)   */
    1550             :     /* -------------------------------------------------------------------- */
    1551      159076 :     if (dfDensity < 0.9999)
    1552             :     {
                               // A source density below 0.0001 leaves the destination untouched.
    1553      159076 :         if (dfDensity < 0.0001)
    1554           0 :             return true;
    1555             : 
                               // Destination density defaults to 1.0; it is read from
                               // pafDstDensity when present, otherwise forced to 0.0 if the
                               // panDstValid mask marks the pixel as not valid.
    1556      159076 :         double dfDstDensity = 1.0;
    1557             : 
    1558      159076 :         if (poWK->pafDstDensity != nullptr)
    1559      157604 :             dfDstDensity = poWK->pafDstDensity[iDstOffset];
    1560        1472 :         else if (poWK->panDstValid != nullptr &&
    1561           0 :                  !CPLMaskGet(poWK->panDstValid, iDstOffset))
    1562           0 :             dfDstDensity = 0.0;
    1563             : 
    1564             :         // Note: panDstValid[] is only consulted above when pafDstDensity is null.
    1565             : 
    1566      159076 :         const double dfDstReal = pDst[iDstOffset];
    1567             : 
    1568             :         // The destination density is really only relative to the portion
    1569             :         // not occluded by the overlay.
    1570      159076 :         const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
    1571             : 
                               // Weighted mean of the new value and the existing pixel, with
                               // weights dfDensity and dfDstInfluence.
    1572      159076 :         const double dfReal = (value * dfDensity + dfDstReal * dfDstInfluence) /
    1573      159076 :                               (dfDensity + dfDstInfluence);
    1574             : 
    1575             :         /* --------------------------------------------------------------------
    1576             :          */
    1577             :         /*      Actually apply the destination value. */
    1578             :         /*                                                                      */
    1579             :         /*      Avoid using the destination nodata value for integer datatypes
    1580             :          */
    1581             :         /*      if by chance it is equal to the computed pixel value. */
    1582             :         /* --------------------------------------------------------------------
    1583             :          */
    1584      159076 :         pDst[iDstOffset] = GWKClampValueT<T>(dfReal);
    1585             :     }
    1586             :     else
    1587             :     {
                               // Density of at least 0.9999: store the value without blending.
    1588           0 :         pDst[iDstOffset] = value;
    1589             :     }
    1590             : 
                           // If the stored pixel equals this band's destination nodata value,
                           // nudge it: to min() + 1 when it sits at T's min(), otherwise down
                           // by one.
    1591      159076 :     if (poWK->padfDstNoDataReal != nullptr &&
    1592           0 :         poWK->padfDstNoDataReal[iBand] == static_cast<double>(pDst[iDstOffset]))
    1593             :     {
    1594           0 :         if (pDst[iDstOffset] == std::numeric_limits<T>::min())
    1595           0 :             pDst[iDstOffset] = std::numeric_limits<T>::min() + 1;
    1596             :         else
    1597           0 :             pDst[iDstOffset]--;
    1598             :     }
    1599             : 
    1600      159076 :     return true;
    1601             : }
    1602             : 
    1603             : /************************************************************************/
    1604             : /*                          GWKSetPixelValue()                          */
    1605             : /************************************************************************/
    1606             : 
    1607     3627610 : static bool GWKSetPixelValue(const GDALWarpKernel *poWK, int iBand,
    1608             :                              GPtrDiff_t iDstOffset, double dfDensity,
    1609             :                              double dfReal, double dfImag)
    1610             : 
    1611             : {
    1612     3627610 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    1613             : 
    1614             :     /* -------------------------------------------------------------------- */
    1615             :     /*      If the source density is less than 100% we need to fetch the    */
    1616             :     /*      existing destination value, and mix it with the source to       */
    1617             :     /*      get the new "to apply" value.  Also compute composite           */
    1618             :     /*      density.                                                        */
    1619             :     /*                                                                      */
    1620             :     /*      We avoid mixing if density is very near one or risk mixing      */
    1621             :     /*      in very extreme nodata values and causing odd results (#1610)   */
    1622             :     /* -------------------------------------------------------------------- */
    1623     3627610 :     if (dfDensity < 0.9999)
    1624             :     {
    1625         800 :         if (dfDensity < 0.0001)
    1626           0 :             return true;
    1627             : 
    1628         800 :         double dfDstDensity = 1.0;
    1629         800 :         if (poWK->pafDstDensity != nullptr)
    1630         800 :             dfDstDensity = poWK->pafDstDensity[iDstOffset];
    1631           0 :         else if (poWK->panDstValid != nullptr &&
    1632           0 :                  !CPLMaskGet(poWK->panDstValid, iDstOffset))
    1633           0 :             dfDstDensity = 0.0;
    1634             : 
    1635         800 :         double dfDstReal = 0.0;
    1636         800 :         double dfDstImag = 0.0;
    1637             :         // panDstValid[] is only consulted above when pafDstDensity is null; it seems like it ought to be tested in the other case too.
    1638             : 
    1639             :         // TODO(schwehr): Factor out this repeated type of set.
    1640         800 :         switch (poWK->eWorkingDataType)
    1641             :         {
    1642           0 :             case GDT_Byte:
    1643           0 :                 dfDstReal = pabyDst[iDstOffset];
    1644           0 :                 dfDstImag = 0.0;
    1645           0 :                 break;
    1646             : 
    1647           0 :             case GDT_Int8:
    1648           0 :                 dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
    1649           0 :                 dfDstImag = 0.0;
    1650           0 :                 break;
    1651             : 
    1652         400 :             case GDT_Int16:
    1653         400 :                 dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
    1654         400 :                 dfDstImag = 0.0;
    1655         400 :                 break;
    1656             : 
    1657         400 :             case GDT_UInt16:
    1658         400 :                 dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
    1659         400 :                 dfDstImag = 0.0;
    1660         400 :                 break;
    1661             : 
    1662           0 :             case GDT_Int32:
    1663           0 :                 dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
    1664           0 :                 dfDstImag = 0.0;
    1665           0 :                 break;
    1666             : 
    1667           0 :             case GDT_UInt32:
    1668           0 :                 dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
    1669           0 :                 dfDstImag = 0.0;
    1670           0 :                 break;
    1671             : 
    1672           0 :             case GDT_Int64:
    1673           0 :                 dfDstReal = static_cast<double>(
    1674           0 :                     reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
    1675           0 :                 dfDstImag = 0.0;
    1676           0 :                 break;
    1677             : 
    1678           0 :             case GDT_UInt64:
    1679           0 :                 dfDstReal = static_cast<double>(
    1680           0 :                     reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
    1681           0 :                 dfDstImag = 0.0;
    1682           0 :                 break;
    1683             : 
    1684           0 :             case GDT_Float32:
    1685           0 :                 dfDstReal = reinterpret_cast<float *>(pabyDst)[iDstOffset];
    1686           0 :                 dfDstImag = 0.0;
    1687           0 :                 break;
    1688             : 
    1689           0 :             case GDT_Float64:
    1690           0 :                 dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
    1691           0 :                 dfDstImag = 0.0;
    1692           0 :                 break;
    1693             : 
    1694           0 :             case GDT_CInt16:
    1695           0 :                 dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2];
    1696           0 :                 dfDstImag =
    1697           0 :                     reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2 + 1];
    1698           0 :                 break;
    1699             : 
    1700           0 :             case GDT_CInt32:
    1701           0 :                 dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2];
    1702           0 :                 dfDstImag =
    1703           0 :                     reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2 + 1];
    1704           0 :                 break;
    1705             : 
    1706           0 :             case GDT_CFloat32:
    1707           0 :                 dfDstReal = reinterpret_cast<float *>(pabyDst)[iDstOffset * 2];
    1708           0 :                 dfDstImag =
    1709           0 :                     reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1];
    1710           0 :                 break;
    1711             : 
    1712           0 :             case GDT_CFloat64:
    1713           0 :                 dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset * 2];
    1714           0 :                 dfDstImag =
    1715           0 :                     reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1];
    1716           0 :                 break;
    1717             : 
    1718           0 :             case GDT_Unknown:
    1719             :             case GDT_TypeCount:
    1720           0 :                 CPLAssert(false);
    1721             :                 return false;
    1722             :         }
    1723             : 
    1724             :         // The destination density is really only relative to the portion
    1725             :         // not occluded by the overlay.
    1726         800 :         const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
    1727             : 
    1728         800 :         dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
    1729         800 :                  (dfDensity + dfDstInfluence);
    1730             : 
    1731         800 :         dfImag = (dfImag * dfDensity + dfDstImag * dfDstInfluence) /
    1732         800 :                  (dfDensity + dfDstInfluence);
    1733             :     }
    1734             : 
    1735             : /* -------------------------------------------------------------------- */
    1736             : /*      Actually apply the destination value.                           */
    1737             : /*                                                                      */
    1738             : /*      Avoid using the destination nodata value for integer datatypes  */
    1739             : /*      if by chance it is equal to the computed pixel value.           */
    1740             : /* -------------------------------------------------------------------- */
    1741             : 
    1742             : // TODO(schwehr): Can we make this a template?
    1743             : #define CLAMP(type)                                                            \
    1744             :     do                                                                         \
    1745             :     {                                                                          \
    1746             :         type *_pDst = reinterpret_cast<type *>(pabyDst);                       \
    1747             :         if (dfReal < static_cast<double>(std::numeric_limits<type>::min()))    \
    1748             :             _pDst[iDstOffset] =                                                \
    1749             :                 static_cast<type>(std::numeric_limits<type>::min());           \
    1750             :         else if (dfReal >                                                      \
    1751             :                  static_cast<double>(std::numeric_limits<type>::max()))        \
    1752             :             _pDst[iDstOffset] =                                                \
    1753             :                 static_cast<type>(std::numeric_limits<type>::max());           \
    1754             :         else                                                                   \
    1755             :             _pDst[iDstOffset] = (std::numeric_limits<type>::is_signed)         \
    1756             :                                     ? static_cast<type>(floor(dfReal + 0.5))   \
    1757             :                                     : static_cast<type>(dfReal + 0.5);         \
    1758             :         if (poWK->padfDstNoDataReal != nullptr &&                              \
    1759             :             poWK->padfDstNoDataReal[iBand] ==                                  \
    1760             :                 static_cast<double>(_pDst[iDstOffset]))                        \
    1761             :         {                                                                      \
    1762             :             if (_pDst[iDstOffset] ==                                           \
    1763             :                 static_cast<type>(std::numeric_limits<type>::min()))           \
    1764             :                 _pDst[iDstOffset] =                                            \
    1765             :                     static_cast<type>(std::numeric_limits<type>::min() + 1);   \
    1766             :             else                                                               \
    1767             :                 _pDst[iDstOffset]--;                                           \
    1768             :         }                                                                      \
    1769             :     } while (false)
    1770             : 
    1771     3627610 :     switch (poWK->eWorkingDataType)
    1772             :     {
    1773     2901430 :         case GDT_Byte:
    1774     2901430 :             CLAMP(GByte);
    1775     2901430 :             break;
    1776             : 
    1777           0 :         case GDT_Int8:
    1778           0 :             CLAMP(GInt8);
    1779           0 :             break;
    1780             : 
    1781        7465 :         case GDT_Int16:
    1782        7465 :             CLAMP(GInt16);
    1783        7465 :             break;
    1784             : 
    1785         463 :         case GDT_UInt16:
    1786         463 :             CLAMP(GUInt16);
    1787         463 :             break;
    1788             : 
    1789          63 :         case GDT_UInt32:
    1790          63 :             CLAMP(GUInt32);
    1791          63 :             break;
    1792             : 
    1793        3463 :         case GDT_Int32:
    1794        3463 :             CLAMP(GInt32);
    1795        3463 :             break;
    1796             : 
    1797           0 :         case GDT_UInt64:
    1798           0 :             CLAMP(std::uint64_t);
    1799           0 :             break;
    1800             : 
    1801           0 :         case GDT_Int64:
    1802           0 :             CLAMP(std::int64_t);
    1803           0 :             break;
    1804             : 
    1805      478957 :         case GDT_Float32:
    1806      478957 :             reinterpret_cast<float *>(pabyDst)[iDstOffset] =
    1807      478957 :                 static_cast<float>(dfReal);
    1808      478957 :             break;
    1809             : 
    1810         147 :         case GDT_Float64:
    1811         147 :             reinterpret_cast<double *>(pabyDst)[iDstOffset] = dfReal;
    1812         147 :             break;
    1813             : 
    1814      234178 :         case GDT_CInt16:
    1815             :         {
    1816             :             typedef GInt16 T;
    1817      234178 :             if (dfReal < static_cast<double>(std::numeric_limits<T>::min()))
    1818           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1819           0 :                     std::numeric_limits<T>::min();
    1820      234178 :             else if (dfReal >
    1821      234178 :                      static_cast<double>(std::numeric_limits<T>::max()))
    1822           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1823           0 :                     std::numeric_limits<T>::max();
    1824             :             else
    1825      234178 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1826      234178 :                     static_cast<T>(floor(dfReal + 0.5));
    1827      234178 :             if (dfImag < static_cast<double>(std::numeric_limits<T>::min()))
    1828           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1829           0 :                     std::numeric_limits<T>::min();
    1830      234178 :             else if (dfImag >
    1831      234178 :                      static_cast<double>(std::numeric_limits<T>::max()))
    1832           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1833           0 :                     std::numeric_limits<T>::max();
    1834             :             else
    1835      234178 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1836      234178 :                     static_cast<T>(floor(dfImag + 0.5));
    1837      234178 :             break;
    1838             :         }
    1839             : 
    1840         478 :         case GDT_CInt32:
    1841             :         {
    1842             :             typedef GInt32 T;
    1843         478 :             if (dfReal < static_cast<double>(std::numeric_limits<T>::min()))
    1844           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1845           0 :                     std::numeric_limits<T>::min();
    1846         478 :             else if (dfReal >
    1847         478 :                      static_cast<double>(std::numeric_limits<T>::max()))
    1848           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1849           0 :                     std::numeric_limits<T>::max();
    1850             :             else
    1851         478 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    1852         478 :                     static_cast<T>(floor(dfReal + 0.5));
    1853         478 :             if (dfImag < static_cast<double>(std::numeric_limits<T>::min()))
    1854           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1855           0 :                     std::numeric_limits<T>::min();
    1856         478 :             else if (dfImag >
    1857         478 :                      static_cast<double>(std::numeric_limits<T>::max()))
    1858           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1859           0 :                     std::numeric_limits<T>::max();
    1860             :             else
    1861         478 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    1862         478 :                     static_cast<T>(floor(dfImag + 0.5));
    1863         478 :             break;
    1864             :         }
    1865             : 
    1866         490 :         case GDT_CFloat32:
    1867         490 :             reinterpret_cast<float *>(pabyDst)[iDstOffset * 2] =
    1868         490 :                 static_cast<float>(dfReal);
    1869         490 :             reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1] =
    1870         490 :                 static_cast<float>(dfImag);
    1871         490 :             break;
    1872             : 
    1873         478 :         case GDT_CFloat64:
    1874         478 :             reinterpret_cast<double *>(pabyDst)[iDstOffset * 2] = dfReal;
    1875         478 :             reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1] = dfImag;
    1876         478 :             break;
    1877             : 
    1878           0 :         case GDT_Unknown:
    1879             :         case GDT_TypeCount:
    1880           0 :             return false;
    1881             :     }
    1882             : 
    1883     3627610 :     return true;
    1884             : }
    1885             : 
    1886             : /************************************************************************/
    1887             : /*                       GWKSetPixelValueReal()                         */
    1888             : /************************************************************************/
    1889             : 
    1890      840011 : static bool GWKSetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
    1891             :                                  GPtrDiff_t iDstOffset, double dfDensity,
    1892             :                                  double dfReal)
    1893             : 
    1894             : {
    1895      840011 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    1896             : 
    1897             :     /* -------------------------------------------------------------------- */
    1898             :     /*      If the source density is less than 100% we need to fetch the    */
    1899             :     /*      existing destination value, and mix it with the source to       */
    1900             :     /*      get the new "to apply" value.  Also compute composite           */
    1901             :     /*      density.                                                        */
    1902             :     /*                                                                      */
    1903             :     /*      We avoid mixing if density is very near one or risk mixing      */
    1904             :     /*      in very extreme nodata values and causing odd results (#1610)   */
    1905             :     /* -------------------------------------------------------------------- */
    1906      840011 :     if (dfDensity < 0.9999)
    1907             :     {
    1908         600 :         if (dfDensity < 0.0001)
    1909           0 :             return true;
    1910             : 
    1911         600 :         double dfDstReal = 0.0;
    1912         600 :         double dfDstDensity = 1.0;
    1913             : 
    1914         600 :         if (poWK->pafDstDensity != nullptr)
    1915         600 :             dfDstDensity = poWK->pafDstDensity[iDstOffset];
    1916           0 :         else if (poWK->panDstValid != nullptr &&
    1917           0 :                  !CPLMaskGet(poWK->panDstValid, iDstOffset))
    1918           0 :             dfDstDensity = 0.0;
    1919             : 
    1920             :         // panDstValid[] is only consulted above when pafDstDensity is null; it seems like it ought to be tested in the other case too.
    1921             : 
    1922         600 :         switch (poWK->eWorkingDataType)
    1923             :         {
    1924           0 :             case GDT_Byte:
    1925           0 :                 dfDstReal = pabyDst[iDstOffset];
    1926           0 :                 break;
    1927             : 
    1928           0 :             case GDT_Int8:
    1929           0 :                 dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
    1930           0 :                 break;
    1931             : 
    1932         300 :             case GDT_Int16:
    1933         300 :                 dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
    1934         300 :                 break;
    1935             : 
    1936         300 :             case GDT_UInt16:
    1937         300 :                 dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
    1938         300 :                 break;
    1939             : 
    1940           0 :             case GDT_Int32:
    1941           0 :                 dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
    1942           0 :                 break;
    1943             : 
    1944           0 :             case GDT_UInt32:
    1945           0 :                 dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
    1946           0 :                 break;
    1947             : 
    1948           0 :             case GDT_Int64:
    1949           0 :                 dfDstReal = static_cast<double>(
    1950           0 :                     reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
    1951           0 :                 break;
    1952             : 
    1953           0 :             case GDT_UInt64:
    1954           0 :                 dfDstReal = static_cast<double>(
    1955           0 :                     reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
    1956           0 :                 break;
    1957             : 
    1958           0 :             case GDT_Float32:
    1959           0 :                 dfDstReal = reinterpret_cast<float *>(pabyDst)[iDstOffset];
    1960           0 :                 break;
    1961             : 
    1962           0 :             case GDT_Float64:
    1963           0 :                 dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
    1964           0 :                 break;
    1965             : 
    1966           0 :             case GDT_CInt16:
    1967             :             case GDT_CInt32:
    1968             :             case GDT_CFloat32:
    1969             :             case GDT_CFloat64:
    1970             :             case GDT_Unknown:
    1971             :             case GDT_TypeCount:
    1972           0 :                 CPLAssert(false);
    1973             :                 return false;
    1974             :         }
    1975             : 
    1976             :         // The destination density is really only relative to the portion
    1977             :         // not occluded by the overlay.
    1978         600 :         const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
    1979             : 
    1980         600 :         dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
    1981         600 :                  (dfDensity + dfDstInfluence);
    1982             :     }
    1983             : 
    1984             :     /* -------------------------------------------------------------------- */
    1985             :     /*      Actually apply the destination value.                           */
    1986             :     /*                                                                      */
    1987             :     /*      Avoid using the destination nodata value for integer datatypes  */
    1988             :     /*      if by chance it is equal to the computed pixel value.           */
    1989             :     /* -------------------------------------------------------------------- */
    1990             : 
    1991      840011 :     switch (poWK->eWorkingDataType)
    1992             :     {
    1993      832986 :         case GDT_Byte:
    1994      832986 :             CLAMP(GByte);
    1995      832986 :             break;
    1996             : 
    1997           0 :         case GDT_Int8:
    1998           0 :             CLAMP(GInt8);
    1999           0 :             break;
    2000             : 
    2001        1085 :         case GDT_Int16:
    2002        1085 :             CLAMP(GInt16);
    2003        1085 :             break;
    2004             : 
    2005         363 :         case GDT_UInt16:
    2006         363 :             CLAMP(GUInt16);
    2007         363 :             break;
    2008             : 
    2009         315 :         case GDT_UInt32:
    2010         315 :             CLAMP(GUInt32);
    2011         315 :             break;
    2012             : 
    2013        1318 :         case GDT_Int32:
    2014        1318 :             CLAMP(GInt32);
    2015        1318 :             break;
    2016             : 
    2017           0 :         case GDT_UInt64:
    2018           0 :             CLAMP(std::uint64_t);
    2019           0 :             break;
    2020             : 
    2021         100 :         case GDT_Int64:
    2022         100 :             CLAMP(std::int64_t);
    2023         100 :             break;
    2024             : 
    2025        3426 :         case GDT_Float32:
    2026        3426 :             reinterpret_cast<float *>(pabyDst)[iDstOffset] =
    2027        3426 :                 static_cast<float>(dfReal);
    2028        3426 :             break;
    2029             : 
    2030         418 :         case GDT_Float64:
    2031         418 :             reinterpret_cast<double *>(pabyDst)[iDstOffset] = dfReal;
    2032         418 :             break;
    2033             : 
    2034           0 :         case GDT_CInt16:
    2035             :         case GDT_CInt32:
    2036             :         case GDT_CFloat32:
    2037             :         case GDT_CFloat64:
    2038           0 :             return false;
    2039             : 
    2040           0 :         case GDT_Unknown:
    2041             :         case GDT_TypeCount:
    2042           0 :             CPLAssert(false);
    2043             :             return false;
    2044             :     }
    2045             : 
    2046      840011 :     return true;
    2047             : }
    2048             : 
    2049             : /************************************************************************/
    2050             : /*                          GWKGetPixelValue()                          */
    2051             : /************************************************************************/
    2052             : 
    2053             : /* It is assumed that panUnifiedSrcValid has been checked before */
    2054             : 
    2055    29095700 : static bool GWKGetPixelValue(const GDALWarpKernel *poWK, int iBand,
    2056             :                              GPtrDiff_t iSrcOffset, double *pdfDensity,
    2057             :                              double *pdfReal, double *pdfImag)
    2058             : 
    2059             : {
    2060    29095700 :     GByte *pabySrc = poWK->papabySrcImage[iBand];
    2061             : 
    2062    58191500 :     if (poWK->papanBandSrcValid != nullptr &&
    2063    29095700 :         poWK->papanBandSrcValid[iBand] != nullptr &&
    2064           0 :         !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
    2065             :     {
    2066           0 :         *pdfDensity = 0.0;
    2067           0 :         return false;
    2068             :     }
    2069             : 
    2070    29095700 :     *pdfReal = 0.0;
    2071    29095700 :     *pdfImag = 0.0;
    2072             : 
    2073             :     // TODO(schwehr): Fix casting.
    2074    29095700 :     switch (poWK->eWorkingDataType)
    2075             :     {
    2076    28005300 :         case GDT_Byte:
    2077    28005300 :             *pdfReal = pabySrc[iSrcOffset];
    2078    28005300 :             *pdfImag = 0.0;
    2079    28005300 :             break;
    2080             : 
    2081           0 :         case GDT_Int8:
    2082           0 :             *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
    2083           0 :             *pdfImag = 0.0;
    2084           0 :             break;
    2085             : 
    2086       28181 :         case GDT_Int16:
    2087       28181 :             *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
    2088       28181 :             *pdfImag = 0.0;
    2089       28181 :             break;
    2090             : 
    2091         163 :         case GDT_UInt16:
    2092         163 :             *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
    2093         163 :             *pdfImag = 0.0;
    2094         163 :             break;
    2095             : 
    2096       13663 :         case GDT_Int32:
    2097       13663 :             *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
    2098       13663 :             *pdfImag = 0.0;
    2099       13663 :             break;
    2100             : 
    2101          63 :         case GDT_UInt32:
    2102          63 :             *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
    2103          63 :             *pdfImag = 0.0;
    2104          63 :             break;
    2105             : 
    2106           0 :         case GDT_Int64:
    2107           0 :             *pdfReal = static_cast<double>(
    2108           0 :                 reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
    2109           0 :             *pdfImag = 0.0;
    2110           0 :             break;
    2111             : 
    2112           0 :         case GDT_UInt64:
    2113           0 :             *pdfReal = static_cast<double>(
    2114           0 :                 reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
    2115           0 :             *pdfImag = 0.0;
    2116           0 :             break;
    2117             : 
    2118     1047220 :         case GDT_Float32:
    2119     1047220 :             *pdfReal = reinterpret_cast<float *>(pabySrc)[iSrcOffset];
    2120     1047220 :             *pdfImag = 0.0;
    2121     1047220 :             break;
    2122             : 
    2123         582 :         case GDT_Float64:
    2124         582 :             *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
    2125         582 :             *pdfImag = 0.0;
    2126         582 :             break;
    2127             : 
    2128         130 :         case GDT_CInt16:
    2129         130 :             *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2];
    2130         130 :             *pdfImag = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2 + 1];
    2131         130 :             break;
    2132             : 
    2133         130 :         case GDT_CInt32:
    2134         130 :             *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2];
    2135         130 :             *pdfImag = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2 + 1];
    2136         130 :             break;
    2137             : 
    2138         178 :         case GDT_CFloat32:
    2139         178 :             *pdfReal = reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2];
    2140         178 :             *pdfImag = reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2 + 1];
    2141         178 :             break;
    2142             : 
    2143         130 :         case GDT_CFloat64:
    2144         130 :             *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2];
    2145         130 :             *pdfImag = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2 + 1];
    2146         130 :             break;
    2147             : 
    2148           0 :         case GDT_Unknown:
    2149             :         case GDT_TypeCount:
    2150           0 :             CPLAssert(false);
    2151             :             *pdfDensity = 0.0;
    2152             :             return false;
    2153             :     }
    2154             : 
    2155    29095700 :     if (poWK->pafUnifiedSrcDensity != nullptr)
    2156     3014960 :         *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    2157             :     else
    2158    26080800 :         *pdfDensity = 1.0;
    2159             : 
    2160    29095700 :     return *pdfDensity != 0.0;
    2161             : }
    2162             : 
    2163             : /************************************************************************/
    2164             : /*                       GWKGetPixelValueReal()                         */
    2165             : /************************************************************************/
    2166             : 
    2167        1012 : static bool GWKGetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
    2168             :                                  GPtrDiff_t iSrcOffset, double *pdfDensity,
    2169             :                                  double *pdfReal)
    2170             : // Reads the source sample at iSrcOffset of band iBand into *pdfReal (widened to double) and its density into *pdfDensity; returns true only when that density is non-zero.
    2171             : {
    2172        1012 :     GByte *pabySrc = poWK->papabySrcImage[iBand];  // Raw band buffer; reinterpreted per working data type below.
    2173             : // Only the per-band validity mask is tested here. NOTE(review): panUnifiedSrcValid is not consulted in this function - confirm that is intended.
    2174        2026 :     if (poWK->papanBandSrcValid != nullptr &&
    2175        1014 :         poWK->papanBandSrcValid[iBand] != nullptr &&
    2176           2 :         !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
    2177             :     {
    2178           0 :         *pdfDensity = 0.0;  // Pixel is masked out for this band.
    2179           0 :         return false;
    2180             :     }
    2181             : 
    2182        1012 :     switch (poWK->eWorkingDataType)  // Convert the sample to double according to the working data type.
    2183             :     {
    2184           1 :         case GDT_Byte:
    2185           1 :             *pdfReal = pabySrc[iSrcOffset];
    2186           1 :             break;
    2187             : 
    2188           0 :         case GDT_Int8:
    2189           0 :             *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
    2190           0 :             break;
    2191             : 
    2192           1 :         case GDT_Int16:
    2193           1 :             *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
    2194           1 :             break;
    2195             : 
    2196           1 :         case GDT_UInt16:
    2197           1 :             *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
    2198           1 :             break;
    2199             : 
    2200         870 :         case GDT_Int32:
    2201         870 :             *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
    2202         870 :             break;
    2203             : 
    2204          67 :         case GDT_UInt32:
    2205          67 :             *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
    2206          67 :             break;
    2207             : 
    2208           0 :         case GDT_Int64:
    2209           0 :             *pdfReal = static_cast<double>(  // Explicit cast: not every 64-bit integer is exactly representable as a double.
    2210           0 :                 reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
    2211           0 :             break;
    2212             : 
    2213           0 :         case GDT_UInt64:
    2214           0 :             *pdfReal = static_cast<double>(  // Same precision caveat as GDT_Int64.
    2215           0 :                 reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
    2216           0 :             break;
    2217             : 
    2218           2 :         case GDT_Float32:
    2219           2 :             *pdfReal = reinterpret_cast<float *>(pabySrc)[iSrcOffset];
    2220           2 :             break;
    2221             : 
    2222          70 :         case GDT_Float64:
    2223          70 :             *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
    2224          70 :             break;
    2225             : 
    2226           0 :         case GDT_CInt16:  // Complex and unknown types are rejected by this real-only reader.
    2227             :         case GDT_CInt32:
    2228             :         case GDT_CFloat32:
    2229             :         case GDT_CFloat64:
    2230             :         case GDT_Unknown:
    2231             :         case GDT_TypeCount:
    2232           0 :             CPLAssert(false);
    2233             :             return false;  // NOTE(review): *pdfDensity and *pdfReal are left unwritten on this path.
    2234             :     }
    2235             : 
    2236        1012 :     if (poWK->pafUnifiedSrcDensity != nullptr)  // Use the per-pixel density array when present, else treat the pixel as fully dense.
    2237           0 :         *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    2238             :     else
    2239        1012 :         *pdfDensity = 1.0;
    2240             : 
    2241        1012 :     return *pdfDensity != 0.0;  // A zero density is reported as "no usable value".
    2242             : }
    2243             : 
    2244             : /************************************************************************/
    2245             : /*                          GWKGetPixelRow()                            */
    2246             : /************************************************************************/
    2247             : 
    2248             : /* It is assumed that padfImag[] is set to 0 by caller code for non-complex */
    2249             : /* data-types. */
    2250             : 
    2251     9318260 : static bool GWKGetPixelRow(const GDALWarpKernel *poWK, int iBand,  // Fetches nHalfSrcLen * 2 consecutive source samples of band iBand, starting at iSrcOffset, as doubles.
    2252             :                            GPtrDiff_t iSrcOffset, int nHalfSrcLen,  // nHalfSrcLen: half the number of samples to read.
    2253             :                            double *padfDensity, double adfReal[],  // padfDensity: per-sample density output, may be null; adfReal: real parts output.
    2254             :                            double *padfImag)  // padfImag: imaginary parts output, written only for the complex working types.
    2255             : {  // Returns true when padfDensity is null and the type is known; otherwise true only if at least one sample ends with a density above SRC_DENSITY_THRESHOLD.
    2256             :     // We know that nSrcLen is even, so we can *always* unroll loops 2x.
    2257     9318260 :     const int nSrcLen = nHalfSrcLen * 2;
    2258     9318260 :     bool bHasValid = false;  // Scratch flag reused by each validity pass below.
    2259             : 
    2260     9318260 :     if (padfDensity != nullptr)  // Mask handling is skipped entirely when the caller passes no density buffer.
    2261             :     {
    2262             :         // Init the density: every sample starts fully dense.
    2263     3345770 :         for (int i = 0; i < nSrcLen; i += 2)
    2264             :         {
    2265     2189510 :             padfDensity[i] = 1.0;
    2266     2189510 :             padfDensity[i + 1] = 1.0;
    2267             :         }
    2268             : 
    2269     1156260 :         if (poWK->panUnifiedSrcValid != nullptr)  // Pass 1: the unified validity mask.
    2270             :         {
    2271     3281460 :             for (int i = 0; i < nSrcLen; i += 2)
    2272             :             {
    2273     2142070 :                 if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i))
    2274     2067740 :                     bHasValid = true;
    2275             :                 else
    2276       74323 :                     padfDensity[i] = 0.0;
    2277             : 
    2278     2142070 :                 if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i + 1))
    2279     2068400 :                     bHasValid = true;
    2280             :                 else
    2281       73668 :                     padfDensity[i + 1] = 0.0;
    2282             :             }
    2283             : 
    2284             :             // Reset or fail as needed: a fully masked row ends the call, otherwise the flag is cleared for the next pass.
    2285     1139400 :             if (bHasValid)
    2286     1116590 :                 bHasValid = false;
    2287             :             else
    2288       22806 :                 return false;
    2289             :         }
    2290             : 
    2291     1133450 :         if (poWK->papanBandSrcValid != nullptr &&  // Pass 2: the validity mask of this band, if any.
    2292           0 :             poWK->papanBandSrcValid[iBand] != nullptr)
    2293             :         {
    2294           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2295             :             {
    2296           0 :                 if (CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset + i))
    2297           0 :                     bHasValid = true;
    2298             :                 else
    2299           0 :                     padfDensity[i] = 0.0;
    2300             : 
    2301           0 :                 if (CPLMaskGet(poWK->papanBandSrcValid[iBand],
    2302           0 :                                iSrcOffset + i + 1))
    2303           0 :                     bHasValid = true;
    2304             :                 else
    2305           0 :                     padfDensity[i + 1] = 0.0;
    2306             :             }
    2307             : 
    2308             :             // Reset or fail as needed: a fully masked row ends the call, otherwise the flag is cleared for the density pass.
    2309           0 :             if (bHasValid)
    2310           0 :                 bHasValid = false;
    2311             :             else
    2312           0 :                 return false;
    2313             :         }
    2314             :     }
    2315             : 
    2316             :     // TODO(schwehr): Fix casting.
    2317             :     // Fetch data, converting each sample to double according to the working data type.
    2318     9295450 :     switch (poWK->eWorkingDataType)
    2319             :     {
    2320     8085470 :         case GDT_Byte:
    2321             :         {
    2322     8085470 :             GByte *pSrc =
    2323     8085470 :                 reinterpret_cast<GByte *>(poWK->papabySrcImage[iBand]);
    2324     8085470 :             pSrc += iSrcOffset;
    2325    79828800 :             for (int i = 0; i < nSrcLen; i += 2)
    2326             :             {
    2327    71743400 :                 adfReal[i] = pSrc[i];
    2328    71743400 :                 adfReal[i + 1] = pSrc[i + 1];
    2329             :             }
    2330     8085470 :             break;
    2331             :         }
    2332             : 
    2333           0 :         case GDT_Int8:
    2334             :         {
    2335           0 :             GInt8 *pSrc =
    2336           0 :                 reinterpret_cast<GInt8 *>(poWK->papabySrcImage[iBand]);
    2337           0 :             pSrc += iSrcOffset;
    2338           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2339             :             {
    2340           0 :                 adfReal[i] = pSrc[i];
    2341           0 :                 adfReal[i + 1] = pSrc[i + 1];
    2342             :             }
    2343           0 :             break;
    2344             :         }
    2345             : 
    2346        5558 :         case GDT_Int16:
    2347             :         {
    2348        5558 :             GInt16 *pSrc =
    2349        5558 :                 reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
    2350        5558 :             pSrc += iSrcOffset;
    2351       21380 :             for (int i = 0; i < nSrcLen; i += 2)
    2352             :             {
    2353       15822 :                 adfReal[i] = pSrc[i];
    2354       15822 :                 adfReal[i + 1] = pSrc[i + 1];
    2355             :             }
    2356        5558 :             break;
    2357             :         }
    2358             : 
    2359        4114 :         case GDT_UInt16:
    2360             :         {
    2361        4114 :             GUInt16 *pSrc =
    2362        4114 :                 reinterpret_cast<GUInt16 *>(poWK->papabySrcImage[iBand]);
    2363        4114 :             pSrc += iSrcOffset;
    2364       18492 :             for (int i = 0; i < nSrcLen; i += 2)
    2365             :             {
    2366       14378 :                 adfReal[i] = pSrc[i];
    2367       14378 :                 adfReal[i + 1] = pSrc[i + 1];
    2368             :             }
    2369        4114 :             break;
    2370             :         }
    2371             : 
    2372        1130 :         case GDT_Int32:
    2373             :         {
    2374        1130 :             GInt32 *pSrc =
    2375        1130 :                 reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
    2376        1130 :             pSrc += iSrcOffset;
    2377        2992 :             for (int i = 0; i < nSrcLen; i += 2)
    2378             :             {
    2379        1862 :                 adfReal[i] = pSrc[i];
    2380        1862 :                 adfReal[i + 1] = pSrc[i + 1];
    2381             :             }
    2382        1130 :             break;
    2383             :         }
    2384             : 
    2385         750 :         case GDT_UInt32:
    2386             :         {
    2387         750 :             GUInt32 *pSrc =
    2388         750 :                 reinterpret_cast<GUInt32 *>(poWK->papabySrcImage[iBand]);
    2389         750 :             pSrc += iSrcOffset;
    2390        2232 :             for (int i = 0; i < nSrcLen; i += 2)
    2391             :             {
    2392        1482 :                 adfReal[i] = pSrc[i];
    2393        1482 :                 adfReal[i + 1] = pSrc[i + 1];
    2394             :             }
    2395         750 :             break;
    2396             :         }
    2397             : 
    2398         190 :         case GDT_Int64:
    2399             :         {
    2400         190 :             auto pSrc =
    2401         190 :                 reinterpret_cast<std::int64_t *>(poWK->papabySrcImage[iBand]);
    2402         190 :             pSrc += iSrcOffset;
    2403         380 :             for (int i = 0; i < nSrcLen; i += 2)
    2404             :             {
    2405         190 :                 adfReal[i] = static_cast<double>(pSrc[i]);
    2406         190 :                 adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
    2407             :             }
    2408         190 :             break;
    2409             :         }
    2410             : 
    2411           0 :         case GDT_UInt64:
    2412             :         {
    2413           0 :             auto pSrc =
    2414           0 :                 reinterpret_cast<std::uint64_t *>(poWK->papabySrcImage[iBand]);
    2415           0 :             pSrc += iSrcOffset;
    2416           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2417             :             {
    2418           0 :                 adfReal[i] = static_cast<double>(pSrc[i]);
    2419           0 :                 adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
    2420             :             }
    2421           0 :             break;
    2422             :         }
    2423             : 
    2424       25074 :         case GDT_Float32:
    2425             :         {
    2426       25074 :             float *pSrc =
    2427       25074 :                 reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
    2428       25074 :             pSrc += iSrcOffset;
    2429      121347 :             for (int i = 0; i < nSrcLen; i += 2)
    2430             :             {
    2431       96273 :                 adfReal[i] = pSrc[i];
    2432       96273 :                 adfReal[i + 1] = pSrc[i + 1];
    2433             :             }
    2434       25074 :             break;
    2435             :         }
    2436             : 
    2437         940 :         case GDT_Float64:
    2438             :         {
    2439         940 :             double *pSrc =
    2440         940 :                 reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
    2441         940 :             pSrc += iSrcOffset;
    2442        2612 :             for (int i = 0; i < nSrcLen; i += 2)
    2443             :             {
    2444        1672 :                 adfReal[i] = pSrc[i];
    2445        1672 :                 adfReal[i + 1] = pSrc[i + 1];
    2446             :             }
    2447         940 :             break;
    2448             :         }
    2449             : 
    2450     1169410 :         case GDT_CInt16:
    2451             :         {
    2452     1169410 :             GInt16 *pSrc =
    2453     1169410 :                 reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
    2454     1169410 :             pSrc += 2 * iSrcOffset;  // Complex samples are read as interleaved (real, imag) pairs, hence the factor 2.
    2455     4676400 :             for (int i = 0; i < nSrcLen; i += 2)
    2456             :             {
    2457     3506990 :                 adfReal[i] = pSrc[2 * i];
    2458     3506990 :                 padfImag[i] = pSrc[2 * i + 1];
    2459             : 
    2460     3506990 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2461     3506990 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2462             :             }
    2463     1169410 :             break;
    2464             :         }
    2465             : 
    2466         940 :         case GDT_CInt32:
    2467             :         {
    2468         940 :             GInt32 *pSrc =
    2469         940 :                 reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
    2470         940 :             pSrc += 2 * iSrcOffset;
    2471        2612 :             for (int i = 0; i < nSrcLen; i += 2)
    2472             :             {
    2473        1672 :                 adfReal[i] = pSrc[2 * i];
    2474        1672 :                 padfImag[i] = pSrc[2 * i + 1];
    2475             : 
    2476        1672 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2477        1672 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2478             :             }
    2479         940 :             break;
    2480             :         }
    2481             : 
    2482         940 :         case GDT_CFloat32:
    2483             :         {
    2484         940 :             float *pSrc =
    2485         940 :                 reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
    2486         940 :             pSrc += 2 * iSrcOffset;
    2487        2612 :             for (int i = 0; i < nSrcLen; i += 2)
    2488             :             {
    2489        1672 :                 adfReal[i] = pSrc[2 * i];
    2490        1672 :                 padfImag[i] = pSrc[2 * i + 1];
    2491             : 
    2492        1672 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2493        1672 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2494             :             }
    2495         940 :             break;
    2496             :         }
    2497             : 
    2498         940 :         case GDT_CFloat64:
    2499             :         {
    2500         940 :             double *pSrc =
    2501         940 :                 reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
    2502         940 :             pSrc += 2 * iSrcOffset;
    2503        2612 :             for (int i = 0; i < nSrcLen; i += 2)
    2504             :             {
    2505        1672 :                 adfReal[i] = pSrc[2 * i];
    2506        1672 :                 padfImag[i] = pSrc[2 * i + 1];
    2507             : 
    2508        1672 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2509        1672 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2510             :             }
    2511         940 :             break;
    2512             :         }
    2513             : 
    2514           0 :         case GDT_Unknown:  // Unsupported working type: assert, report the whole row as empty and fail.
    2515             :         case GDT_TypeCount:
    2516           0 :             CPLAssert(false);
    2517             :             if (padfDensity)
    2518             :                 memset(padfDensity, 0, nSrcLen * sizeof(double));
    2519             :             return false;
    2520             :     }
    2521             : 
    2522     9295450 :     if (padfDensity == nullptr)  // No density buffer: the data has been fetched and no validity test applies.
    2523     8162000 :         return true;
    2524             : 
    2525     1133450 :     if (poWK->pafUnifiedSrcDensity == nullptr)  // No density array: samples that survived the mask passes count as fully dense.
    2526             :     {
    2527     3234200 :         for (int i = 0; i < nSrcLen; i += 2)
    2528             :         {
    2529             :             // Take into account earlier calcs: samples zeroed by a mask pass stay at 0.
    2530     2112850 :             if (padfDensity[i] > SRC_DENSITY_THRESHOLD)
    2531             :             {
    2532     2072950 :                 padfDensity[i] = 1.0;
    2533     2072950 :                 bHasValid = true;
    2534             :             }
    2535             : 
    2536     2112850 :             if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD)
    2537             :             {
    2538     2073600 :                 padfDensity[i + 1] = 1.0;
    2539     2073600 :                 bHasValid = true;
    2540             :             }
    2541             :         }
    2542             :     }
    2543             :     else  // Unmasked samples take their density from pafUnifiedSrcDensity, then are re-tested against the threshold.
    2544             :     {
    2545       54348 :         for (int i = 0; i < nSrcLen; i += 2)
    2546             :         {
    2547       42243 :             if (padfDensity[i] > SRC_DENSITY_THRESHOLD)
    2548       42243 :                 padfDensity[i] = poWK->pafUnifiedSrcDensity[iSrcOffset + i];
    2549       42243 :             if (padfDensity[i] > SRC_DENSITY_THRESHOLD)
    2550       41704 :                 bHasValid = true;
    2551             : 
    2552       42243 :             if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD)
    2553       42243 :                 padfDensity[i + 1] =
    2554       42243 :                     poWK->pafUnifiedSrcDensity[iSrcOffset + i + 1];
    2555       42243 :             if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD)
    2556       41594 :                 bHasValid = true;
    2557             :         }
    2558             :     }
    2559             : 
    2560     1133450 :     return bHasValid;
    2561             : }
    2562             : 
    2563             : /************************************************************************/
    2564             : /*                          GWKGetPixelT()                              */
    2565             : /************************************************************************/
    2566             : 
    2567             : template <class T>
    2568     7113618 : static bool GWKGetPixelT(const GDALWarpKernel *poWK, int iBand,
    2569             :                          GPtrDiff_t iSrcOffset, double *pdfDensity, T *pValue)
    2570             : // Typed reader: copies the source sample at iSrcOffset of band iBand into *pValue without conversion and stores its density in *pdfDensity; returns true only when that density is non-zero.
    2571             : {
    2572     7113618 :     T *pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);  // View the band buffer as an array of T.
    2573             : // Reject the pixel when either the unified mask or this band's mask marks it invalid; *pValue is left untouched in that case.
    2574    16364702 :     if ((poWK->panUnifiedSrcValid != nullptr &&
    2575    14227256 :          !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset)) ||
    2576     7113618 :         (poWK->papanBandSrcValid != nullptr &&
    2577          21 :          poWK->papanBandSrcValid[iBand] != nullptr &&
    2578          21 :          !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset)))
    2579             :     {
    2580           9 :         *pdfDensity = 0.0;
    2581           9 :         return false;
    2582             :     }
    2583             : 
    2584     7113608 :     *pValue = pSrc[iSrcOffset];
    2585             : 
    2586     7113608 :     if (poWK->pafUnifiedSrcDensity == nullptr)  // Without a density array the pixel is treated as fully dense.
    2587     6778855 :         *pdfDensity = 1.0;
    2588             :     else
    2589      334754 :         *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    2590             : 
    2591     7113608 :     return *pdfDensity != 0.0;
    2592             : }
    2593             : 
    2594             : /************************************************************************/
    2595             : /*                        GWKBilinearResample()                         */
    2596             : /*     Set of bilinear interpolators                                    */
    2597             : /************************************************************************/
    2598             : 
    2599       72664 : static bool GWKBilinearResample4Sample(const GDALWarpKernel *poWK, int iBand,
    2600             :                                        double dfSrcX, double dfSrcY,
    2601             :                                        double *pdfDensity, double *pdfReal,
    2602             :                                        double *pdfImag)
    2603             : // Bilinear interpolation of band iBand at source position (dfSrcX, dfSrcY) from up to four neighbouring samples; only neighbours inside the nSrcXSize x nSrcYSize raster whose density exceeds SRC_DENSITY_THRESHOLD contribute. Results go to *pdfReal, *pdfImag and *pdfDensity.
    2604             : {
    2605             :     // Save as local variables to avoid following pointers.
    2606       72664 :     const int nSrcXSize = poWK->nSrcXSize;
    2607       72664 :     const int nSrcYSize = poWK->nSrcYSize;
    2608             : 
    2609       72664 :     int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));  // Column of the left neighbour; the 0.5 shift presumably reflects pixel centres at half-integer coordinates - TODO confirm.
    2610       72664 :     int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));  // Row of the upper neighbour.
    2611       72664 :     double dfRatioX = 1.5 - (dfSrcX - iSrcX);  // Weight of the left column; the right column gets 1 - dfRatioX.
    2612       72664 :     double dfRatioY = 1.5 - (dfSrcY - iSrcY);  // Weight of the upper row; the lower row gets 1 - dfRatioY.
    2613       72664 :     bool bShifted = false;
    2614             : 
    2615       72664 :     if (iSrcX == -1)  // Left neighbour is one column outside the raster: use column 0 with full weight.
    2616             :     {
    2617         292 :         iSrcX = 0;
    2618         292 :         dfRatioX = 1;
    2619             :     }
    2620       72664 :     if (iSrcY == -1)  // Upper neighbour is one row outside the raster: use row 0 with full weight.
    2621             :     {
    2622        7686 :         iSrcY = 0;
    2623        7686 :         dfRatioY = 1;
    2624             :     }
    2625       72664 :     GPtrDiff_t iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    2626             : 
    2627             :     // Shift so we don't overrun the array when the 2-sample read of the upper or lower row would start on the very last pixel.
    2628       72664 :     if (static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize == iSrcOffset + 1 ||
    2629       72614 :         static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize ==
    2630       72614 :             iSrcOffset + nSrcXSize + 1)
    2631             :     {
    2632         100 :         bShifted = true;
    2633         100 :         --iSrcOffset;
    2634             :     }
    2635             : 
    2636       72664 :     double adfDensity[2] = {0.0, 0.0};
    2637       72664 :     double adfReal[2] = {0.0, 0.0};
    2638       72664 :     double adfImag[2] = {0.0, 0.0};  // Stays 0 for non-complex types, which GWKGetPixelRow does not write.
    2639       72664 :     double dfAccumulatorReal = 0.0;
    2640       72664 :     double dfAccumulatorImag = 0.0;
    2641       72664 :     double dfAccumulatorDensity = 0.0;
    2642       72664 :     double dfAccumulatorDivisor = 0.0;  // Sum of the weights of the samples that actually contributed.
    2643             : 
    2644       72664 :     const GPtrDiff_t nSrcPixels =
    2645       72664 :         static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize;
    2646             :     // Get pixel row (upper pair of samples).
    2647       72664 :     if (iSrcY >= 0 && iSrcY < nSrcYSize && iSrcOffset >= 0 &&
    2648      145328 :         iSrcOffset < nSrcPixels &&
    2649       72664 :         GWKGetPixelRow(poWK, iBand, iSrcOffset, 1, adfDensity, adfReal,
    2650             :                        adfImag))
    2651             :     {
    2652       67008 :         double dfMult1 = dfRatioX * dfRatioY;
    2653       67008 :         double dfMult2 = (1.0 - dfRatioX) * dfRatioY;
    2654             : 
    2655             :         // Shifting corrected: after the one-pixel shift the wanted sample sits in slot 1.
    2656       67008 :         if (bShifted)
    2657             :         {
    2658         100 :             adfReal[0] = adfReal[1];
    2659         100 :             adfImag[0] = adfImag[1];
    2660         100 :             adfDensity[0] = adfDensity[1];
    2661             :         }
    2662             : 
    2663             :         // Upper Left Pixel.
    2664       67008 :         if (iSrcX >= 0 && iSrcX < nSrcXSize &&
    2665       67008 :             adfDensity[0] > SRC_DENSITY_THRESHOLD)
    2666             :         {
    2667       61578 :             dfAccumulatorDivisor += dfMult1;
    2668             : 
    2669       61578 :             dfAccumulatorReal += adfReal[0] * dfMult1;
    2670       61578 :             dfAccumulatorImag += adfImag[0] * dfMult1;
    2671       61578 :             dfAccumulatorDensity += adfDensity[0] * dfMult1;
    2672             :         }
    2673             : 
    2674             :         // Upper Right Pixel.
    2675       67008 :         if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
    2676       66427 :             adfDensity[1] > SRC_DENSITY_THRESHOLD)
    2677             :         {
    2678       61153 :             dfAccumulatorDivisor += dfMult2;
    2679             : 
    2680       61153 :             dfAccumulatorReal += adfReal[1] * dfMult2;
    2681       61153 :             dfAccumulatorImag += adfImag[1] * dfMult2;
    2682       61153 :             dfAccumulatorDensity += adfDensity[1] * dfMult2;
    2683             :         }
    2684             :     }
    2685             : 
    2686             :     // Get pixel row (lower pair of samples, one raster row further).
    2687       72664 :     if (iSrcY + 1 >= 0 && iSrcY + 1 < nSrcYSize &&
    2688      213910 :         iSrcOffset + nSrcXSize >= 0 && iSrcOffset + nSrcXSize < nSrcPixels &&
    2689       68582 :         GWKGetPixelRow(poWK, iBand, iSrcOffset + nSrcXSize, 1, adfDensity,
    2690             :                        adfReal, adfImag))
    2691             :     {
    2692       63023 :         double dfMult1 = dfRatioX * (1.0 - dfRatioY);
    2693       63023 :         double dfMult2 = (1.0 - dfRatioX) * (1.0 - dfRatioY);
    2694             : 
    2695             :         // Shifting corrected: after the one-pixel shift the wanted sample sits in slot 1.
    2696       63023 :         if (bShifted)
    2697             :         {
    2698          50 :             adfReal[0] = adfReal[1];
    2699          50 :             adfImag[0] = adfImag[1];
    2700          50 :             adfDensity[0] = adfDensity[1];
    2701             :         }
    2702             : 
    2703             :         // Lower Left Pixel.
    2704       63023 :         if (iSrcX >= 0 && iSrcX < nSrcXSize &&
    2705       63023 :             adfDensity[0] > SRC_DENSITY_THRESHOLD)
    2706             :         {
    2707       57744 :             dfAccumulatorDivisor += dfMult1;
    2708             : 
    2709       57744 :             dfAccumulatorReal += adfReal[0] * dfMult1;
    2710       57744 :             dfAccumulatorImag += adfImag[0] * dfMult1;
    2711       57744 :             dfAccumulatorDensity += adfDensity[0] * dfMult1;
    2712             :         }
    2713             : 
    2714             :         // Lower Right Pixel.
    2715       63023 :         if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
    2716       62492 :             adfDensity[1] > SRC_DENSITY_THRESHOLD)
    2717             :         {
    2718       57515 :             dfAccumulatorDivisor += dfMult2;
    2719             : 
    2720       57515 :             dfAccumulatorReal += adfReal[1] * dfMult2;
    2721       57515 :             dfAccumulatorImag += adfImag[1] * dfMult2;
    2722       57515 :             dfAccumulatorDensity += adfDensity[1] * dfMult2;
    2723             :         }
    2724             :     }
    2725             : 
    2726             :     /* -------------------------------------------------------------------- */
    2727             :     /*      Return result.                                                  */
    2728             :     /* -------------------------------------------------------------------- */
    2729       72664 :     if (dfAccumulatorDivisor == 1.0)  // Contributing weights sum to exactly 1: the accumulators are used without normalisation.
    2730             :     {
    2731       41607 :         *pdfReal = dfAccumulatorReal;
    2732       41607 :         *pdfImag = dfAccumulatorImag;
    2733       41607 :         *pdfDensity = dfAccumulatorDensity;
    2734       41607 :         return false;  // NOTE(review): outputs are filled here yet false is returned, while the renormalised branch below returns true - confirm whether any caller uses the return value.
    2735             :     }
    2736       31057 :     else if (dfAccumulatorDivisor < 0.00001)  // Practically no neighbour contributed: report an empty result.
    2737             :     {
    2738           0 :         *pdfReal = 0.0;
    2739           0 :         *pdfImag = 0.0;
    2740           0 :         *pdfDensity = 0.0;
    2741           0 :         return false;
    2742             :     }
    2743             :     else  // Only part of the weight contributed: renormalise by the accumulated weight.
    2744             :     {
    2745       31057 :         *pdfReal = dfAccumulatorReal / dfAccumulatorDivisor;
    2746       31057 :         *pdfImag = dfAccumulatorImag / dfAccumulatorDivisor;
    2747       31057 :         *pdfDensity = dfAccumulatorDensity / dfAccumulatorDivisor;
    2748       31057 :         return true;
    2749             :     }
    2750             : }
    2751             : 
                       // Bilinear interpolation of band iBand of the source image at
                       // (dfSrcX, dfSrcY). Samples of type T are read directly from
                       // poWK->papabySrcImage[iBand] with a row stride of poWK->nSrcXSize; no
                       // mask or density array is consulted.
                       //
                       // The 2x2 neighbourhood has its upper-left pixel at
                       // (floor(dfSrcX - 0.5), floor(dfSrcY - 0.5)). Neighbours outside
                       // [0, nSrcXSize) x [0, nSrcYSize) are dropped and the result is
                       // renormalised by the sum of the remaining weights.
                       //
                       // Returns true and stores the interpolated value in *pValue (converted to
                       // T by GWKRoundValueT<T>). Returns false with *pValue = 0 when the summed
                       // weight of the in-bounds neighbours is below 0.00001.
    2752             : template <class T>
    2753     5027555 : static bool GWKBilinearResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
    2754             :                                                int iBand, double dfSrcX,
    2755             :                                                double dfSrcY, T *pValue)
    2756             : 
    2757             : {
    2758             : 
    2759     5027555 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    2760     5027555 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
                           // Linear index of the upper-left neighbour in the band buffer.
    2761     5027555 :     GPtrDiff_t iSrcOffset =
    2762     5027555 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
                           // Weight of the left column / upper row: one minus the fractional
                           // offset of (dfSrc - 0.5) from the floored index.
    2763     5027555 :     const double dfRatioX = 1.5 - (dfSrcX - iSrcX);
    2764     5027555 :     const double dfRatioY = 1.5 - (dfSrcY - iSrcY);
    2765             : 
    2766     5027555 :     const T *const pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
    2767             : 
                           // Fast path: all four neighbours are inside the source, so the four
                           // weights sum to 1 and no normalisation is needed.
    2768     5027555 :     if (iSrcX >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
    2769     4925840 :         iSrcY + 1 < poWK->nSrcYSize)
    2770             :     {
    2771     4903658 :         const double dfAccumulator =
    2772     4903658 :             (pSrc[iSrcOffset] * dfRatioX +
    2773     4903658 :              pSrc[iSrcOffset + 1] * (1.0 - dfRatioX)) *
    2774             :                 dfRatioY +
    2775     4903658 :             (pSrc[iSrcOffset + poWK->nSrcXSize] * dfRatioX +
    2776     4903658 :              pSrc[iSrcOffset + 1 + poWK->nSrcXSize] * (1.0 - dfRatioX)) *
    2777     4903658 :                 (1.0 - dfRatioY);
    2778             : 
    2779     4903658 :         *pValue = GWKRoundValueT<T>(dfAccumulator);
    2780             : 
    2781     4903658 :         return true;
    2782             :     }
    2783             : 
                           // Border case: accumulate only the neighbours that lie inside the
                           // source and remember the total weight actually used.
    2784      123897 :     double dfAccumulatorDivisor = 0.0;
    2785      123897 :     double dfAccumulator = 0.0;
    2786             : 
    2787             :     // Upper Left Pixel.
    2788      123897 :     if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY >= 0 &&
    2789       50416 :         iSrcY < poWK->nSrcYSize)
    2790             :     {
    2791       50416 :         const double dfMult = dfRatioX * dfRatioY;
    2792             : 
    2793       50416 :         dfAccumulatorDivisor += dfMult;
    2794             : 
    2795       50416 :         dfAccumulator += pSrc[iSrcOffset] * dfMult;
    2796             :     }
    2797             : 
    2798             :     // Upper Right Pixel.
    2799      123897 :     if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
    2800       58956 :         iSrcY < poWK->nSrcYSize)
    2801             :     {
    2802       58956 :         const double dfMult = (1.0 - dfRatioX) * dfRatioY;
    2803             : 
    2804       58956 :         dfAccumulatorDivisor += dfMult;
    2805             : 
    2806       58956 :         dfAccumulator += pSrc[iSrcOffset + 1] * dfMult;
    2807             :     }
    2808             : 
    2809             :     // Lower Right Pixel.
    2810      123897 :     if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
    2811       95046 :         iSrcY + 1 < poWK->nSrcYSize)
    2812             :     {
    2813       72477 :         const double dfMult = (1.0 - dfRatioX) * (1.0 - dfRatioY);
    2814             : 
    2815       72477 :         dfAccumulatorDivisor += dfMult;
    2816             : 
    2817       72477 :         dfAccumulator += pSrc[iSrcOffset + 1 + poWK->nSrcXSize] * dfMult;
    2818             :     }
    2819             : 
    2820             :     // Lower Left Pixel.
    2821      123897 :     if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
    2822       86483 :         iSrcY + 1 < poWK->nSrcYSize)
    2823             :     {
    2824       63709 :         const double dfMult = dfRatioX * (1.0 - dfRatioY);
    2825             : 
    2826       63709 :         dfAccumulatorDivisor += dfMult;
    2827             : 
    2828       63709 :         dfAccumulator += pSrc[iSrcOffset + poWK->nSrcXSize] * dfMult;
    2829             :     }
    2830             : 
    2831             :     /* -------------------------------------------------------------------- */
    2832             :     /*      Return result.                                                  */
    2833             :     /* -------------------------------------------------------------------- */
    2834      123897 :     double dfValue = 0.0;
    2835             : 
                           // Practically no in-bounds neighbour contributed: report failure.
    2836      123897 :     if (dfAccumulatorDivisor < 0.00001)
    2837             :     {
    2838           0 :         *pValue = 0;
    2839           0 :         return false;
    2840             :     }
                           // Weights sum to exactly 1: the division can be skipped.
    2841      123897 :     else if (dfAccumulatorDivisor == 1.0)
    2842             :     {
    2843        7571 :         dfValue = dfAccumulator;
    2844             :     }
                           // Otherwise renormalise by the weight that was actually accumulated.
    2845             :     else
    2846             :     {
    2847      116326 :         dfValue = dfAccumulator / dfAccumulatorDivisor;
    2848             :     }
    2849             : 
    2850      123897 :     *pValue = GWKRoundValueT<T>(dfValue);
    2851             : 
    2852      123897 :     return true;
    2853             : }
    2854             : 
    2855             : /************************************************************************/
    2856             : /*                        GWKCubicResample()                            */
    2857             : /*     Set of bicubic interpolators using cubic convolution.            */
    2858             : /************************************************************************/
    2859             : 
    2860             : // http://verona.fi-p.unam.mx/boris/practicas/CubConvInterp.pdf Formula 18
    2861             : // or http://en.wikipedia.org/wiki/Cubic_Hermite_spline : CINTx(p_1,p0,p1,p2)
    2862             : // http://en.wikipedia.org/wiki/Bicubic_interpolation: matrix notation
    2863             : 
                       // CubicConvolution(): 1-D cubic convolution of four consecutive samples
                       // f0, f1, f2, f3. GWKCubicResampleNoMasks4SampleT() passes the fractional
                       // offset, its square and its cube as distance1, distance2 and distance3.
                       // The expression reduces to f1 when all three distances are 0.
                       //
                       // The macro arguments are pasted without parentheses, so pass only simple
                       // operands (identifiers or array elements), never compound expressions.
    2864             : // TODO(schwehr): Use an inline function.
    2865             : #define CubicConvolution(distance1, distance2, distance3, f0, f1, f2, f3)      \
    2866             :     (f1 + 0.5 * (distance1 * (f2 - f0) +                                       \
    2867             :                  distance2 * (2.0 * f0 - 5.0 * f1 + 4.0 * f2 - f3) +           \
    2868             :                  distance3 * (3.0 * (f1 - f2) + f3 - f0)))
    2869             : 
    2870             : /************************************************************************/
    2871             : /*                       GWKCubicComputeWeights()                       */
    2872             : /************************************************************************/
    2873             : 
                       // Expands to a braced block that fills adfCoeffs[0..3] with the cubic
                       // convolution weights for the fractional position dfX_. Taking the dot
                       // product of these weights with four samples (see CONVOL4) is
                       // algebraically the same as CubicConvolution(dfX, dfX^2, dfX^3, ...) on
                       // those samples.
                       //
                       // dfX_ is evaluated once (copied into a local dfX). adfCoeffs is pasted
                       // without parentheses, so pass a plain array name.
                       //
                       // The four weights sum to 1, hence:
    2874             : // adfCoeffs[2] = 1.0 - (adfCoeffs[0] + adfCoeffs[1] + adfCoeffs[3]);
    2875             : 
    2876             : // TODO(schwehr): Use an inline function.
    2877             : #define GWKCubicComputeWeights(dfX_, adfCoeffs)                                \
    2878             :     {                                                                          \
    2879             :         const double dfX = dfX_;                                               \
    2880             :         const double dfHalfX = 0.5 * dfX;                                      \
    2881             :         const double dfThreeX = 3.0 * dfX;                                     \
    2882             :         const double dfHalfX2 = dfHalfX * dfX;                                 \
    2883             :                                                                                \
    2884             :         adfCoeffs[0] = dfHalfX * (-1 + dfX * (2 - dfX));                       \
    2885             :         adfCoeffs[1] = 1 + dfHalfX2 * (-5 + dfThreeX);                         \
    2886             :         adfCoeffs[2] = dfHalfX * (1 + dfX * (4 - dfThreeX));                   \
    2887             :         adfCoeffs[3] = dfHalfX2 * (-1 + dfX);                                  \
    2888             :     }
    2889             : 
                       // CONVOL4(): dot product of the first four elements of v1 and v2. Both
                       // arguments are parenthesised before indexing, so pointer expressions
                       // such as `base + offset` may be passed. Each argument is expanded four
                       // times, so it must be free of side effects.
    2890             : // TODO(schwehr): Use an inline function.
    2891             : #define CONVOL4(v1, v2)                                                        \
    2892             :     ((v1)[0] * (v2)[0] + (v1)[1] * (v2)[1] + (v1)[2] * (v2)[2] +               \
    2893             :      (v1)[3] * (v2)[3])
    2894             : 
    2895             : #if 0
                       // Disabled alternative to GWKCubicComputeWeights() + CONVOL4(). The
                       // common factor dfHalfX and the constant 1 of the second weight are left
                       // out of the stored coefficients and re-applied by CONVOL4_Optim2MAX(),
                       // which computes v[1] + dfHalfX * dot(adfCoeffs, v).
                       // dfHalfX is assigned (not declared) by the first macro, so the caller
                       // must supply an assignable double variable and pass the same variable
                       // to CONVOL4_Optim2MAX().
    2896             : // Optimal (in theory...) for max 2 convolutions: 14 multiplications
    2897             : // instead of 17.
    2898             : // TODO(schwehr): Use an inline function.
    2899             : #define GWKCubicComputeWeights_Optim2MAX(dfX_, adfCoeffs, dfHalfX)             \
    2900             :     {                                                                          \
    2901             :         const double dfX = dfX_;                                               \
    2902             :         dfHalfX = 0.5 * dfX;                                                   \
    2903             :         const double dfThreeX = 3.0 * dfX;                                     \
    2904             :         const double dfXMinus1 = dfX - 1;                                      \
    2905             :                                                                                \
    2906             :         adfCoeffs[0] = -1 + dfX * (2 - dfX);                                   \
    2907             :         adfCoeffs[1] = dfX * (-5 + dfThreeX);                                  \
    2908             :         /*adfCoeffs[2] = 1 + dfX * (4 - dfThreeX);*/                           \
    2909             :         adfCoeffs[2] = -dfXMinus1 - adfCoeffs[1];                              \
    2910             :         /*adfCoeffs[3] = dfX * (-1 + dfX); */                                  \
    2911             :         adfCoeffs[3] = dfXMinus1 - adfCoeffs[0];                               \
    2912             :     }
    2913             : 
    2914             : // TODO(schwehr): Use an inline function.
    2915             : #define CONVOL4_Optim2MAX(adfCoeffs, v, dfHalfX)                               \
    2916             :     ((v)[1] + (dfHalfX) * ((adfCoeffs)[0] * (v)[0] + (adfCoeffs)[1] * (v)[1] + \
    2917             :                            (adfCoeffs)[2] * (v)[2] + (adfCoeffs)[3] * (v)[3]))
    2918             : #endif
    2919             : 
                       // Bicubic (cubic convolution) interpolation of band iBand at
                       // (dfSrcX, dfSrcY), producing density, real and imaginary results through
                       // pdfDensity, pdfReal and pdfImag.
                       //
                       // The 4x4 window spans source pixels (iSrcX - 1, iSrcY - 1) to
                       // (iSrcX + 2, iSrcY + 2) and is fetched one row at a time with
                       // GWKGetPixelRow(). The call is delegated to GWKBilinearResample4Sample()
                       // (whose result is returned as-is) when:
                       //   - the window is not entirely inside the source image,
                       //   - GWKGetPixelRow() fails for one of the rows, or
                       //   - any density in a fetched row is below SRC_DENSITY_THRESHOLD.
                       // Otherwise the three outputs are set and true is returned.
    2920      299879 : static bool GWKCubicResample4Sample(const GDALWarpKernel *poWK, int iBand,
    2921             :                                     double dfSrcX, double dfSrcY,
    2922             :                                     double *pdfDensity, double *pdfReal,
    2923             :                                     double *pdfImag)
    2924             : 
    2925             : {
                           // NOTE(review): these casts truncate toward zero instead of flooring.
                           // For dfSrc - 0.5 in (-1, 0) the index becomes 0, and the border test
                           // below (iSrc - 1 < 0) then selects the bilinear fallback.
    2926      299879 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    2927      299879 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    2928      299879 :     GPtrDiff_t iSrcOffset =
    2929      299879 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
                           // Fractional position inside the central cell of the window.
    2930      299879 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    2931      299879 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
                           // Scratch buffers receiving one row of four pixels at a time.
    2932      299879 :     double adfDensity[4] = {};
    2933      299879 :     double adfReal[4] = {};
    2934      299879 :     double adfImag[4] = {};
    2935             : 
    2936             :     // Get the bilinear interpolation at the image borders.
    2937      299879 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    2938      284412 :         iSrcY + 2 >= poWK->nSrcYSize)
    2939       24136 :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    2940       24136 :                                           pdfDensity, pdfReal, pdfImag);
    2941             : 
                           // Horizontally interpolated value of each of the four rows.
    2942      275743 :     double adfValueDens[4] = {};
    2943      275743 :     double adfValueReal[4] = {};
    2944      275743 :     double adfValueImag[4] = {};
    2945             : 
    2946      275743 :     double adfCoeffsX[4] = {};
    2947      275743 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
    2948             : 
                           // Rows iSrcY - 1 .. iSrcY + 2, each starting at column iSrcX - 1.
                           // The 4th argument (2) is presumably the number of pixel pairs to
                           // fetch, i.e. 4 pixels - TODO confirm against GWKGetPixelRow().
    2949     1232410 :     for (GPtrDiff_t i = -1; i < 3; i++)
    2950             :     {
    2951     1003120 :         if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
    2952      991507 :                             2, adfDensity, adfReal, adfImag) ||
    2953      991507 :             adfDensity[0] < SRC_DENSITY_THRESHOLD ||
    2954      973867 :             adfDensity[1] < SRC_DENSITY_THRESHOLD ||
    2955     2960190 :             adfDensity[2] < SRC_DENSITY_THRESHOLD ||
    2956      965566 :             adfDensity[3] < SRC_DENSITY_THRESHOLD)
    2957             :         {
    2958       46449 :             return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    2959       46449 :                                               pdfDensity, pdfReal, pdfImag);
    2960             :         }
    2961             : 
    2962      956668 :         adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
    2963      956668 :         adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
    2964      956668 :         adfValueImag[i + 1] = CONVOL4(adfCoeffsX, adfImag);
    2965             :     }
    2966             : 
    2967             :     /* -------------------------------------------------------------------- */
    2968             :     /*      For now, if we have any pixels missing in the kernel area,      */
    2969             :     /*      we fallback on using bilinear interpolation.  Ideally we        */
    2970             :     /*      should do "weight adjustment" of our results similarly to       */
    2971             :     /*      what is done for the cubic spline and lanc. interpolators.      */
    2972             :     /* -------------------------------------------------------------------- */
    2973             : 
                           // Vertical pass over the four horizontally interpolated rows.
    2974      229294 :     double adfCoeffsY[4] = {};
    2975      229294 :     GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
    2976             : 
    2977      229294 :     *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
    2978      229294 :     *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
    2979      229294 :     *pdfImag = CONVOL4(adfCoeffsY, adfValueImag);
    2980             : 
    2981      229294 :     return true;
    2982             : }
    2983             : 
    2984             : // We do not define USE_SSE_CUBIC_IMPL since in practice, it gives zero
    2985             : // perf benefit.
    2986             : 
    2987             : #if defined(USE_SSE_CUBIC_IMPL) && (defined(__x86_64) || defined(_M_X64))
    2988             : 
    2989             : /************************************************************************/
    2990             : /*                           XMMLoad4Values()                           */
    2991             : /*                                                                      */
    2992             : /*  Load 4 packed byte or uint16, cast them to float and put them in a  */
    2993             : /*  m128 register.                                                      */
    2994             : /************************************************************************/
    2995             : 
                       // GByte overload: reads the 4 bytes at ptr and returns them as 4 packed
                       // floats.
    2996             : static CPL_INLINE __m128 XMMLoad4Values(const GByte *ptr)
    2997             : {
                           // Copy through memcpy so that no aligned 32-bit read of ptr is needed.
    2998             :     unsigned int i;
    2999             :     memcpy(&i, ptr, 4);
                           // Place the 32 loaded bits in the low lane of an XMM register.
    3000             :     __m128i xmm_i = _mm_cvtsi32_si128(i);
    3001             :     // Zero extend 4 packed unsigned 8-bit integers in a to packed
    3002             :     // 32-bit integers.
    3003             : #if __SSE4_1__
    3004             :     xmm_i = _mm_cvtepu8_epi32(xmm_i);
    3005             : #else
                           // Without SSE4.1: widen 8 -> 16 -> 32 bits by interleaving with zeros.
    3006             :     xmm_i = _mm_unpacklo_epi8(xmm_i, _mm_setzero_si128());
    3007             :     xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
    3008             : #endif
    3009             :     return _mm_cvtepi32_ps(xmm_i);
    3010             : }
    3011             : 
                       // GUInt16 overload: reads the 4 unsigned 16-bit values (8 bytes) at ptr
                       // and returns them as 4 packed floats.
    3012             : static CPL_INLINE __m128 XMMLoad4Values(const GUInt16 *ptr)
    3013             : {
                           // Copy through memcpy so that no aligned 64-bit read of ptr is needed.
    3014             :     GUInt64 i;
    3015             :     memcpy(&i, ptr, 8);
                           // Place the 64 loaded bits in the low half of an XMM register.
    3016             :     __m128i xmm_i = _mm_cvtsi64_si128(i);
    3017             :     // Zero extend 4 packed unsigned 16-bit integers in a to packed
    3018             :     // 32-bit integers.
    3019             : #if __SSE4_1__
    3020             :     xmm_i = _mm_cvtepu16_epi32(xmm_i);
    3021             : #else
                           // Without SSE4.1: widen 16 -> 32 bits by interleaving with zeros.
    3022             :     xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
    3023             : #endif
    3024             :     return _mm_cvtepi32_ps(xmm_i);
    3025             : }
    3026             : 
    3027             : /************************************************************************/
    3028             : /*                           XMMHorizontalAdd()                         */
    3029             : /*                                                                      */
    3030             : /*  Return the sum of the 4 floating points of the register.            */
    3031             : /************************************************************************/
    3032             : 
                       // Two variants are provided; both finish with the total in lane 0 and
                       // return it. The lane comments list lanes from highest (3) to lowest (0).
                       // The summation order differs between the variants: (v1+v0)+(v3+v2)
                       // with SSE3, (v2+v0)+(v3+v1) otherwise.
    3033             : #if __SSE3__
    3034             : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
    3035             : {
    3036             :     __m128 shuf = _mm_movehdup_ps(v);   // (v3   , v3   , v1   , v1)
    3037             :     __m128 sums = _mm_add_ps(v, shuf);  // (v3+v3, v3+v2, v1+v1, v1+v0)
    3038             :     shuf = _mm_movehl_ps(shuf, sums);   // (v3   , v3   , v3+v3, v3+v2)
    3039             :     sums = _mm_add_ss(sums, shuf);      // (v1+v0)+(v3+v2)
    3040             :     return _mm_cvtss_f32(sums);
    3041             : }
    3042             : #else
    3043             : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
    3044             : {
    3045             :     __m128 shuf = _mm_movehl_ps(v, v);     // (v3   , v2   , v3   , v2)
    3046             :     __m128 sums = _mm_add_ps(v, shuf);     // (v3+v3, v2+v2, v3+v1, v2+v0)
    3047             :     shuf = _mm_shuffle_ps(sums, sums, 1);  // (v2+v0, v2+v0, v2+v0, v3+v1)
    3048             :     sums = _mm_add_ss(sums, shuf);         // (v2+v0)+(v3+v1)
    3049             :     return _mm_cvtss_f32(sums);
    3050             : }
    3051             : #endif
    3052             : 
    3053             : #endif  // defined(USE_SSE_CUBIC_IMPL) && (defined(__x86_64) || defined(_M_X64))
    3054             : 
    3055             : /************************************************************************/
    3056             : /*            GWKCubicResampleSrcMaskIsDensity4SampleRealT()            */
    3057             : /************************************************************************/
    3058             : 
    3059             : // Note: if USE_SSE_CUBIC_IMPL, only instantiate that for Byte and UInt16,
    3060             : // because there are a few assumptions about those types.
    3061             : 
                       // Bicubic (cubic convolution) interpolation of band iBand at
                       // (dfSrcX, dfSrcY). Samples of type T are read directly from
                       // poWK->papabySrcImage[iBand] and validity is read only from
                       // poWK->pafUnifiedSrcDensity, which is interpolated with the same weights.
                       //
                       // The call is delegated to GWKBilinearResample4Sample() (its imaginary
                       // output is discarded and its result returned as-is) when the 4x4 window
                       // (iSrcX - 1 .. iSrcX + 2, iSrcY - 1 .. iSrcY + 2) is not entirely inside
                       // the source, or when any density in the window is below
                       // SRC_DENSITY_THRESHOLD. Otherwise *pdfDensity and *pdfReal are set and
                       // true is returned.
    3062             : template <class T>
    3063         361 : static CPL_INLINE bool GWKCubicResampleSrcMaskIsDensity4SampleRealT(
    3064             :     const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
    3065             :     double *pdfDensity, double *pdfReal)
    3066             : {
                           // The casts truncate toward zero; an index of 0 is caught by the
                           // border test below, which selects the bilinear fallback.
    3067         361 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3068         361 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3069         361 :     const GPtrDiff_t iSrcOffset =
    3070         361 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3071             : 
    3072             :     // Get the bilinear interpolation at the image borders.
    3073         361 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3074         361 :         iSrcY + 2 >= poWK->nSrcYSize)
    3075             :     {
    3076           0 :         double adfImagIgnored[4] = {};
    3077           0 :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3078           0 :                                           pdfDensity, pdfReal, adfImagIgnored);
    3079             :     }
    3080             : 
    3081             : #if defined(USE_SSE_CUBIC_IMPL) && (defined(__x86_64) || defined(_M_X64))
                           // Optional float32 SSE implementation of the same computation.
    3082             :     const float fDeltaX = static_cast<float>(dfSrcX) - 0.5f - iSrcX;
    3083             :     const float fDeltaY = static_cast<float>(dfSrcY) - 0.5f - iSrcY;
    3084             : 
    3085             :     // One 16-byte aligned buffer holding three groups of 4 floats
    3086             :     // (weights, per-row densities, per-row values), as _mm_load_ps requires.
    3087             :     alignas(16) float afTemp[4 + 4 + 4];
    3088             :     float *pafAligned = afTemp;
    3089             :     float *pafCoeffs = pafAligned;
    3090             :     float *pafDensity = pafAligned + 4;
    3091             :     float *pafValue = pafAligned + 8;
    3092             : 
                           // Horizontal weights: same formulas as GWKCubicComputeWeights().
    3093             :     const float fHalfDeltaX = 0.5f * fDeltaX;
    3094             :     const float fThreeDeltaX = 3.0f * fDeltaX;
    3095             :     const float fHalfDeltaX2 = fHalfDeltaX * fDeltaX;
    3096             : 
    3097             :     pafCoeffs[0] = fHalfDeltaX * (-1 + fDeltaX * (2 - fDeltaX));
    3098             :     pafCoeffs[1] = 1 + fHalfDeltaX2 * (-5 + fThreeDeltaX);
    3099             :     pafCoeffs[2] = fHalfDeltaX * (1 + fDeltaX * (4 - fThreeDeltaX));
    3100             :     pafCoeffs[3] = fHalfDeltaX2 * (-1 + fDeltaX);
    3101             :     __m128 xmmCoeffs = _mm_load_ps(pafCoeffs);
    3102             :     const __m128 xmmThreshold = _mm_load1_ps(&SRC_DENSITY_THRESHOLD);
    3103             : 
                           // Horizontal pass over the four rows. Low densities are only recorded
                           // in a mask here; the mask is tested once after the loop.
    3104             :     __m128 xmmMaskLowDensity = _mm_setzero_ps();
    3105             :     for (GPtrDiff_t i = -1, iOffset = iSrcOffset - poWK->nSrcXSize - 1; i < 3;
    3106             :          i++, iOffset += poWK->nSrcXSize)
    3107             :     {
    3108             :         const __m128 xmmDensity =
    3109             :             _mm_loadu_ps(poWK->pafUnifiedSrcDensity + iOffset);
    3110             :         xmmMaskLowDensity = _mm_or_ps(xmmMaskLowDensity,
    3111             :                                       _mm_cmplt_ps(xmmDensity, xmmThreshold));
    3112             :         pafDensity[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
    3113             : 
    3114             :         const __m128 xmmValues =
    3115             :             XMMLoad4Values(((T *)poWK->papabySrcImage[iBand]) + iOffset);
    3116             :         pafValue[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValues));
    3117             :     }
                           // At least one density in the window was below the threshold.
    3118             :     if (_mm_movemask_ps(xmmMaskLowDensity))
    3119             :     {
    3120             :         double adfImagIgnored[4] = {};
    3121             :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3122             :                                           pdfDensity, pdfReal, adfImagIgnored);
    3123             :     }
    3124             : 
                           // Vertical weights, stored over the horizontal ones.
    3125             :     const float fHalfDeltaY = 0.5f * fDeltaY;
    3126             :     const float fThreeDeltaY = 3.0f * fDeltaY;
    3127             :     const float fHalfDeltaY2 = fHalfDeltaY * fDeltaY;
    3128             : 
    3129             :     pafCoeffs[0] = fHalfDeltaY * (-1 + fDeltaY * (2 - fDeltaY));
    3130             :     pafCoeffs[1] = 1 + fHalfDeltaY2 * (-5 + fThreeDeltaY);
    3131             :     pafCoeffs[2] = fHalfDeltaY * (1 + fDeltaY * (4 - fThreeDeltaY));
    3132             :     pafCoeffs[3] = fHalfDeltaY2 * (-1 + fDeltaY);
    3133             : 
    3134             :     xmmCoeffs = _mm_load_ps(pafCoeffs);
    3135             : 
    3136             :     const __m128 xmmDensity = _mm_load_ps(pafDensity);
    3137             :     const __m128 xmmValue = _mm_load_ps(pafValue);
    3138             :     *pdfDensity = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
    3139             :     *pdfReal = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValue));
    3140             : 
    3141             :     // We did all above computations on float32 whereas the general case is
    3142             :     // float64. Not sure if one is fundamentally more correct than the other
    3143             :     // one, but we want our optimization to give the same result as the
    3144             :     // general case as much as possible, so if the resulting value is
    3145             :     // close to some_int_value + 0.5, redo the computation with the general
    3146             :     // case.
    3147             :     // Note: If other types than Byte or UInt16, will need changes.
    3148             :     if (fabs(*pdfReal - static_cast<int>(*pdfReal) - 0.5) > .007)
    3149             :         return true;
    3150             : 
    3151             : #endif  // defined(USE_SSE_CUBIC_IMPL) && (defined(__x86_64) || defined(_M_X64))
    3152             : 
                           // General float64 path.
    3153         361 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3154         361 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3155             : 
                           // Horizontally interpolated density / value of each of the four rows.
    3156         361 :     double adfValueDens[4] = {};
    3157         361 :     double adfValueReal[4] = {};
    3158             : 
    3159         361 :     double adfCoeffsX[4] = {};
    3160         361 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
    3161             : 
    3162         361 :     double adfCoeffsY[4] = {};
    3163         361 :     GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
    3164             : 
    3165        1433 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3166             :     {
                               // First pixel (column iSrcX - 1) of row iSrcY + i.
    3167        1177 :         const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
                               // In the SSE build this density test was already done above.
    3168             : #if !(defined(USE_SSE_CUBIC_IMPL) && (defined(__x86_64) || defined(_M_X64)))
    3169        1177 :         if (poWK->pafUnifiedSrcDensity[iOffset + 0] < SRC_DENSITY_THRESHOLD ||
    3170        1089 :             poWK->pafUnifiedSrcDensity[iOffset + 1] < SRC_DENSITY_THRESHOLD ||
    3171        1089 :             poWK->pafUnifiedSrcDensity[iOffset + 2] < SRC_DENSITY_THRESHOLD ||
    3172        1089 :             poWK->pafUnifiedSrcDensity[iOffset + 3] < SRC_DENSITY_THRESHOLD)
    3173             :         {
    3174         105 :             double adfImagIgnored[4] = {};
    3175         105 :             return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3176             :                                               pdfDensity, pdfReal,
    3177         105 :                                               adfImagIgnored);
    3178             :         }
    3179             : #endif
    3180             : 
    3181        1072 :         adfValueDens[i + 1] =
    3182        1072 :             CONVOL4(adfCoeffsX, poWK->pafUnifiedSrcDensity + iOffset);
    3183             : 
    3184        1072 :         adfValueReal[i + 1] = CONVOL4(
    3185             :             adfCoeffsX,
    3186             :             reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
    3187             :     }
    3188             : 
                           // Vertical pass over the four horizontally interpolated rows.
    3189         256 :     *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
    3190         256 :     *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
    3191             : 
    3192         256 :     return true;
    3193             : }
    3194             : 
    3195             : /************************************************************************/
    3196             : /*            GWKCubicResampleSrcMaskIsDensity4SampleReal()             */
    3197             : /*     Bi-cubic when source has and only has pafUnifiedSrcDensity.      */
    3198             : /************************************************************************/
    3199             : 
                       // Interpolates band iBand at (dfSrcX, dfSrcY) over the 4x4 window
                       // (iSrcX - 1 .. iSrcX + 2, iSrcY - 1 .. iSrcY + 2), fetching each row
                       // with GWKGetPixelRow(); the imaginary values it returns are ignored.
                       //
                       // The call is delegated to GWKBilinearResample4Sample() (result returned
                       // as-is) when the window is not entirely inside the source, when
                       // GWKGetPixelRow() fails, or when any fetched density is below
                       // SRC_DENSITY_THRESHOLD. Otherwise *pdfDensity and *pdfReal are set and
                       // true is returned.
    3200           0 : static bool GWKCubicResampleSrcMaskIsDensity4SampleReal(
    3201             :     const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
    3202             :     double *pdfDensity, double *pdfReal)
    3203             : 
    3204             : {
                           // The casts truncate toward zero; an index of 0 is caught by the
                           // border test below, which selects the bilinear fallback.
    3205           0 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3206           0 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3207           0 :     const GPtrDiff_t iSrcOffset =
    3208           0 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
                           // Fractional position inside the central cell of the window.
    3209           0 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3210           0 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3211             : 
    3212             :     // Get the bilinear interpolation at the image borders.
    3213           0 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3214           0 :         iSrcY + 2 >= poWK->nSrcYSize)
    3215             :     {
    3216           0 :         double adfImagIgnored[4] = {};
    3217           0 :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3218           0 :                                           pdfDensity, pdfReal, adfImagIgnored);
    3219             :     }
    3220             : 
    3221           0 :     double adfCoeffsX[4] = {};
    3222           0 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
    3223             : 
    3224           0 :     double adfCoeffsY[4] = {};
    3225           0 :     GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
    3226             : 
                           // adfValue*: horizontally interpolated result of each of the four
                           // rows. adfDensity / adfReal / adfImagIgnored: scratch buffers for one
                           // fetched row.
    3227           0 :     double adfValueDens[4] = {};
    3228           0 :     double adfValueReal[4] = {};
    3229           0 :     double adfDensity[4] = {};
    3230           0 :     double adfReal[4] = {};
    3231           0 :     double adfImagIgnored[4] = {};
    3232             : 
                           // Rows iSrcY - 1 .. iSrcY + 2, each starting at column iSrcX - 1.
                           // The 4th argument (2) is presumably the number of pixel pairs to
                           // fetch, i.e. 4 pixels - TODO confirm against GWKGetPixelRow().
    3233           0 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3234             :     {
    3235           0 :         if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
    3236           0 :                             2, adfDensity, adfReal, adfImagIgnored) ||
    3237           0 :             adfDensity[0] < SRC_DENSITY_THRESHOLD ||
    3238           0 :             adfDensity[1] < SRC_DENSITY_THRESHOLD ||
    3239           0 :             adfDensity[2] < SRC_DENSITY_THRESHOLD ||
    3240           0 :             adfDensity[3] < SRC_DENSITY_THRESHOLD)
    3241             :         {
    3242           0 :             return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3243             :                                               pdfDensity, pdfReal,
    3244           0 :                                               adfImagIgnored);
    3245             :         }
    3246             : 
    3247           0 :         adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
    3248           0 :         adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
    3249             :     }
    3250             : 
                           // Vertical pass over the four horizontally interpolated rows.
    3251           0 :     *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
    3252           0 :     *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
    3253             : 
    3254           0 :     return true;
    3255             : }
    3256             : 
                       // Bicubic (cubic convolution) interpolation of band iBand at
                       // (dfSrcX, dfSrcY). Samples of type T are read directly from
                       // poWK->papabySrcImage[iBand]; no mask or density array is consulted.
                       //
                       // When the 4x4 window (iSrcX - 1 .. iSrcX + 2, iSrcY - 1 .. iSrcY + 2) is
                       // not entirely inside the source, the call is delegated to
                       // GWKBilinearResampleNoMasks4SampleT() and its result is returned.
                       // Otherwise the result, converted to T by GWKClampValueT<T>, is stored in
                       // *pValue and true is returned.
    3257             : template <class T>
    3258     1826603 : static bool GWKCubicResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
    3259             :                                             int iBand, double dfSrcX,
    3260             :                                             double dfSrcY, T *pValue)
    3261             : 
    3262             : {
                           // The casts truncate toward zero; an index of 0 is caught by the
                           // border test below, which selects the bilinear fallback.
    3263     1826603 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3264     1826603 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3265     1826603 :     const GPtrDiff_t iSrcOffset =
    3266     1826603 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
                           // Fractional position inside the central cell, plus the powers of the
                           // vertical offset that CubicConvolution() takes as arguments.
    3267     1826603 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3268     1826603 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3269     1826603 :     const double dfDeltaY2 = dfDeltaY * dfDeltaY;
    3270     1826603 :     const double dfDeltaY3 = dfDeltaY2 * dfDeltaY;
    3271             : 
    3272             :     // Get the bilinear interpolation at the image borders.
    3273     1826603 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3274     1584121 :         iSrcY + 2 >= poWK->nSrcYSize)
    3275      301368 :         return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    3276      301368 :                                                   pValue);
    3277             : 
                           // Horizontal weights, shared by the four rows.
    3278     1525235 :     double adfCoeffs[4] = {};
    3279     1525235 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffs);
    3280             : 
                           // Horizontally interpolated value of each of the four rows.
    3281     1525235 :     double adfValue[4] = {};
    3282             : 
    3283     7626160 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3284             :     {
                               // First pixel (column iSrcX - 1) of row iSrcY + i.
    3285     6100946 :         const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
    3286             : 
    3287     6100946 :         adfValue[i + 1] = CONVOL4(
    3288             :             adfCoeffs,
    3289             :             reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
    3290             :     }
    3291             : 
                           // Vertical pass over the four row results.
    3292     1525235 :     const double dfValue =
    3293     1525235 :         CubicConvolution(dfDeltaY, dfDeltaY2, dfDeltaY3, adfValue[0],
    3294             :                          adfValue[1], adfValue[2], adfValue[3]);
    3295             : 
    3296     1525235 :     *pValue = GWKClampValueT<T>(dfValue);
    3297             : 
    3298     1525235 :     return true;
    3299             : }
    3300             : 
    3301             : /************************************************************************/
    3302             : /*                          GWKLanczosSinc()                            */
    3303             : /************************************************************************/
    3304             : 
    3305             : /*
    3306             :  * Lanczos windowed sinc interpolation kernel with radius r.
    3307             :  *        /
    3308             :  *        | 1,                   if x = 0
    3309             :  * L(x) = | sinc(x) * sinc(x/r), if 0 < |x| < r
    3310             :  *        | 0,                   otherwise
    3311             :  *        \
    3312             :  *
    3313             :  * where sinc(x) = sin(PI * x) / (PI * x).
    3314             :  */
    3315             : 
    //               : // Evaluates the Lanczos kernel sinc(x) * sinc(x / 3) at dfX.
    //               : //
    //               : // The radius is fixed at 3 by the "/ 3" below.  No range test is made
    //               : // here, so values outside the radius are not forced to 0; for example
    //               : // GWKResampleCreateWrkStruct() trims its index range so that the
    //               : // argument stays within [-3, 3] before calling this function.
    3316         719 : static double GWKLanczosSinc(double dfX)
    3317             : {
    //               :     // An exact zero would give 0 / 0 below; the kernel value there is 1.
    3318         719 :     if (dfX == 0.0)
    3319          13 :         return 1.0;
    3320             : 
    3321         706 :     const double dfPIX = M_PI * dfX;
    3322         706 :     const double dfPIXoverR = dfPIX / 3;
    //               :     // Product of both sinc denominators: (PI * x) * (PI * x / 3).
    3323         706 :     const double dfPIX2overR = dfPIX * dfPIXoverR;
    3324         706 :     return sin(dfPIX) * sin(dfPIXoverR) / dfPIX2overR;
    3325             : }
    3326             : 
    //               : // Four-at-a-time variant of GWKLanczosSinc().
    //               : //
    //               : // On entry padfValues[0..3] hold four kernel arguments; on return each
    //               : // entry has been overwritten with its kernel value (same formula and
    //               : // fixed radius 3 as GWKLanczosSinc, again without a range test).
    //               : // Returns the sum of the four resulting weights.
    3327       64358 : static double GWKLanczosSinc4Values(double *padfValues)
    3328             : {
    3329      321790 :     for (int i = 0; i < 4; i++)
    3330             :     {
    //               :         // Exact zero is special-cased to avoid 0 / 0.
    3331      257432 :         if (padfValues[i] == 0.0)
    3332             :         {
    3333           0 :             padfValues[i] = 1.0;
    3334             :         }
    3335             :         else
    3336             :         {
    3337      257432 :             const double dfPIX = M_PI * padfValues[i];
    3338      257432 :             const double dfPIXoverR = dfPIX / 3;
    3339      257432 :             const double dfPIX2overR = dfPIX * dfPIXoverR;
    3340      257432 :             padfValues[i] = sin(dfPIX) * sin(dfPIXoverR) / dfPIX2overR;
    3341             :         }
    3342             :     }
    3343       64358 :     return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
    3344             : }
    3345             : 
    3346             : /************************************************************************/
    3347             : /*                           GWKBilinear()                              */
    3348             : /************************************************************************/
    3349             : 
    //               : // Bilinear (triangle) filter weight: 1 - |dfX| for |dfX| <= 1, else 0.
    3350     7272990 : static double GWKBilinear(double dfX)
    3351             : {
    3352     7272990 :     double dfAbsX = fabs(dfX);
    3353     7272990 :     if (dfAbsX <= 1.0)
    3354     6799870 :         return 1 - dfAbsX;
    3355             :     else
    3356      473117 :         return 0.0;
    3357             : }
    3358             : 
    //               : // Four-at-a-time variant of GWKBilinear().
    //               : //
    //               : // On entry padfValues[0..3] hold four filter arguments; on return each
    //               : // entry has been overwritten with 1 - |x| when |x| <= 1 and with 0
    //               : // otherwise.  Returns the sum of the four resulting weights.
    3359      952924 : static double GWKBilinear4Values(double *padfValues)
    3360             : {
    //               :     // All four magnitudes are taken before any entry is overwritten.
    3361      952924 :     double dfAbsX0 = fabs(padfValues[0]);
    3362      952924 :     double dfAbsX1 = fabs(padfValues[1]);
    3363      952924 :     double dfAbsX2 = fabs(padfValues[2]);
    3364      952924 :     double dfAbsX3 = fabs(padfValues[3]);
    3365      952924 :     if (dfAbsX0 <= 1.0)
    3366      660699 :         padfValues[0] = 1 - dfAbsX0;
    3367             :     else
    3368      292225 :         padfValues[0] = 0.0;
    3369      952924 :     if (dfAbsX1 <= 1.0)
    3370      952924 :         padfValues[1] = 1 - dfAbsX1;
    3371             :     else
    3372           0 :         padfValues[1] = 0.0;
    3373      952924 :     if (dfAbsX2 <= 1.0)
    3374      952924 :         padfValues[2] = 1 - dfAbsX2;
    3375             :     else
    3376           0 :         padfValues[2] = 0.0;
    3377      952924 :     if (dfAbsX3 <= 1.0)
    3378      660448 :         padfValues[3] = 1 - dfAbsX3;
    3379             :     else
    3380      292476 :         padfValues[3] = 0.0;
    3381      952924 :     return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
    3382             : }
    3383             : 
    3384             : /************************************************************************/
    3385             : /*                            GWKCubic()                                */
    3386             : /************************************************************************/
    3387             : 
    //               : // Cubic convolution filter weight at dfX:
    //               : //    1.5 |x|^3 - 2.5 x^2 + 1             for |x| <= 1
    //               : //   -0.5 |x|^3 + 2.5 x^2 - 4 |x| + 2     for 1 < |x| <= 2
    //               : //    0                                   otherwise
    3388    12341400 : static double GWKCubic(double dfX)
    3389             : {
    3390             :     // http://en.wikipedia.org/wiki/Bicubic_interpolation#Bicubic_convolution_algorithm
    3391             :     // W(x) formula with a = -0.5 (cubic hermite spline )
    3392             :     // or
    3393             :     // https://www.cs.utexas.edu/~fussell/courses/cs384g-fall2013/lectures/mitchell/Mitchell.pdf
    3394             :     // k(x) (formula 8) with (B,C)=(0,0.5) the Catmull-Rom spline
    3395    12341400 :     double dfAbsX = fabs(dfX);
    3396    12341400 :     if (dfAbsX <= 1.0)
    3397             :     {
    //               :         // x^2 is factored out of the two highest-order terms.
    3398     1031130 :         double dfX2 = dfX * dfX;
    3399     1031130 :         return dfX2 * (1.5 * dfAbsX - 2.5) + 1;
    3400             :     }
    3401    11310300 :     else if (dfAbsX <= 2.0)
    3402             :     {
    3403     7131070 :         double dfX2 = dfX * dfX;
    3404     7131070 :         return dfX2 * (-0.5 * dfAbsX + 2.5) - 4 * dfAbsX + 2;
    3405             :     }
    3406             :     else
    3407     4179190 :         return 0.0;
    3408             : }
    3409             : 
    //               : // Four-at-a-time variant of GWKCubic().
    //               : //
    //               : // On entry padfValues[0..3] hold four filter arguments; on return each
    //               : // entry has been overwritten with its cubic weight (same piecewise
    //               : // polynomial as GWKCubic, 0 when |x| > 2).  Returns the sum of the four
    //               : // resulting weights.
    3410    18321600 : static double GWKCubic4Values(double *padfValues)
    3411             : {
    //               :     // Magnitudes and squares of all four inputs are computed up front,
    //               :     // before any branch is taken.
    3412    18321600 :     const double dfAbsX_0 = fabs(padfValues[0]);
    3413    18321600 :     const double dfAbsX_1 = fabs(padfValues[1]);
    3414    18321600 :     const double dfAbsX_2 = fabs(padfValues[2]);
    3415    18321600 :     const double dfAbsX_3 = fabs(padfValues[3]);
    3416    18321600 :     const double dfX2_0 = padfValues[0] * padfValues[0];
    3417    18321600 :     const double dfX2_1 = padfValues[1] * padfValues[1];
    3418    18321600 :     const double dfX2_2 = padfValues[2] * padfValues[2];
    3419    18321600 :     const double dfX2_3 = padfValues[3] * padfValues[3];
    3420             : 
    //               :     // Each weight starts at 0.0, which is the result kept for |x| > 2.
    3421    18321600 :     double dfVal0 = 0.0;
    3422    18321600 :     if (dfAbsX_0 <= 1.0)
    3423     2493660 :         dfVal0 = dfX2_0 * (1.5 * dfAbsX_0 - 2.5) + 1.0;
    3424    15827900 :     else if (dfAbsX_0 <= 2.0)
    3425    10952900 :         dfVal0 = dfX2_0 * (-0.5 * dfAbsX_0 + 2.5) - 4.0 * dfAbsX_0 + 2.0;
    3426             : 
    3427    18321600 :     double dfVal1 = 0.0;
    3428    18321600 :     if (dfAbsX_1 <= 1.0)
    3429    10410700 :         dfVal1 = dfX2_1 * (1.5 * dfAbsX_1 - 2.5) + 1.0;
    3430     7910830 :     else if (dfAbsX_1 <= 2.0)
    3431     8047870 :         dfVal1 = dfX2_1 * (-0.5 * dfAbsX_1 + 2.5) - 4.0 * dfAbsX_1 + 2.0;
    3432             : 
    3433    18321600 :     double dfVal2 = 0.0;
    3434    18321600 :     if (dfAbsX_2 <= 1.0)
    3435    15579900 :         dfVal2 = dfX2_2 * (1.5 * dfAbsX_2 - 2.5) + 1.0;
    3436     2741640 :     else if (dfAbsX_2 <= 2.0)
    3437     2867180 :         dfVal2 = dfX2_2 * (-0.5 * dfAbsX_2 + 2.5) - 4.0 * dfAbsX_2 + 2.0;
    3438             : 
    3439    18321600 :     double dfVal3 = 0.0;
    3440    18321600 :     if (dfAbsX_3 <= 1.0)
    3441     8602200 :         dfVal3 = dfX2_3 * (1.5 * dfAbsX_3 - 2.5) + 1.0;
    3442     9719370 :     else if (dfAbsX_3 <= 2.0)
    3443     9069800 :         dfVal3 = dfX2_3 * (-0.5 * dfAbsX_3 + 2.5) - 4.0 * dfAbsX_3 + 2.0;
    3444             : 
    //               :     // Write the weights back over the inputs and return their sum.
    3445    18321600 :     padfValues[0] = dfVal0;
    3446    18321600 :     padfValues[1] = dfVal1;
    3447    18321600 :     padfValues[2] = dfVal2;
    3448    18321600 :     padfValues[3] = dfVal3;
    3449    18321600 :     return dfVal0 + dfVal1 + dfVal2 + dfVal3;
    3450             : }
    3451             : 
    3452             : /************************************************************************/
    3453             : /*                           GWKBSpline()                               */
    3454             : /************************************************************************/
    3455             : 
    3456             : // https://www.cs.utexas.edu/~fussell/courses/cs384g-fall2013/lectures/mitchell/Mitchell.pdf
    3457             : // Equation 8 with (B,C)=(1,0)
    3458             : // 1/6 * ( 3 * |x|^3 - 6 * |x|^2 + 4)             if |x| < 1
    3459             : // 1/6 * ( -|x|^3 + 6 * |x|^2 - 12 * |x| + 8)     if 1 <= |x| < 2
    3460             : 
    //               : // Cubic B-spline filter weight at x, WITHOUT the 1/6 factor (see the
    //               : // trailing comment on the last line of the return expression), i.e.
    //               : // the value returned is 6 times the normalised spline.
    //               : //
    //               : // Evaluated as a sum of truncated cubes:
    //               : //   (x+2)^3 - 4 (x+1)^3 + 6 x^3 - 4 (x-1)^3
    //               : // where each term is only included once its base is > 0, so the result
    //               : // is 0 for x <= -2.  There is no (x-2)^3 term, so the result is not
    //               : // forced to 0 for x > 2.
    //               : // NOTE(review): presumably callers only pass |x| <= 2 - confirm.
    3461      146888 : static double GWKBSpline(double x)
    3462             : {
    3463      146888 :     const double xp2 = x + 2.0;
    3464      146888 :     const double xp1 = x + 1.0;
    3465      146888 :     const double xm1 = x - 1.0;
    3466             : 
    3467             :     // This will most likely be used, so we'll compute it ahead of time to
    3468             :     // avoid stalling the processor.
    3469      146888 :     const double xp2c = xp2 * xp2 * xp2;
    3470             : 
    3471             :     // Note that the test is computed only if it is needed.
    3472             :     // TODO(schwehr): Make this easier to follow.
    //               :     // Nesting order, outermost first: xp2 > 0, xp1 > 0, x > 0, xm1 > 0.
    //               :     // Each inner term is only reached when every outer test passed.
    3473             :     return xp2 > 0.0
    3474      293776 :                ? ((xp1 > 0.0)
    3475      146888 :                       ? ((x > 0.0)
    3476      132530 :                              ? ((xm1 > 0.0) ? -4.0 * xm1 * xm1 * xm1 : 0.0) +
    3477       98104 :                                    6.0 * x * x * x
    3478             :                              : 0.0) +
    3479      132530 :                             -4.0 * xp1 * xp1 * xp1
    3480             :                       : 0.0) +
    3481             :                      xp2c
    3482      146888 :                : 0.0;  // * 0.166666666666666666666
    3483             : }
    3484             : 
    //               : // Four-at-a-time variant of GWKBSpline().
    //               : //
    //               : // On entry padfValues[0..3] hold four filter arguments; on return each
    //               : // entry has been overwritten with its B-spline weight, computed with
    //               : // the same truncated-cube expression as GWKBSpline (so also without
    //               : // the 1/6 factor and without an (x-2)^3 term).  Returns the sum of the
    //               : // four resulting weights.
    3485     3262790 : static double GWKBSpline4Values(double *padfValues)
    3486             : {
    3487    16314000 :     for (int i = 0; i < 4; i++)
    3488             :     {
    3489    13051200 :         const double x = padfValues[i];
    3490    13051200 :         const double xp2 = x + 2.0;
    3491    13051200 :         const double xp1 = x + 1.0;
    3492    13051200 :         const double xm1 = x - 1.0;
    3493             : 
    3494             :         // This will most likely be used, so we'll compute it ahead of time to
    3495             :         // avoid stalling the processor.
    3496    13051200 :         const double xp2c = xp2 * xp2 * xp2;
    3497             : 
    3498             :         // Note that the test is computed only if it is needed.
    3499             :         // TODO(schwehr): Make this easier to follow.
    //               :         // (x+2)^3 - 4 (x+1)^3 + 6 x^3 - 4 (x-1)^3, each term included only
    //               :         // once its base is > 0; the result is 0 when x + 2 <= 0.
    3500    13051200 :         padfValues[i] =
    3501             :             (xp2 > 0.0)
    3502    26102300 :                 ? ((xp1 > 0.0)
    3503    13051200 :                        ? ((x > 0.0)
    3504     9788180 :                               ? ((xm1 > 0.0) ? -4.0 * xm1 * xm1 * xm1 : 0.0) +
    3505     6518520 :                                     6.0 * x * x * x
    3506             :                               : 0.0) +
    3507     9788180 :                              -4.0 * xp1 * xp1 * xp1
    3508             :                        : 0.0) +
    3509             :                       xp2c
    3510             :                 : 0.0;  // * 0.166666666666666666666
    3511             :     }
    3512     3262790 :     return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
    3513             : }
    3514             : /************************************************************************/
    3515             : /*                       GWKResampleWrkStruct                           */
    3516             : /************************************************************************/
    3517             : 
    //               : // Scratch state shared by the resampling functions below.  Instances
    //               : // are built by GWKResampleCreateWrkStruct() and released by
    //               : // GWKResampleDeleteWrkStruct().
    3518             : typedef struct _GWKResampleWrkStruct GWKResampleWrkStruct;
    3519             : 
    //               : // Signature shared by GWKResample() and GWKResampleOptimizedLanczos():
    //               : // resample band iBand at (dfSrcX, dfSrcY) and report the result through
    //               : // pdfDensity / pdfReal / pdfImag.
    3520             : typedef bool (*pfnGWKResampleType)(const GDALWarpKernel *poWK, int iBand,
    3521             :                                    double dfSrcX, double dfSrcY,
    3522             :                                    double *pdfDensity, double *pdfReal,
    3523             :                                    double *pdfImag,
    3524             :                                    GWKResampleWrkStruct *psWrkStruct);
    3525             : 
    3526             : struct _GWKResampleWrkStruct
    3527             : {
    //               :     // Resampler chosen at creation time: GWKResampleOptimizedLanczos
    //               :     // for GRA_Lanczos, GWKResample otherwise.
    3528             :     pfnGWKResampleType pfnGWKResample;
    3529             : 
    3530             :     // Space for saved X weights.
    //               :     // Both arrays hold (nXRadius + 1) * 2 entries.  pabCalcX[k] tells
    //               :     // GWKResample() whether padfWeightsX[k] is already filled; it is
    //               :     // cleared at the start of every GWKResample() call.
    3531             :     double *padfWeightsX;
    3532             :     bool *pabCalcX;
    3533             : 
    //               :     // padfWeightsY holds (nYRadius + 1) * 2 entries; the four "last"
    //               :     // fields are initialised to -10 at creation.
    3534             :     double *padfWeightsY;  // Only used by GWKResampleOptimizedLanczos.
    3535             :     int iLastSrcX;         // Only used by GWKResampleOptimizedLanczos.
    3536             :     int iLastSrcY;         // Only used by GWKResampleOptimizedLanczos.
    3537             :     double dfLastDeltaX;   // Only used by GWKResampleOptimizedLanczos.
    3538             :     double dfLastDeltaY;   // Only used by GWKResampleOptimizedLanczos.
    3539             : 
    3540             :     // Space for saving a row of pixels.
    //               :     // Each row buffer holds (nXRadius + 1) * 2 entries.  padfRowDensity
    //               :     // is nullptr when the warp kernel has no source density or validity
    //               :     // mask; users of this struct test it for nullptr before reading.
    3541             :     double *padfRowDensity;
    3542             :     double *padfRowReal;
    3543             :     double *padfRowImag;
    3544             : };
    3545             : 
    3546             : /************************************************************************/
    3547             : /*                    GWKResampleCreateWrkStruct()                      */
    3548             : /************************************************************************/
    3549             : 
    //               : // Forward declarations: GWKResampleCreateWrkStruct() below stores the
    //               : // address of one of these two functions in pfnGWKResample before their
    //               : // definitions appear in the file.
    3550             : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
    3551             :                         double dfSrcY, double *pdfDensity, double *pdfReal,
    3552             :                         double *pdfImag, GWKResampleWrkStruct *psWrkStruct);
    3553             : 
    3554             : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
    3555             :                                         double dfSrcX, double dfSrcY,
    3556             :                                         double *pdfDensity, double *pdfReal,
    3557             :                                         double *pdfImag,
    3558             :                                         GWKResampleWrkStruct *psWrkStruct);
    3559             : 
    //               : // Allocates and initialises the scratch structure used by the
    //               : // resampling functions for the warp kernel poWK.
    //               : //
    //               : // Buffer sizes are derived from poWK->nXRadius / nYRadius.  The
    //               : // resampling function pointer is chosen from poWK->eResample, and for
    //               : // GRA_Lanczos with a scale below 1 the Lanczos weights are precomputed
    //               : // here.  Returns the new structure, whose buffers are all freed by
    //               : // GWKResampleDeleteWrkStruct().
    3560         343 : static GWKResampleWrkStruct *GWKResampleCreateWrkStruct(GDALWarpKernel *poWK)
    3561             : {
    3562         343 :     const int nXDist = (poWK->nXRadius + 1) * 2;
    3563         343 :     const int nYDist = (poWK->nYRadius + 1) * 2;
    3564             : 
    3565             :     GWKResampleWrkStruct *psWrkStruct = static_cast<GWKResampleWrkStruct *>(
    3566         343 :         CPLMalloc(sizeof(GWKResampleWrkStruct)));
    3567             : 
    3568             :     // Alloc space for saved X weights.
    //               :     // pabCalcX is not zero-initialised here (CPLMalloc, not CPLCalloc);
    //               :     // GWKResample() memsets it at the start of every call.
    3569         343 :     psWrkStruct->padfWeightsX =
    3570         343 :         static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3571         343 :     psWrkStruct->pabCalcX =
    3572         343 :         static_cast<bool *>(CPLMalloc(nXDist * sizeof(bool)));
    3573             : 
    3574         343 :     psWrkStruct->padfWeightsY =
    3575         343 :         static_cast<double *>(CPLCalloc(nYDist, sizeof(double)));
    //               :     // NOTE(review): -10 looks like a "nothing cached yet" sentinel for
    //               :     // GWKResampleOptimizedLanczos - confirm against that function.
    3576         343 :     psWrkStruct->iLastSrcX = -10;
    3577         343 :     psWrkStruct->iLastSrcY = -10;
    3578         343 :     psWrkStruct->dfLastDeltaX = -10;
    3579         343 :     psWrkStruct->dfLastDeltaY = -10;
    3580             : 
    3581             :     // Alloc space for saving a row of pixels.
    //               :     // The density row is only needed when some density or validity mask
    //               :     // exists; a nullptr here is what the resamplers test for.
    3582         343 :     if (poWK->pafUnifiedSrcDensity == nullptr &&
    3583         316 :         poWK->panUnifiedSrcValid == nullptr &&
    3584         304 :         poWK->papanBandSrcValid == nullptr)
    3585             :     {
    3586         304 :         psWrkStruct->padfRowDensity = nullptr;
    3587             :     }
    3588             :     else
    3589             :     {
    3590          39 :         psWrkStruct->padfRowDensity =
    3591          39 :             static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3592             :     }
    3593         343 :     psWrkStruct->padfRowReal =
    3594         343 :         static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3595         343 :     psWrkStruct->padfRowImag =
    3596         343 :         static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3597             : 
    3598         343 :     if (poWK->eResample == GRA_Lanczos)
    3599             :     {
    3600          65 :         psWrkStruct->pfnGWKResample = GWKResampleOptimizedLanczos;
    3601             : 
    //               :         // With a scale below 1 the X weights are computed once here, at
    //               :         // the integer offsets i scaled by dfXScale.
    3602          65 :         const double dfXScale = poWK->dfXScale;
    3603          65 :         if (dfXScale < 1.0)
    3604             :         {
    3605           3 :             int iMin = poWK->nFiltInitX;
    3606           3 :             int iMax = poWK->nXRadius;
    //               :             // Shrink the range until the scaled offsets lie in [-3, 3],
    //               :             // the radius hard-coded in GWKLanczosSinc().
    3607           3 :             while (iMin * dfXScale < -3.0)
    3608           0 :                 iMin++;
    3609           3 :             while (iMax * dfXScale > 3.0)
    3610           0 :                 iMax--;
    3611             : 
    //               :             // Stored relative to nFiltInitX; entries outside [iMin, iMax]
    //               :             // keep the 0.0 they received from CPLCalloc.
    3612         102 :             for (int i = iMin; i <= iMax; ++i)
    3613             :             {
    3614          99 :                 psWrkStruct->padfWeightsX[i - poWK->nFiltInitX] =
    3615          99 :                     GWKLanczosSinc(i * dfXScale);
    3616             :             }
    3617             :         }
    3618             : 
    //               :         // Same precomputation for the Y direction.
    3619          65 :         const double dfYScale = poWK->dfYScale;
    3620          65 :         if (dfYScale < 1.0)
    3621             :         {
    3622          10 :             int jMin = poWK->nFiltInitY;
    3623          10 :             int jMax = poWK->nYRadius;
    3624          17 :             while (jMin * dfYScale < -3.0)
    3625           7 :                 jMin++;
    3626          17 :             while (jMax * dfYScale > 3.0)
    3627           7 :                 jMax--;
    3628             : 
    3629         158 :             for (int j = jMin; j <= jMax; ++j)
    3630             :             {
    3631         148 :                 psWrkStruct->padfWeightsY[j - poWK->nFiltInitY] =
    3632         148 :                     GWKLanczosSinc(j * dfYScale);
    3633             :             }
    3634             :         }
    3635             :     }
    3636             :     else
    3637         278 :         psWrkStruct->pfnGWKResample = GWKResample;
    3638             : 
    3639         343 :     return psWrkStruct;
    3640             : }
    3641             : 
    3642             : /************************************************************************/
    3643             : /*                    GWKResampleDeleteWrkStruct()                      */
    3644             : /************************************************************************/
    3645             : 
    //               : // Releases every buffer owned by psWrkStruct and then the structure
    //               : // itself.  psWrkStruct is dereferenced unconditionally, so it must not
    //               : // be null.
    3646         343 : static void GWKResampleDeleteWrkStruct(GWKResampleWrkStruct *psWrkStruct)
    3647             : {
    3648         343 :     CPLFree(psWrkStruct->padfWeightsX);
    3649         343 :     CPLFree(psWrkStruct->padfWeightsY);
    3650         343 :     CPLFree(psWrkStruct->pabCalcX);
    //               :     // padfRowDensity may be nullptr (no mask at creation time).
    //               :     // NOTE(review): presumably CPLFree accepts nullptr - confirm.
    3651         343 :     CPLFree(psWrkStruct->padfRowDensity);
    3652         343 :     CPLFree(psWrkStruct->padfRowReal);
    3653         343 :     CPLFree(psWrkStruct->padfRowImag);
    3654         343 :     CPLFree(psWrkStruct);
    3655         343 : }
    3656             : 
    3657             : /************************************************************************/
    3658             : /*                           GWKResample()                              */
    3659             : /************************************************************************/
    3660             : 
    //               : // General separable-filter resampler for one source sample.
    //               : //
    //               : // Accumulates the pixels of band iBand around (dfSrcX, dfSrcY), weighting
    //               : // each one with the filter function apfGWKFilter[poWK->eResample]
    //               : // applied to its X offset and to its Y offset.
    //               : //
    //               : // poWK         warp kernel: image size, radii, scales and source data.
    //               : // iBand        source band index.
    //               : // dfSrcX/Y     source position; the centre pixel is floor(pos - 0.5).
    //               : // pdfDensity   out: weighted density, or 1.0 when no density row is
    //               : //              kept; set to 0.0 when the function returns false.
    //               : // pdfReal      out: weighted real part (not written on failure).
    //               : // pdfImag      out: weighted imaginary part (not written on failure).
    //               : // psWrkStruct  scratch buffers from GWKResampleCreateWrkStruct().
    //               : //
    //               : // Returns false when the accumulated weight (or, if a density row is
    //               : // kept, the accumulated density) is below 0.000001; true otherwise.
    3661      239383 : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
    3662             :                         double dfSrcY, double *pdfDensity, double *pdfReal,
    3663             :                         double *pdfImag, GWKResampleWrkStruct *psWrkStruct)
    3664             : 
    3665             : {
    3666             :     // Save as local variables to avoid following pointers in loops.
    3667      239383 :     const int nSrcXSize = poWK->nSrcXSize;
    3668      239383 :     const int nSrcYSize = poWK->nSrcYSize;
    3669             : 
    3670      239383 :     double dfAccumulatorReal = 0.0;
    3671      239383 :     double dfAccumulatorImag = 0.0;
    3672      239383 :     double dfAccumulatorDensity = 0.0;
    3673      239383 :     double dfAccumulatorWeight = 0.0;
    //               :     // Centre pixel and the fractional offset of the sample from it.
    3674      239383 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    3675      239383 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    3676      239383 :     const GPtrDiff_t iSrcOffset =
    3677      239383 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    3678      239383 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3679      239383 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3680             : 
    3681      239383 :     const double dfXScale = poWK->dfXScale;
    3682      239383 :     const double dfYScale = poWK->dfYScale;
    3683             : 
    //               :     // Same formula as the buffer size used at allocation time.
    3684      239383 :     const int nXDist = (poWK->nXRadius + 1) * 2;
    3685             : 
    3686             :     // Space for saved X weights.
    3687      239383 :     double *padfWeightsX = psWrkStruct->padfWeightsX;
    3688      239383 :     bool *pabCalcX = psWrkStruct->pabCalcX;
    3689             : 
    3690             :     // Space for saving a row of pixels.
    3691      239383 :     double *padfRowDensity = psWrkStruct->padfRowDensity;
    3692      239383 :     double *padfRowReal = psWrkStruct->padfRowReal;
    3693      239383 :     double *padfRowImag = psWrkStruct->padfRowImag;
    3694             : 
    3695             :     // Mark as needing calculation (don't calculate the weights yet,
    3696             :     // because a mask may render it unnecessary).
    3697      239383 :     memset(pabCalcX, false, nXDist * sizeof(bool));
    3698             : 
    3699      239383 :     FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample];
    3700      239383 :     CPLAssert(pfnGetWeight);
    3701             : 
    3702             :     // Skip sampling over edge of image.
    //               :     // Rows j..jMax and columns iMin..iMax are offsets relative to the
    //               :     // centre pixel, clipped so that every visited pixel is in the image.
    3703      239383 :     int j = poWK->nFiltInitY;
    3704      239383 :     int jMax = poWK->nYRadius;
    3705      239383 :     if (iSrcY + j < 0)
    3706         566 :         j = -iSrcY;
    3707      239383 :     if (iSrcY + jMax >= nSrcYSize)
    3708         662 :         jMax = nSrcYSize - iSrcY - 1;
    3709             : 
    3710      239383 :     int iMin = poWK->nFiltInitX;
    3711      239383 :     int iMax = poWK->nXRadius;
    3712      239383 :     if (iSrcX + iMin < 0)
    3713         566 :         iMin = -iSrcX;
    3714      239383 :     if (iSrcX + iMax >= nSrcXSize)
    3715         659 :         iMax = nSrcXSize - iSrcX - 1;
    3716             : 
    3717      239383 :     const int bXScaleBelow1 = (dfXScale < 1.0);
    3718      239383 :     const int bYScaleBelow1 = (dfYScale < 1.0);
    3719             : 
    //               :     // Starts one row above the first row (j - 1) because the loop adds
    //               :     // nSrcXSize at the top of every iteration.
    3720      239383 :     GPtrDiff_t iRowOffset =
    3721      239383 :         iSrcOffset + static_cast<GPtrDiff_t>(j - 1) * nSrcXSize + iMin;
    3722             : 
    3723             :     // Loop over pixel rows in the kernel.
    3724     1445930 :     for (; j <= jMax; ++j)
    3725             :     {
    3726     1206540 :         iRowOffset += nSrcXSize;
    3727             : 
    3728             :         // Get pixel values.
    3729             :         // We can potentially read extra elements after the "normal" end of the
    3730             :         // source arrays, but the contract of papabySrcImage[iBand],
    3731             :         // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
    3732             :         // is to have WARP_EXTRA_ELTS reserved at their end.
    //               :         // NOTE(review): GWKGetPixelRow is defined outside this chunk.  The
    //               :         // length passed is half the window width rounded up, so it
    //               :         // presumably counts pixel pairs - confirm.  When it returns false
    //               :         // the whole row is skipped and contributes nothing.
    3733     1206540 :         if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
    3734             :                             padfRowDensity, padfRowReal, padfRowImag))
    3735          72 :             continue;
    3736             : 
    3737             :         // Calculate the Y weight.
    //               :         // The offset is multiplied by the scale only when the scale is
    //               :         // below 1.
    3738             :         double dfWeight1 = (bYScaleBelow1)
    3739     1206470 :                                ? pfnGetWeight((j - dfDeltaY) * dfYScale)
    3740        1600 :                                : pfnGetWeight(j - dfDeltaY);
    3741             : 
    3742             :         // Iterate over pixels in row.
    3743     1206470 :         double dfAccumulatorRealLocal = 0.0;
    3744     1206470 :         double dfAccumulatorImagLocal = 0.0;
    3745     1206470 :         double dfAccumulatorDensityLocal = 0.0;
    3746     1206470 :         double dfAccumulatorWeightLocal = 0.0;
    3747             : 
    3748     7317420 :         for (int i = iMin; i <= iMax; ++i)
    3749             :         {
    3750             :             // Skip sampling if pixel has zero density.
    //               :             // Skipped pixels add nothing to any accumulator, including
    //               :             // the weight sum used for normalisation below.
    3751     6110940 :             if (padfRowDensity != nullptr &&
    3752       77277 :                 padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD)
    3753         546 :                 continue;
    3754             : 
    3755     6110400 :             double dfWeight2 = 0.0;
    3756             : 
    3757             :             // Make or use a cached set of weights for this row.
    //               :             // The X weight depends only on the column, so it is computed
    //               :             // the first time a column is used and reused for later rows.
    3758     6110400 :             if (pabCalcX[i - iMin])
    3759             :             {
    3760             :                 // Use saved weight value instead of recomputing it.
    3761     4903920 :                 dfWeight2 = padfWeightsX[i - iMin];
    3762             :             }
    3763             :             else
    3764             :             {
    3765             :                 // Calculate & save the X weight.
    3766     1206480 :                 padfWeightsX[i - iMin] = dfWeight2 =
    3767     1206480 :                     (bXScaleBelow1) ? pfnGetWeight((i - dfDeltaX) * dfXScale)
    3768        1600 :                                     : pfnGetWeight(i - dfDeltaX);
    3769             : 
    3770     1206480 :                 pabCalcX[i - iMin] = true;
    3771             :             }
    3772             : 
    3773             :             // Accumulate!
    3774     6110400 :             dfAccumulatorRealLocal += padfRowReal[i - iMin] * dfWeight2;
    3775     6110400 :             dfAccumulatorImagLocal += padfRowImag[i - iMin] * dfWeight2;
    3776     6110400 :             if (padfRowDensity != nullptr)
    3777       76731 :                 dfAccumulatorDensityLocal +=
    3778       76731 :                     padfRowDensity[i - iMin] * dfWeight2;
    3779     6110400 :             dfAccumulatorWeightLocal += dfWeight2;
    3780             :         }
    3781             : 
    //               :         // Fold the row sums into the totals, scaled by the row's Y weight.
    3782     1206470 :         dfAccumulatorReal += dfAccumulatorRealLocal * dfWeight1;
    3783     1206470 :         dfAccumulatorImag += dfAccumulatorImagLocal * dfWeight1;
    3784     1206470 :         dfAccumulatorDensity += dfAccumulatorDensityLocal * dfWeight1;
    3785     1206470 :         dfAccumulatorWeight += dfAccumulatorWeightLocal * dfWeight1;
    3786             :     }
    3787             : 
    //               :     // Nothing usable was accumulated: report zero density and fail.
    //               :     // *pdfReal and *pdfImag are left untouched on this path.
    3788      239383 :     if (dfAccumulatorWeight < 0.000001 ||
    3789        1887 :         (padfRowDensity != nullptr && dfAccumulatorDensity < 0.000001))
    3790             :     {
    3791           0 :         *pdfDensity = 0.0;
    3792           0 :         return false;
    3793             :     }
    3794             : 
    3795             :     // Calculate the output taking into account weighting.
    //               :     // The division is skipped when the total weight is within 0.00001
    //               :     // of 1.
    3796      239383 :     if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
    3797             :     {
    3798      239380 :         *pdfReal = dfAccumulatorReal / dfAccumulatorWeight;
    3799      239380 :         *pdfImag = dfAccumulatorImag / dfAccumulatorWeight;
    3800      239380 :         if (padfRowDensity != nullptr)
    3801        1884 :             *pdfDensity = dfAccumulatorDensity / dfAccumulatorWeight;
    3802             :         else
    3803      237496 :             *pdfDensity = 1.0;
    3804             :     }
    3805             :     else
    3806             :     {
    3807           3 :         *pdfReal = dfAccumulatorReal;
    3808           3 :         *pdfImag = dfAccumulatorImag;
    3809           3 :         if (padfRowDensity != nullptr)
    3810           3 :             *pdfDensity = dfAccumulatorDensity;
    3811             :         else
    3812           0 :             *pdfDensity = 1.0;
    3813             :     }
    3814             : 
    3815      239383 :     return true;
    3816             : }
    3817             : 
    3818             : /************************************************************************/
    3819             : /*                      GWKResampleOptimizedLanczos()                   */
    3820             : /************************************************************************/
    3821             : 
                       // Resamples one output sample for band iBand around the source
                       // position (dfSrcX, dfSrcY) using separable X/Y weights held in
                       // psWrkStruct.
                       //
                       // poWK        - warp kernel; supplies the source size, the X/Y scales,
                       //               the filter window (nFiltInitX/Y .. nXRadius/nYRadius)
                       //               and the working data type.
                       // iBand       - band index, forwarded to GWKGetPixelRow().
                       // dfSrcX/Y    - source position; 0.5 is subtracted before flooring
                       //               (presumably a pixel-centre convention - confirm).
                       // pdfDensity  - out: 0.0 on failure; on success the accumulated
                       //               density, or 1.0 when psWrkStruct->padfRowDensity is
                       //               null.
                       // pdfReal/pdfImag - out: accumulated real / imaginary value; written
                       //               on success only.
                       // psWrkStruct - scratch rows and weight arrays; it also caches the
                       //               last iSrcX/dfDeltaX and iSrcY/dfDeltaY for which
                       //               weights were computed and is updated by this call.
                       //
                       // Returns false when the accumulated weight is below 1e-6 or, with
                       // density information, when the accumulated density is below 1e-6
                       // or fewer than half of the window pixels pass the density
                       // threshold. Returns true otherwise.
    3822      533394 : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
    3823             :                                         double dfSrcX, double dfSrcY,
    3824             :                                         double *pdfDensity, double *pdfReal,
    3825             :                                         double *pdfImag,
    3826             :                                         GWKResampleWrkStruct *psWrkStruct)
    3827             : 
    3828             : {
    3829             :     // Save as local variables to avoid following pointers in loops.
    3830      533394 :     const int nSrcXSize = poWK->nSrcXSize;
    3831      533394 :     const int nSrcYSize = poWK->nSrcYSize;
    3832             : 
    3833      533394 :     double dfAccumulatorReal = 0.0;
    3834      533394 :     double dfAccumulatorImag = 0.0;
    3835      533394 :     double dfAccumulatorDensity = 0.0;
    3836      533394 :     double dfAccumulatorWeight = 0.0;
                           // Integer pixel index of the (0.5-shifted) position, its linear
                           // offset in the source band, and the fractional remainders
                           // dfDeltaX/dfDeltaY relative to that pixel.
    3837      533394 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    3838      533394 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    3839      533394 :     const GPtrDiff_t iSrcOffset =
    3840      533394 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    3841      533394 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3842      533394 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3843             : 
    3844      533394 :     const double dfXScale = poWK->dfXScale;
    3845      533394 :     const double dfYScale = poWK->dfYScale;
    3846             : 
    3847             :     // Space for saved X and Y weights.
    3848      533394 :     double *padfWeightsX = psWrkStruct->padfWeightsX;
    3849      533394 :     double *padfWeightsY = psWrkStruct->padfWeightsY;
    3850             : 
    3851             :     // Space for saving a row of pixels.
    3852      533394 :     double *padfRowDensity = psWrkStruct->padfRowDensity;
    3853      533394 :     double *padfRowReal = psWrkStruct->padfRowReal;
    3854      533394 :     double *padfRowImag = psWrkStruct->padfRowImag;
    3855             : 
    3856             :     // Skip sampling over edge of image: clamp the window to the source extent.
    3857      533394 :     int jMin = poWK->nFiltInitY;
    3858      533394 :     int jMax = poWK->nYRadius;
    3859      533394 :     if (iSrcY + jMin < 0)
    3860       17172 :         jMin = -iSrcY;
    3861      533394 :     if (iSrcY + jMax >= nSrcYSize)
    3862        4432 :         jMax = nSrcYSize - iSrcY - 1;
    3863             : 
    3864      533394 :     int iMin = poWK->nFiltInitX;
    3865      533394 :     int iMax = poWK->nXRadius;
    3866      533394 :     if (iSrcX + iMin < 0)
    3867       14772 :         iMin = -iSrcX;
    3868      533394 :     if (iSrcX + iMax >= nSrcXSize)
    3869        3832 :         iMax = nSrcXSize - iSrcX - 1;
    3870             : 
                           // Trim the X window to offsets whose (scaled) distance lies
                           // within [-3, 3].
    3871      533394 :     if (dfXScale < 1.0)
    3872             :     {
    3873      199112 :         while (iMin * dfXScale < -3.0)
    3874           0 :             iMin++;
    3875      199112 :         while (iMax * dfXScale > 3.0)
    3876           0 :             iMax--;
    3877             :         // padfWeightsX computed in GWKResampleCreateWrkStruct.
    3878             :     }
    3879             :     else
    3880             :     {
    3881      627142 :         while (iMin - dfDeltaX < -3.0)
    3882      292860 :             iMin++;
    3883      334282 :         while (iMax - dfDeltaX > 3.0)
    3884           0 :             iMax--;
    3885             : 
                               // Recompute the X weights only when the position differs
                               // from the one cached in psWrkStruct.
    3886      334282 :         if (iSrcX != psWrkStruct->iLastSrcX ||
    3887      209580 :             dfDeltaX != psWrkStruct->dfLastDeltaX)
    3888             :         {
    3889             :             // Optimisation of GWKLanczosSinc(i - dfDeltaX) based on the
    3890             :             // following trigonometric formulas.
    3891             : 
    3892             :             // TODO(schwehr): Move this somewhere where it can be rendered as LaTeX.
    3893             :             // For integer k, sin(M_PI * k) == 0 and cos(M_PI * k) == +/-1, so:
    3894             :             // sin(M_PI * (dfBase + k))
    3895             :             //   = sin(M_PI * dfBase) * cos(M_PI * k) + cos(M_PI * dfBase) * sin(M_PI * k)
    3896             :             //   = dfSinPIBase * cos(M_PI * k) + dfCosPIBase * sin(M_PI * k)
    3897             :             //   = dfSinPIBase * cos(M_PI * k)
    3898             :             //   = dfSinPIBase * (((k % 2) == 0) ? 1 : -1)
    3899             : 
    3900             :             // sin(M_PI / dfR * (dfBase + k))
    3901             :             //   = sin(M_PI / dfR * dfBase) * cos(M_PI / dfR * k) +
    3902             :             //     cos(M_PI / dfR * dfBase) * sin(M_PI / dfR * k)
    3903             :             //   = dfSinPIBaseOverR * cos(M_PI / dfR * k) + dfCosPIBaseOverR * sin(M_PI / dfR * k)
    3904             : 
                                   // With t = -M_PI / 3 * dfDeltaX, sin(3 * t) is obtained
                                   // from sin(t) through the triple-angle identity
                                   // sin(3t) = (3 - 4 * sin(t)^2) * sin(t).
    3905      334282 :             const double dfSinPIDeltaXOver3 = sin((-M_PI / 3.0) * dfDeltaX);
    3906      334282 :             const double dfSin2PIDeltaXOver3 =
    3907             :                 dfSinPIDeltaXOver3 * dfSinPIDeltaXOver3;
    3908             :             // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaX < PI/2.
    3909      334282 :             const double dfCosPIDeltaXOver3 = sqrt(1.0 - dfSin2PIDeltaXOver3);
    3910      334282 :             const double dfSinPIDeltaX =
    3911      334282 :                 (3.0 - 4 * dfSin2PIDeltaXOver3) * dfSinPIDeltaXOver3;
    3912      334282 :             const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
    3913      334282 :             const double dfInvPI2Over3xSinPIDeltaX =
    3914             :                 dfInvPI2Over3 * dfSinPIDeltaX;
    3915      334282 :             const double dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 =
    3916      334282 :                 -0.5 * dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3;
                                   // sin(M_PI / 3), i.e. sqrt(3) / 2.
    3917      334282 :             const double dfSinPIOver3 = 0.8660254037844386;
    3918      334282 :             const double dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3 =
    3919      334282 :                 dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaX * dfCosPIDeltaXOver3;
                                   // For x = i - dfDeltaX, the numerator
                                   // 3 / M_PI^2 * sin(M_PI * x) * sin(M_PI * x / 3) only
                                   // takes three distinct values, selected by i modulo 3.
    3920             :             const double padfCst[] = {
    3921      334282 :                 dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3,
    3922      334282 :                 dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 -
    3923             :                     dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3,
    3924      334282 :                 dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 +
    3925      334282 :                     dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3};
    3926             : 
                                   // Weight is numerator / x^2; x == 0 is special-cased to 1
                                   // to avoid the division by zero.
    3927     2350460 :             for (int i = iMin; i <= iMax; ++i)
    3928             :             {
    3929     2016170 :                 const double dfX = i - dfDeltaX;
    3930     2016170 :                 if (dfX == 0.0)
    3931       29482 :                     padfWeightsX[i - poWK->nFiltInitX] = 1.0;
    3932             :                 else
    3933     1986690 :                     padfWeightsX[i - poWK->nFiltInitX] =
    3934     1986690 :                         padfCst[(i + 3) % 3] / (dfX * dfX);
    3935             : #if DEBUG_VERBOSE
    3936             :                     // TODO(schwehr): AlmostEqual.
    3937             :                     // CPLAssert(fabs(padfWeightsX[i-poWK->nFiltInitX] -
    3938             :                     //               GWKLanczosSinc(dfX, 3.0)) < 1e-10);
    3939             : #endif
    3940             :             }
    3941             : 
                                   // Remember which position the cached X weights belong to.
    3942      334282 :             psWrkStruct->iLastSrcX = iSrcX;
    3943      334282 :             psWrkStruct->dfLastDeltaX = dfDeltaX;
    3944             :         }
    3945             :     }
    3946             : 
                           // Same window trimming and weight caching as above, for the Y axis.
    3947      533394 :     if (dfYScale < 1.0)
    3948             :     {
    3949      199112 :         while (jMin * dfYScale < -3.0)
    3950           0 :             jMin++;
    3951      199112 :         while (jMax * dfYScale > 3.0)
    3952           0 :             jMax--;
    3953             :         // padfWeightsY computed in GWKResampleCreateWrkStruct.
    3954             :     }
    3955             :     else
    3956             :     {
    3957      588742 :         while (jMin - dfDeltaY < -3.0)
    3958      254460 :             jMin++;
    3959      334282 :         while (jMax - dfDeltaY > 3.0)
    3960           0 :             jMax--;
    3961             : 
    3962      334282 :         if (iSrcY != psWrkStruct->iLastSrcY ||
    3963      333832 :             dfDeltaY != psWrkStruct->dfLastDeltaY)
    3964             :         {
    3965         932 :             const double dfSinPIDeltaYOver3 = sin((-M_PI / 3.0) * dfDeltaY);
    3966         932 :             const double dfSin2PIDeltaYOver3 =
    3967             :                 dfSinPIDeltaYOver3 * dfSinPIDeltaYOver3;
    3968             :             // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaY < PI/2.
    3969         932 :             const double dfCosPIDeltaYOver3 = sqrt(1.0 - dfSin2PIDeltaYOver3);
    3970         932 :             const double dfSinPIDeltaY =
    3971         932 :                 (3.0 - 4.0 * dfSin2PIDeltaYOver3) * dfSinPIDeltaYOver3;
    3972         932 :             const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
    3973         932 :             const double dfInvPI2Over3xSinPIDeltaY =
    3974             :                 dfInvPI2Over3 * dfSinPIDeltaY;
    3975         932 :             const double dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 =
    3976         932 :                 -0.5 * dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3;
    3977         932 :             const double dfSinPIOver3 = 0.8660254037844386;
    3978         932 :             const double dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3 =
    3979         932 :                 dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaY * dfCosPIDeltaYOver3;
    3980             :             const double padfCst[] = {
    3981         932 :                 dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3,
    3982         932 :                 dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 -
    3983             :                     dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3,
    3984         932 :                 dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 +
    3985         932 :                     dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3};
    3986             : 
    3987        6365 :             for (int j = jMin; j <= jMax; ++j)
    3988             :             {
    3989        5433 :                 const double dfY = j - dfDeltaY;
    3990        5433 :                 if (dfY == 0.0)
    3991         296 :                     padfWeightsY[j - poWK->nFiltInitY] = 1.0;
    3992             :                 else
    3993        5137 :                     padfWeightsY[j - poWK->nFiltInitY] =
    3994        5137 :                         padfCst[(j + 3) % 3] / (dfY * dfY);
    3995             : #if DEBUG_VERBOSE
    3996             :                     // TODO(schwehr): AlmostEqual.
    3997             :                     // CPLAssert(fabs(padfWeightsY[j-poWK->nFiltInitY] -
    3998             :                     //               GWKLanczosSinc(dfY, 3.0)) < 1e-10);
    3999             : #endif
    4000             :             }
    4001             : 
    4002         932 :             psWrkStruct->iLastSrcY = iSrcY;
    4003         932 :             psWrkStruct->dfLastDeltaY = dfDeltaY;
    4004             :         }
    4005             :     }
    4006             : 
                           // Start one row above jMin: the row loop below advances
                           // iRowOffset by nSrcXSize before each use.
    4007      533394 :     GPtrDiff_t iRowOffset =
    4008      533394 :         iSrcOffset + static_cast<GPtrDiff_t>(jMin - 1) * nSrcXSize + iMin;
    4009             : 
    4010             :     // If we have no density information, the accumulated weight is simply
    4011             :     // the product of the summed X weights and the summed Y weights.
    4012      533394 :     if (padfRowDensity == nullptr)
    4013             :     {
    4014      533394 :         double dfRowAccWeight = 0.0;
    4015     7466350 :         for (int i = iMin; i <= iMax; ++i)
    4016             :         {
    4017     6932950 :             dfRowAccWeight += padfWeightsX[i - poWK->nFiltInitX];
    4018             :         }
    4019      533394 :         double dfColAccWeight = 0.0;
    4020     7500750 :         for (int j = jMin; j <= jMax; ++j)
    4021             :         {
    4022     6967350 :             dfColAccWeight += padfWeightsY[j - poWK->nFiltInitY];
    4023             :         }
    4024      533394 :         dfAccumulatorWeight = dfRowAccWeight * dfColAccWeight;
    4025             :     }
    4026             : 
    4027      533394 :     const bool bIsNonComplex = !GDALDataTypeIsComplex(poWK->eWorkingDataType);
    4028             : 
    4029             :     // Loop over pixel rows in the kernel.
    4030      533394 :     int nCountValid = 0;
    4031     7500750 :     for (int j = jMin; j <= jMax; ++j)
    4032             :     {
    4033     6967350 :         iRowOffset += nSrcXSize;
    4034             : 
    4035             :         // Get pixel values.
    4036             :         // We can potentially read extra elements after the "normal" end of the
    4037             :         // source arrays, but the contract of papabySrcImage[iBand],
    4038             :         // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
    4039             :         // is to have WARP_EXTRA_ELTS reserved at their end.
                               // NOTE(review): (iMax - iMin + 2) / 2 is presumably the number
                               // of pixel pairs needed to cover iMin..iMax - confirm against
                               // GWKGetPixelRow(). Also note that when a row is skipped here
                               // without density information, its weight is still part of
                               // dfAccumulatorWeight computed above.
    4040     6967350 :         if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
    4041             :                             padfRowDensity, padfRowReal, padfRowImag))
    4042           0 :             continue;
    4043             : 
                               // Y weight shared by every pixel of this row.
    4044     6967350 :         const double dfWeight1 = padfWeightsY[j - poWK->nFiltInitY];
    4045             : 
    4046             :         // Iterate over pixels in row.
    4047     6967350 :         if (padfRowDensity != nullptr)
    4048             :         {
    4049           0 :             for (int i = iMin; i <= iMax; ++i)
    4050             :             {
    4051             :                 // Skip sampling if pixel has zero density.
    4052           0 :                 if (padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD)
    4053           0 :                     continue;
    4054             : 
    4055           0 :                 nCountValid++;
    4056             : 
    4057             :                 //  Use a cached set of weights for this row.
    4058           0 :                 const double dfWeight2 =
    4059           0 :                     dfWeight1 * padfWeightsX[i - poWK->nFiltInitX];
    4060             : 
    4061             :                 // Accumulate!
    4062           0 :                 dfAccumulatorReal += padfRowReal[i - iMin] * dfWeight2;
    4063           0 :                 dfAccumulatorImag += padfRowImag[i - iMin] * dfWeight2;
    4064           0 :                 dfAccumulatorDensity += padfRowDensity[i - iMin] * dfWeight2;
    4065           0 :                 dfAccumulatorWeight += dfWeight2;
    4066             :             }
    4067             :         }
                               // Without density, sum the row with the X weights only and
                               // apply the Y weight once per row; the imaginary part is
                               // skipped for non-complex working types.
    4068     6967350 :         else if (bIsNonComplex)
    4069             :         {
    4070     6966180 :             double dfRowAccReal = 0.0;
    4071   141134000 :             for (int i = iMin; i <= iMax; ++i)
    4072             :             {
    4073   134168000 :                 const double dfWeight2 = padfWeightsX[i - poWK->nFiltInitX];
    4074             : 
    4075             :                 // Accumulate!
    4076   134168000 :                 dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
    4077             :             }
    4078             : 
    4079     6966180 :             dfAccumulatorReal += dfRowAccReal * dfWeight1;
    4080             :         }
    4081             :         else
    4082             :         {
    4083        1176 :             double dfRowAccReal = 0.0;
    4084        1176 :             double dfRowAccImag = 0.0;
    4085        7040 :             for (int i = iMin; i <= iMax; ++i)
    4086             :             {
    4087        5864 :                 const double dfWeight2 = padfWeightsX[i - poWK->nFiltInitX];
    4088             : 
    4089             :                 // Accumulate!
    4090        5864 :                 dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
    4091        5864 :                 dfRowAccImag += padfRowImag[i - iMin] * dfWeight2;
    4092             :             }
    4093             : 
    4094        1176 :             dfAccumulatorReal += dfRowAccReal * dfWeight1;
    4095        1176 :             dfAccumulatorImag += dfRowAccImag * dfWeight1;
    4096             :         }
    4097             :     }
    4098             : 
                           // Reject the sample when there is (almost) no weight or, with
                           // density information, (almost) no density or fewer than half of
                           // the window pixels (integer division) were valid.
    4099      533394 :     if (dfAccumulatorWeight < 0.000001 ||
    4100           0 :         (padfRowDensity != nullptr &&
    4101           0 :          (dfAccumulatorDensity < 0.000001 ||
    4102           0 :           nCountValid < (jMax - jMin + 1) * (iMax - iMin + 1) / 2)))
    4103             :     {
    4104           0 :         *pdfDensity = 0.0;
    4105           0 :         return false;
    4106             :     }
    4107             : 
    4108             :     // Calculate the output taking into account weighting: normalise only
                           // when the accumulated weight differs from 1 by more than 1e-5.
    4109      533394 :     if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
    4110             :     {
    4111      509096 :         const double dfInvAcc = 1.0 / dfAccumulatorWeight;
    4112      509096 :         *pdfReal = dfAccumulatorReal * dfInvAcc;
    4113      509096 :         *pdfImag = dfAccumulatorImag * dfInvAcc;
    4114      509096 :         if (padfRowDensity != nullptr)
    4115           0 :             *pdfDensity = dfAccumulatorDensity * dfInvAcc;
    4116             :         else
    4117      509096 :             *pdfDensity = 1.0;
    4118             :     }
    4119             :     else
    4120             :     {
    4121       24298 :         *pdfReal = dfAccumulatorReal;
    4122       24298 :         *pdfImag = dfAccumulatorImag;
    4123       24298 :         if (padfRowDensity != nullptr)
    4124           0 :             *pdfDensity = dfAccumulatorDensity;
    4125             :         else
    4126       24298 :             *pdfDensity = 1.0;
    4127             :     }
    4128             : 
    4129      533394 :     return true;
    4130             : }
    4131             : 
    4132             : /************************************************************************/
    4133             : /*                        GWKResampleNoMasksT()                         */
    4134             : /************************************************************************/
    4135             : 
                       // Resamples one value of type T for band iBand at the source position
                       // (dfSrcX, dfSrcY); no validity or density mask is consulted here.
                       //
                       // The filter is selected by poWK->eResample through the apfGWKFilter
                       // and apfGWKFilter4Values tables. The X weights are computed once into
                       // padfWeight and reused for every row; the Y weight is computed per
                       // row.
                       //
                       // padfWeight - caller-provided scratch array; one entry is written per
                       //              column of the clamped window iMin..iMax.
                       // pValue     - out: the weighted average, passed through
                       //              GWKClampValueT<T>().
                       //
                       // Returns true, except when the early check delegates to
                       // GWKBilinearResampleNoMasks4SampleT(), whose result is returned as is.
    4136             : template <class T>
    4137             : static bool GWKResampleNoMasksT(const GDALWarpKernel *poWK, int iBand,
    4138             :                                 double dfSrcX, double dfSrcY, T *pValue,
    4139             :                                 double *padfWeight)
    4140             : 
    4141             : {
    4142             :     // Commonly used; save locally.
    4143             :     const int nSrcXSize = poWK->nSrcXSize;
    4144             :     const int nSrcYSize = poWK->nSrcYSize;
    4145             : 
                           // Integer pixel index of the (0.5-shifted) position and its
                           // linear offset in the source band.
    4146             :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    4147             :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    4148             :     const GPtrDiff_t iSrcOffset =
    4149             :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    4150             : 
    4151             :     const int nXRadius = poWK->nXRadius;
    4152             :     const int nYRadius = poWK->nYRadius;
    4153             : 
    4154             :     // Politely refuse to process invalid coordinates or obscenely small image.
    4155             :     if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
    4156             :         nYRadius > nSrcYSize)
    4157             :         return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    4158             :                                                   pValue);
    4159             : 
    4160             :     T *pSrcBand = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
    4161             :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    4162             :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    4163             : 
    4164             :     const FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample];
    4165             :     CPLAssert(pfnGetWeight);
    4166             :     const FilterFunc4ValuesType pfnGetWeight4Values =
    4167             :         apfGWKFilter4Values[poWK->eResample];
    4168             :     CPLAssert(pfnGetWeight4Values);
    4169             : 
                           // Scales above 1.0 are clamped to 1.0 before being applied to
                           // the filter argument.
    4170             :     const double dfXScale = std::min(poWK->dfXScale, 1.0);
    4171             :     const double dfYScale = std::min(poWK->dfYScale, 1.0);
    4172             : 
    4173             :     // Accumulated X and Y weights, used to normalise the result.
    4174             :     double dfAccumulatorWeightHorizontal = 0.0;
    4175             :     double dfAccumulatorWeightVertical = 0.0;
    4176             : 
                           // Column window [iMin, iMax], clamped to the source extent.
    4177             :     int iMin = 1 - nXRadius;
    4178             :     if (iSrcX + iMin < 0)
    4179             :         iMin = -iSrcX;
    4180             :     int iMax = nXRadius;
    4181             :     if (iSrcX + iMax >= nSrcXSize - 1)
    4182             :         iMax = nSrcXSize - 1 - iSrcX;
    4183             :     int i = iMin;  // Used after for.
    4184             :     int iC = 0;    // Used after for.
                           // Four columns at a time: store the four scaled distances, then
                           // hand them to the 4-value filter and accumulate its return value.
                           // NOTE(review): the row loop below reads padfWeight as weights, so
                           // pfnGetWeight4Values presumably replaces the four inputs in place
                           // and returns their sum - confirm against the filter functions.
    4185             :     for (; i + 2 < iMax; i += 4, iC += 4)
    4186             :     {
    4187             :         padfWeight[iC] = (i - dfDeltaX) * dfXScale;
    4188             :         padfWeight[iC + 1] = padfWeight[iC] + dfXScale;
    4189             :         padfWeight[iC + 2] = padfWeight[iC + 1] + dfXScale;
    4190             :         padfWeight[iC + 3] = padfWeight[iC + 2] + dfXScale;
    4191             :         dfAccumulatorWeightHorizontal += pfnGetWeight4Values(padfWeight + iC);
    4192             :     }
                           // Remaining columns (at most three), one at a time.
    4193             :     for (; i <= iMax; ++i, ++iC)
    4194             :     {
    4195             :         const double dfWeight = pfnGetWeight((i - dfDeltaX) * dfXScale);
    4196             :         padfWeight[iC] = dfWeight;
    4197             :         dfAccumulatorWeightHorizontal += dfWeight;
    4198             :     }
    4199             : 
                           // Row window [j, jMax], clamped to the source extent.
    4200             :     int j = 1 - nYRadius;
    4201             :     if (iSrcY + j < 0)
    4202             :         j = -iSrcY;
    4203             :     int jMax = nYRadius;
    4204             :     if (iSrcY + jMax >= nSrcYSize - 1)
    4205             :         jMax = nSrcYSize - 1 - iSrcY;
    4206             : 
    4207             :     double dfAccumulator = 0.0;
    4208             : 
    4209             :     for (; j <= jMax; ++j)
    4210             :     {
    4211             :         const GPtrDiff_t iSampJ =
    4212             :             iSrcOffset + static_cast<GPtrDiff_t>(j) * nSrcXSize;
    4213             : 
    4214             :         // Loop over all pixels in the row, using two partial sums.
    4215             :         double dfAccumulatorLocal = 0.0;
    4216             :         double dfAccumulatorLocal2 = 0.0;
    4217             :         iC = 0;
    4218             :         i = iMin;
    4219             :         // Process by chunk of 4 cols.
    4220             :         for (; i + 2 < iMax; i += 4, iC += 4)
    4221             :         {
    4222             :             // Retrieve the pixel & accumulate.
    4223             :             dfAccumulatorLocal += pSrcBand[i + iSampJ] * padfWeight[iC];
    4224             :             dfAccumulatorLocal += pSrcBand[i + 1 + iSampJ] * padfWeight[iC + 1];
    4225             :             dfAccumulatorLocal2 +=
    4226             :                 pSrcBand[i + 2 + iSampJ] * padfWeight[iC + 2];
    4227             :             dfAccumulatorLocal2 +=
    4228             :                 pSrcBand[i + 3 + iSampJ] * padfWeight[iC + 3];
    4229             :         }
    4230             :         dfAccumulatorLocal += dfAccumulatorLocal2;
                               // Tail: at most three columns remain; take a pair if at least
                               // two are left, then a final single column if one is left.
    4231             :         if (i < iMax)
    4232             :         {
    4233             :             dfAccumulatorLocal += pSrcBand[i + iSampJ] * padfWeight[iC];
    4234             :             dfAccumulatorLocal += pSrcBand[i + 1 + iSampJ] * padfWeight[iC + 1];
    4235             :             i += 2;
    4236             :             iC += 2;
    4237             :         }
    4238             :         if (i == iMax)
    4239             :         {
    4240             :             dfAccumulatorLocal += pSrcBand[i + iSampJ] * padfWeight[iC];
    4241             :         }
    4242             : 
    4243             :         // Calculate the Y weight.
    4244             :         const double dfWeight = pfnGetWeight((j - dfDeltaY) * dfYScale);
    4245             :         dfAccumulator += dfWeight * dfAccumulatorLocal;
    4246             :         dfAccumulatorWeightVertical += dfWeight;
    4247             :     }
    4248             : 
                           // The weights are separable, so the total weight is the product
                           // of the X and Y sums.
    4249             :     const double dfAccumulatorWeight =
    4250             :         dfAccumulatorWeightHorizontal * dfAccumulatorWeightVertical;
    4251             : 
                           // NOTE(review): dfAccumulatorWeight is not checked against zero
                           // before this division - confirm callers cannot reach this point
                           // with an empty window.
    4252             :     *pValue = GWKClampValueT<T>(dfAccumulator / dfAccumulatorWeight);
    4253             : 
    4254             :     return true;
    4255             : }
    4256             : 
    4257             : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
    4258             : /* Could possibly be used too on 32bit, but we would need to check at runtime */
    4259             : #if defined(__x86_64) || defined(_M_X64)
    4260             : 
    4261             : /************************************************************************/
    4262             : /*                    GWKResampleNoMasks_SSE2_T()                       */
    4263             : /************************************************************************/
    4264             : 
    4265             : template <class T>
    4266     9160629 : static bool GWKResampleNoMasks_SSE2_T(const GDALWarpKernel *poWK, int iBand,
    4267             :                                       double dfSrcX, double dfSrcY, T *pValue,
    4268             :                                       double *padfWeight)
    4269             : {
    4270             :     // Commonly used; save locally.
    4271     9160629 :     const int nSrcXSize = poWK->nSrcXSize;
    4272     9160629 :     const int nSrcYSize = poWK->nSrcYSize;
    4273             : 
    4274     9160629 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    4275     9160629 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    4276     9160629 :     const GPtrDiff_t iSrcOffset =
    4277     9160629 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    4278     9160629 :     const int nXRadius = poWK->nXRadius;
    4279     9160629 :     const int nYRadius = poWK->nYRadius;
    4280             : 
    4281             :     // Politely refuse to process invalid coordinates or obscenely small image.
    4282     9160629 :     if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
    4283             :         nYRadius > nSrcYSize)
    4284       43826 :         return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    4285           3 :                                                   pValue);
    4286             : 
    4287     9116797 :     const T *pSrcBand =
    4288     9116797 :         reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
    4289             : 
    4290     9116797 :     const FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample];
    4291     9116797 :     CPLAssert(pfnGetWeight);
    4292     9116797 :     const FilterFunc4ValuesType pfnGetWeight4Values =
    4293     9116797 :         apfGWKFilter4Values[poWK->eResample];
    4294     9116797 :     CPLAssert(pfnGetWeight4Values);
    4295             : 
    4296     9116797 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    4297     9116797 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    4298     9116797 :     const double dfXScale = std::min(poWK->dfXScale, 1.0);
    4299     9159457 :     const double dfYScale = std::min(poWK->dfYScale, 1.0);
    4300             : 
    4301             :     // Loop over all rows in the kernel.
    4302     9144167 :     double dfAccumulatorWeightHorizontal = 0.0;
    4303     9144167 :     double dfAccumulatorWeightVertical = 0.0;
    4304     9144167 :     double dfAccumulator = 0.0;
    4305             : 
    4306     9144167 :     int iMin = 1 - nXRadius;
    4307     9144167 :     if (iSrcX + iMin < 0)
    4308       43143 :         iMin = -iSrcX;
    4309     9144167 :     int iMax = nXRadius;
    4310     9144167 :     if (iSrcX + iMax >= nSrcXSize - 1)
    4311       38106 :         iMax = nSrcXSize - 1 - iSrcX;
    4312             :     int i, iC;
    4313    20603714 :     for (iC = 0, i = iMin; i + 2 < iMax; i += 4, iC += 4)
    4314             :     {
    4315    11448807 :         padfWeight[iC] = (i - dfDeltaX) * dfXScale;
    4316    11448807 :         padfWeight[iC + 1] = padfWeight[iC] + dfXScale;
    4317    11448807 :         padfWeight[iC + 2] = padfWeight[iC + 1] + dfXScale;
    4318    11448807 :         padfWeight[iC + 3] = padfWeight[iC + 2] + dfXScale;
    4319    11448807 :         dfAccumulatorWeightHorizontal += pfnGetWeight4Values(padfWeight + iC);
    4320             :     }
    4321     9452142 :     for (; i <= iMax; ++i, ++iC)
    4322             :     {
    4323      290914 :         double dfWeight = pfnGetWeight((i - dfDeltaX) * dfXScale);
    4324      297235 :         padfWeight[iC] = dfWeight;
    4325      297235 :         dfAccumulatorWeightHorizontal += dfWeight;
    4326             :     }
    4327             : 
    4328     9161227 :     int j = 1 - nYRadius;
    4329     9161227 :     if (iSrcY + j < 0)
    4330       49554 :         j = -iSrcY;
    4331     9161227 :     int jMax = nYRadius;
    4332     9161227 :     if (iSrcY + jMax >= nSrcYSize - 1)
    4333       42428 :         jMax = nSrcYSize - 1 - iSrcY;
    4334             : 
    4335             :     // Process by chunk of 4 rows.
    4336    19372211 :     for (; j + 2 < jMax; j += 4)
    4337             :     {
    4338    10230784 :         const GPtrDiff_t iSampJ =
    4339    10230784 :             iSrcOffset + static_cast<GPtrDiff_t>(j) * nSrcXSize;
    4340             : 
    4341             :         // Loop over all pixels in the row.
    4342    10230784 :         iC = 0;
    4343    10230784 :         i = iMin;
    4344             :         // Process by chunk of 4 cols.
    4345    10230784 :         XMMReg4Double v_acc_1 = XMMReg4Double::Zero();
    4346    10217574 :         XMMReg4Double v_acc_2 = XMMReg4Double::Zero();
    4347    10169164 :         XMMReg4Double v_acc_3 = XMMReg4Double::Zero();
    4348    10227424 :         XMMReg4Double v_acc_4 = XMMReg4Double::Zero();
    4349    26715216 :         for (; i + 2 < iMax; i += 4, iC += 4)
    4350             :         {
    4351             :             // Retrieve the pixel & accumulate.
    4352    16457712 :             XMMReg4Double v_pixels_1 =
    4353    16457712 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
    4354    16501812 :             XMMReg4Double v_pixels_2 =
    4355    16501812 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + nSrcXSize);
    4356    16507112 :             XMMReg4Double v_pixels_3 =
    4357    16507112 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
    4358    16484912 :             XMMReg4Double v_pixels_4 =
    4359    16484912 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
    4360             : 
    4361    16494612 :             XMMReg4Double v_padfWeight =
    4362    16494612 :                 XMMReg4Double::Load4Val(padfWeight + iC);
    4363             : 
    4364    16491412 :             v_acc_1 += v_pixels_1 * v_padfWeight;
    4365    16490612 :             v_acc_2 += v_pixels_2 * v_padfWeight;
    4366    16492412 :             v_acc_3 += v_pixels_3 * v_padfWeight;
    4367    16480812 :             v_acc_4 += v_pixels_4 * v_padfWeight;
    4368             :         }
    4369             : 
    4370    10257524 :         if (i < iMax)
    4371             :         {
    4372      142904 :             XMMReg2Double v_pixels_1 =
    4373      142904 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ);
    4374      142904 :             XMMReg2Double v_pixels_2 =
    4375      142904 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + nSrcXSize);
    4376      142904 :             XMMReg2Double v_pixels_3 =
    4377      142904 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
    4378      142904 :             XMMReg2Double v_pixels_4 =
    4379      142904 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
    4380             : 
    4381      142904 :             XMMReg2Double v_padfWeight =
    4382      142904 :                 XMMReg2Double::Load2Val(padfWeight + iC);
    4383             : 
    4384      142904 :             v_acc_1.AddToLow(v_pixels_1 * v_padfWeight);
    4385      142904 :             v_acc_2.AddToLow(v_pixels_2 * v_padfWeight);
    4386      142904 :             v_acc_3.AddToLow(v_pixels_3 * v_padfWeight);
    4387      142904 :             v_acc_4.AddToLow(v_pixels_4 * v_padfWeight);
    4388             : 
    4389      142904 :             i += 2;
    4390      142904 :             iC += 2;
    4391             :         }
    4392             : 
    4393    10257524 :         double dfAccumulatorLocal_1 = v_acc_1.GetHorizSum();
    4394    10236274 :         double dfAccumulatorLocal_2 = v_acc_2.GetHorizSum();
    4395    10230644 :         double dfAccumulatorLocal_3 = v_acc_3.GetHorizSum();
    4396    10256804 :         double dfAccumulatorLocal_4 = v_acc_4.GetHorizSum();
    4397             : 
    4398    10257504 :         if (i == iMax)
    4399             :         {
    4400       49195 :             dfAccumulatorLocal_1 +=
    4401       49195 :                 static_cast<double>(pSrcBand[i + iSampJ]) * padfWeight[iC];
    4402       49195 :             dfAccumulatorLocal_2 +=
    4403       49195 :                 static_cast<double>(pSrcBand[i + iSampJ + nSrcXSize]) *
    4404       49195 :                 padfWeight[iC];
    4405       49195 :             dfAccumulatorLocal_3 +=
    4406       49195 :                 static_cast<double>(pSrcBand[i + iSampJ + 2 * nSrcXSize]) *
    4407       49195 :                 padfWeight[iC];
    4408       49195 :             dfAccumulatorLocal_4 +=
    4409       49195 :                 static_cast<double>(pSrcBand[i + iSampJ + 3 * nSrcXSize]) *
    4410       49195 :                 padfWeight[iC];
    4411             :         }
    4412             : 
    4413             :         // Calculate the Y weight.
    4414    10257504 :         const double dfWeight0 = (j - dfDeltaY) * dfYScale;
    4415    10257504 :         const double dfWeight1 = dfWeight0 + dfYScale;
    4416    10257504 :         const double dfWeight2 = dfWeight1 + dfYScale;
    4417    10257504 :         const double dfWeight3 = dfWeight2 + dfYScale;
    4418    10257504 :         double adfWeight[4] = {dfWeight0, dfWeight1, dfWeight2, dfWeight3};
    4419             : 
    4420    10257504 :         dfAccumulatorWeightVertical += pfnGetWeight4Values(adfWeight);
    4421    10210994 :         dfAccumulator += adfWeight[0] * dfAccumulatorLocal_1;
    4422    10210994 :         dfAccumulator += adfWeight[1] * dfAccumulatorLocal_2;
    4423    10210994 :         dfAccumulator += adfWeight[2] * dfAccumulatorLocal_3;
    4424    10210994 :         dfAccumulator += adfWeight[3] * dfAccumulatorLocal_4;
    4425             :     }
    4426    22252347 :     for (; j <= jMax; ++j)
    4427             :     {
    4428    13088940 :         const GPtrDiff_t iSampJ =
    4429    13088940 :             iSrcOffset + static_cast<GPtrDiff_t>(j) * nSrcXSize;
    4430             : 
    4431             :         // Loop over all pixels in the row.
    4432    13088940 :         iC = 0;
    4433    13088940 :         i = iMin;
    4434             :         // Process by chunk of 4 cols.
    4435    13088940 :         XMMReg4Double v_acc = XMMReg4Double::Zero();
    4436    26136963 :         for (; i + 2 < iMax; i += 4, iC += 4)
    4437             :         {
    4438             :             // Retrieve the pixel & accumulate.
    4439    13086523 :             XMMReg4Double v_pixels =
    4440    13086523 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
    4441    13133123 :             XMMReg4Double v_padfWeight =
    4442    13133123 :                 XMMReg4Double::Load4Val(padfWeight + iC);
    4443             : 
    4444    13142823 :             v_acc += v_pixels * v_padfWeight;
    4445             :         }
    4446             : 
    4447    13050440 :         double dfAccumulatorLocal = v_acc.GetHorizSum();
    4448             : 
    4449    13050840 :         if (i < iMax)
    4450             :         {
    4451      173964 :             dfAccumulatorLocal += pSrcBand[i + iSampJ] * padfWeight[iC];
    4452      173964 :             dfAccumulatorLocal += pSrcBand[i + 1 + iSampJ] * padfWeight[iC + 1];
    4453      173964 :             i += 2;
    4454      173964 :             iC += 2;
    4455             :         }
    4456    13050840 :         if (i == iMax)
    4457             :         {
    4458       33020 :             dfAccumulatorLocal +=
    4459       33020 :                 static_cast<double>(pSrcBand[i + iSampJ]) * padfWeight[iC];
    4460             :         }
    4461             : 
    4462             :         // Calculate the Y weight.
    4463    13050840 :         double dfWeight = pfnGetWeight((j - dfDeltaY) * dfYScale);
    4464    13110940 :         dfAccumulator += dfWeight * dfAccumulatorLocal;
    4465    13110940 :         dfAccumulatorWeightVertical += dfWeight;
    4466             :     }
    4467             : 
    4468     9163457 :     const double dfAccumulatorWeight =
    4469             :         dfAccumulatorWeightHorizontal * dfAccumulatorWeightVertical;
    4470             : 
    4471     9163457 :     *pValue = GWKClampValueT<T>(dfAccumulator / dfAccumulatorWeight);
    4472             : 
    4473     9151367 :     return true;
    4474             : }
    4475             : 
    4476             : /************************************************************************/
    4477             : /*                     GWKResampleNoMasksT<GByte>()                     */
    4478             : /************************************************************************/
    4479             : 
    4480             : template <>
    4481     8581420 : bool GWKResampleNoMasksT<GByte>(const GDALWarpKernel *poWK, int iBand,
    4482             :                                 double dfSrcX, double dfSrcY, GByte *pValue,
    4483             :                                 double *padfWeight)
    4484             : {
    4485     8581420 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    4486     8518860 :                                      padfWeight);
    4487             : }
    4488             : 
    4489             : /************************************************************************/
    4490             : /*                     GWKResampleNoMasksT<GInt16>()                    */
    4491             : /************************************************************************/
    4492             : 
    4493             : template <>
    4494      252563 : bool GWKResampleNoMasksT<GInt16>(const GDALWarpKernel *poWK, int iBand,
    4495             :                                  double dfSrcX, double dfSrcY, GInt16 *pValue,
    4496             :                                  double *padfWeight)
    4497             : {
    4498      252563 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    4499      252563 :                                      padfWeight);
    4500             : }
    4501             : 
    4502             : /************************************************************************/
    4503             : /*                     GWKResampleNoMasksT<GUInt16>()                   */
    4504             : /************************************************************************/
    4505             : 
    4506             : template <>
    4507      343446 : bool GWKResampleNoMasksT<GUInt16>(const GDALWarpKernel *poWK, int iBand,
    4508             :                                   double dfSrcX, double dfSrcY, GUInt16 *pValue,
    4509             :                                   double *padfWeight)
    4510             : {
    4511      343446 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    4512      343446 :                                      padfWeight);
    4513             : }
    4514             : 
    4515             : /************************************************************************/
    4516             : /*                     GWKResampleNoMasksT<float>()                     */
    4517             : /************************************************************************/
    4518             : 
    4519             : template <>
    4520        2500 : bool GWKResampleNoMasksT<float>(const GDALWarpKernel *poWK, int iBand,
    4521             :                                 double dfSrcX, double dfSrcY, float *pValue,
    4522             :                                 double *padfWeight)
    4523             : {
    4524        2500 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    4525        2500 :                                      padfWeight);
    4526             : }
    4527             : 
    4528             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    4529             : 
    4530             : /************************************************************************/
    4531             : /*                     GWKResampleNoMasksT<double>()                    */
    4532             : /************************************************************************/
    4533             : 
    4534             : template <>
    4535             : bool GWKResampleNoMasksT<double>(const GDALWarpKernel *poWK, int iBand,
    4536             :                                  double dfSrcX, double dfSrcY, double *pValue,
    4537             :                                  double *padfWeight)
    4538             : {
    4539             :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    4540             :                                      padfWeight);
    4541             : }
    4542             : 
    4543             : #endif /* INSTANTIATE_FLOAT64_SSE2_IMPL */
    4544             : 
    4545             : #endif /* defined(__x86_64) || defined(_M_X64) */
    4546             : 
    4547             : /************************************************************************/
    4548             : /*                     GWKRoundSourceCoordinates()                      */
    4549             : /************************************************************************/
    4550             : 
    4551        2500 : static void GWKRoundSourceCoordinates(
    4552             :     int nDstXSize, double *padfX, double *padfY, double *padfZ, int *pabSuccess,
    4553             :     double dfSrcCoordPrecision, double dfErrorThreshold,
    4554             :     GDALTransformerFunc pfnTransformer, void *pTransformerArg, double dfDstXOff,
    4555             :     double dfDstY)
    4556             : {
    4557        2500 :     double dfPct = 0.8;
    4558        2500 :     if (dfErrorThreshold > 0 && dfSrcCoordPrecision / dfErrorThreshold >= 10.0)
    4559             :     {
    4560        2500 :         dfPct = 1.0 - 2 * 1.0 / (dfSrcCoordPrecision / dfErrorThreshold);
    4561             :     }
    4562        2500 :     const double dfExactTransformThreshold = 0.5 * dfPct * dfSrcCoordPrecision;
    4563             : 
    4564      502500 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    4565             :     {
    4566      500000 :         const double dfXBefore = padfX[iDstX];
    4567      500000 :         const double dfYBefore = padfY[iDstX];
    4568      500000 :         padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
    4569             :                        dfSrcCoordPrecision;
    4570      500000 :         padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
    4571             :                        dfSrcCoordPrecision;
    4572             : 
    4573             :         // If we are in an uncertainty zone, go to non-approximated
    4574             :         // transformation.
    4575             :         // Due to the 80% of half-precision threshold, dfSrcCoordPrecision must
    4576             :         // be at least 10 times greater than the approximation error.
    4577      500000 :         if (fabs(dfXBefore - padfX[iDstX]) > dfExactTransformThreshold ||
    4578      399911 :             fabs(dfYBefore - padfY[iDstX]) > dfExactTransformThreshold)
    4579             :         {
    4580      180103 :             padfX[iDstX] = iDstX + dfDstXOff;
    4581      180103 :             padfY[iDstX] = dfDstY;
    4582      180103 :             padfZ[iDstX] = 0.0;
    4583      180103 :             pfnTransformer(pTransformerArg, TRUE, 1, padfX + iDstX,
    4584      180103 :                            padfY + iDstX, padfZ + iDstX, pabSuccess + iDstX);
    4585      180103 :             padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
    4586             :                            dfSrcCoordPrecision;
    4587      180103 :             padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
    4588             :                            dfSrcCoordPrecision;
    4589             :         }
    4590             :     }
    4591        2500 : }
    4592             : 
    4593             : /************************************************************************/
    4594             : /*                           GWKOpenCLCase()                            */
    4595             : /*                                                                      */
    4596             : /*      This is identical to GWKGeneralCase(), but functions via        */
    4597             : /*      OpenCL. This means we have vector optimization (SSE) and/or     */
    4598             : /*      GPU optimization depending on our prefs. The code itself is     */
    4599             : /*      general and not optimized, but by defining constants we can     */
    4600             : /*      make some pretty darn good code on the fly.                     */
    4601             : /************************************************************************/
    4602             : 
    4603             : #if defined(HAVE_OPENCL)
    4604           0 : static CPLErr GWKOpenCLCase(GDALWarpKernel *poWK)
    4605             : {
    4606           0 :     const int nDstXSize = poWK->nDstXSize;
    4607           0 :     const int nDstYSize = poWK->nDstYSize;
    4608           0 :     const int nSrcXSize = poWK->nSrcXSize;
    4609           0 :     const int nSrcYSize = poWK->nSrcYSize;
    4610           0 :     const int nDstXOff = poWK->nDstXOff;
    4611           0 :     const int nDstYOff = poWK->nDstYOff;
    4612           0 :     const int nSrcXOff = poWK->nSrcXOff;
    4613           0 :     const int nSrcYOff = poWK->nSrcYOff;
    4614           0 :     bool bUseImag = false;
    4615             : 
    4616             :     cl_channel_type imageFormat;
    4617           0 :     switch (poWK->eWorkingDataType)
    4618             :     {
    4619           0 :         case GDT_Byte:
    4620           0 :             imageFormat = CL_UNORM_INT8;
    4621           0 :             break;
    4622           0 :         case GDT_UInt16:
    4623           0 :             imageFormat = CL_UNORM_INT16;
    4624           0 :             break;
    4625           0 :         case GDT_CInt16:
    4626           0 :             bUseImag = true;
    4627             :             [[fallthrough]];
    4628           0 :         case GDT_Int16:
    4629           0 :             imageFormat = CL_SNORM_INT16;
    4630           0 :             break;
    4631           0 :         case GDT_CFloat32:
    4632           0 :             bUseImag = true;
    4633             :             [[fallthrough]];
    4634           0 :         case GDT_Float32:
    4635           0 :             imageFormat = CL_FLOAT;
    4636           0 :             break;
    4637           0 :         default:
    4638             :             // No support for higher precision formats.
    4639           0 :             CPLDebug("OpenCL", "Unsupported resampling OpenCL data type %d.",
    4640           0 :                      static_cast<int>(poWK->eWorkingDataType));
    4641           0 :             return CE_Warning;
    4642             :     }
    4643             : 
    4644             :     OCLResampAlg resampAlg;
    4645           0 :     switch (poWK->eResample)
    4646             :     {
    4647           0 :         case GRA_Bilinear:
    4648           0 :             resampAlg = OCL_Bilinear;
    4649           0 :             break;
    4650           0 :         case GRA_Cubic:
    4651           0 :             resampAlg = OCL_Cubic;
    4652           0 :             break;
    4653           0 :         case GRA_CubicSpline:
    4654           0 :             resampAlg = OCL_CubicSpline;
    4655           0 :             break;
    4656           0 :         case GRA_Lanczos:
    4657           0 :             resampAlg = OCL_Lanczos;
    4658           0 :             break;
    4659           0 :         default:
    4660             :             // No support for higher precision formats.
    4661           0 :             CPLDebug("OpenCL",
    4662             :                      "Unsupported resampling OpenCL resampling alg %d.",
    4663           0 :                      static_cast<int>(poWK->eResample));
    4664           0 :             return CE_Warning;
    4665             :     }
    4666             : 
    4667           0 :     struct oclWarper *warper = nullptr;
    4668             :     cl_int err;
    4669           0 :     CPLErr eErr = CE_None;
    4670             : 
    4671             :     // TODO(schwehr): Fix indenting.
    4672             :     try
    4673             :     {
    4674             : 
    4675             :         // Using a factor of 2 or 4 seems to have much less rounding error
    4676             :         // than 3 on the GPU.
    4677             :         // Then the rounding error can cause strange artifacts under the
    4678             :         // right conditions.
    4679           0 :         warper = GDALWarpKernelOpenCL_createEnv(
    4680             :             nSrcXSize, nSrcYSize, nDstXSize, nDstYSize, imageFormat,
    4681           0 :             poWK->nBands, 4, bUseImag, poWK->papanBandSrcValid != nullptr,
    4682             :             poWK->pafDstDensity, poWK->padfDstNoDataReal, resampAlg, &err);
    4683             : 
    4684           0 :         if (err != CL_SUCCESS || warper == nullptr)
    4685             :         {
    4686           0 :             eErr = CE_Warning;
    4687           0 :             if (warper != nullptr)
    4688           0 :                 throw eErr;
    4689           0 :             return eErr;
    4690             :         }
    4691             : 
    4692           0 :         CPLDebug("GDAL",
    4693             :                  "GDALWarpKernel()::GWKOpenCLCase() "
    4694             :                  "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d",
    4695             :                  nSrcXOff, nSrcYOff, nSrcXSize, nSrcYSize, nDstXOff, nDstYOff,
    4696             :                  nDstXSize, nDstYSize);
    4697             : 
    4698           0 :         if (!poWK->pfnProgress(poWK->dfProgressBase, "", poWK->pProgress))
    4699             :         {
    4700           0 :             CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
    4701           0 :             eErr = CE_Failure;
    4702           0 :             throw eErr;
    4703             :         }
    4704             : 
    4705             :         /* ====================================================================
    4706             :          */
    4707             :         /*      Loop over bands. */
    4708             :         /* ====================================================================
    4709             :          */
    4710           0 :         for (int iBand = 0; iBand < poWK->nBands; iBand++)
    4711             :         {
    4712           0 :             if (poWK->papanBandSrcValid != nullptr &&
    4713           0 :                 poWK->papanBandSrcValid[iBand] != nullptr)
    4714             :             {
    4715           0 :                 GDALWarpKernelOpenCL_setSrcValid(
    4716             :                     warper,
    4717           0 :                     reinterpret_cast<int *>(poWK->papanBandSrcValid[iBand]),
    4718             :                     iBand);
    4719           0 :                 if (err != CL_SUCCESS)
    4720             :                 {
    4721           0 :                     CPLError(
    4722             :                         CE_Failure, CPLE_AppDefined,
    4723             :                         "OpenCL routines reported failure (%d) on line %d.",
    4724             :                         static_cast<int>(err), __LINE__);
    4725           0 :                     eErr = CE_Failure;
    4726           0 :                     throw eErr;
    4727             :                 }
    4728             :             }
    4729             : 
    4730           0 :             err = GDALWarpKernelOpenCL_setSrcImg(
    4731           0 :                 warper, poWK->papabySrcImage[iBand], iBand);
    4732           0 :             if (err != CL_SUCCESS)
    4733             :             {
    4734           0 :                 CPLError(CE_Failure, CPLE_AppDefined,
    4735             :                          "OpenCL routines reported failure (%d) on line %d.",
    4736             :                          static_cast<int>(err), __LINE__);
    4737           0 :                 eErr = CE_Failure;
    4738           0 :                 throw eErr;
    4739             :             }
    4740             : 
    4741           0 :             err = GDALWarpKernelOpenCL_setDstImg(
    4742           0 :                 warper, poWK->papabyDstImage[iBand], iBand);
    4743           0 :             if (err != CL_SUCCESS)
    4744             :             {
    4745           0 :                 CPLError(CE_Failure, CPLE_AppDefined,
    4746             :                          "OpenCL routines reported failure (%d) on line %d.",
    4747             :                          static_cast<int>(err), __LINE__);
    4748           0 :                 eErr = CE_Failure;
    4749           0 :                 throw eErr;
    4750             :             }
    4751             :         }
    4752             : 
    4753             :         /* --------------------------------------------------------------------
    4754             :          */
    4755             :         /*      Allocate x,y,z coordinate arrays for transformation ... one */
    4756             :         /*      scanlines worth of positions. */
    4757             :         /* --------------------------------------------------------------------
    4758             :          */
    4759             : 
    4760             :         // For x, 2 *, because we cache the precomputed values at the end.
    4761             :         double *padfX =
    4762           0 :             static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    4763             :         double *padfY =
    4764           0 :             static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    4765             :         double *padfZ =
    4766           0 :             static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    4767             :         int *pabSuccess =
    4768           0 :             static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    4769           0 :         const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    4770           0 :             poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    4771           0 :         const double dfErrorThreshold = CPLAtof(CSLFetchNameValueDef(
    4772           0 :             poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    4773             : 
    4774             :         // Precompute values.
    4775           0 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    4776           0 :             padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    4777             : 
    4778             :         /* ====================================================================
    4779             :          */
    4780             :         /*      Loop over output lines. */
    4781             :         /* ====================================================================
    4782             :          */
    4783           0 :         for (int iDstY = 0; iDstY < nDstYSize && eErr == CE_None; ++iDstY)
    4784             :         {
    4785             :             /* ----------------------------------------------------------------
    4786             :              */
    4787             :             /*      Setup points to transform to source image space. */
    4788             :             /* ----------------------------------------------------------------
    4789             :              */
    4790           0 :             memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    4791           0 :             const double dfYConst = iDstY + 0.5 + poWK->nDstYOff;
    4792           0 :             for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    4793           0 :                 padfY[iDstX] = dfYConst;
    4794           0 :             memset(padfZ, 0, sizeof(double) * nDstXSize);
    4795             : 
    4796             :             /* ----------------------------------------------------------------
    4797             :              */
    4798             :             /*      Transform the points from destination pixel/line
    4799             :              * coordinates*/
    4800             :             /*      to source pixel/line coordinates. */
    4801             :             /* ----------------------------------------------------------------
    4802             :              */
    4803           0 :             poWK->pfnTransformer(poWK->pTransformerArg, TRUE, nDstXSize, padfX,
    4804             :                                  padfY, padfZ, pabSuccess);
    4805           0 :             if (dfSrcCoordPrecision > 0.0)
    4806             :             {
    4807           0 :                 GWKRoundSourceCoordinates(
    4808             :                     nDstXSize, padfX, padfY, padfZ, pabSuccess,
    4809             :                     dfSrcCoordPrecision, dfErrorThreshold, poWK->pfnTransformer,
    4810             :                     poWK->pTransformerArg, 0.5 + nDstXOff,
    4811           0 :                     iDstY + 0.5 + nDstYOff);
    4812             :             }
    4813             : 
    4814           0 :             err = GDALWarpKernelOpenCL_setCoordRow(
    4815             :                 warper, padfX, padfY, nSrcXOff, nSrcYOff, pabSuccess, iDstY);
    4816           0 :             if (err != CL_SUCCESS)
    4817             :             {
    4818           0 :                 CPLError(CE_Failure, CPLE_AppDefined,
    4819             :                          "OpenCL routines reported failure (%d) on line %d.",
    4820             :                          static_cast<int>(err), __LINE__);
    4821           0 :                 eErr = CE_Failure;
    4822           0 :                 break;
    4823             :             }
    4824             : 
    4825             :             // Update the valid & density masks because we don't do so in the
    4826             :             // kernel.
    4827           0 :             for (int iDstX = 0; iDstX < nDstXSize && eErr == CE_None; iDstX++)
    4828             :             {
    4829           0 :                 const double dfX = padfX[iDstX];
    4830           0 :                 const double dfY = padfY[iDstX];
    4831           0 :                 const GPtrDiff_t iDstOffset =
    4832           0 :                     iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    4833             : 
    4834             :                 // See GWKGeneralCase() for appropriate commenting.
    4835           0 :                 if (!pabSuccess[iDstX] || dfX < nSrcXOff || dfY < nSrcYOff)
    4836           0 :                     continue;
    4837             : 
    4838           0 :                 int iSrcX = static_cast<int>(dfX) - nSrcXOff;
    4839           0 :                 int iSrcY = static_cast<int>(dfY) - nSrcYOff;
    4840             : 
    4841           0 :                 if (iSrcX < 0 || iSrcX >= nSrcXSize || iSrcY < 0 ||
    4842             :                     iSrcY >= nSrcYSize)
    4843           0 :                     continue;
    4844             : 
    4845           0 :                 GPtrDiff_t iSrcOffset =
    4846           0 :                     iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    4847           0 :                 double dfDensity = 1.0;
    4848             : 
    4849           0 :                 if (poWK->pafUnifiedSrcDensity != nullptr && iSrcX >= 0 &&
    4850           0 :                     iSrcY >= 0 && iSrcX < nSrcXSize && iSrcY < nSrcYSize)
    4851           0 :                     dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    4852             : 
    4853           0 :                 GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    4854             : 
    4855             :                 // Because this is on the bit-wise level, it can't be done well
    4856             :                 // in OpenCL.
    4857           0 :                 if (poWK->panDstValid != nullptr)
    4858           0 :                     poWK->panDstValid[iDstOffset >> 5] |=
    4859           0 :                         0x01 << (iDstOffset & 0x1f);
    4860             :             }
    4861             :         }
    4862             : 
    4863           0 :         CPLFree(padfX);
    4864           0 :         CPLFree(padfY);
    4865           0 :         CPLFree(padfZ);
    4866           0 :         CPLFree(pabSuccess);
    4867             : 
    4868           0 :         if (eErr != CE_None)
    4869           0 :             throw eErr;
    4870             : 
    4871           0 :         err = GDALWarpKernelOpenCL_runResamp(
    4872             :             warper, poWK->pafUnifiedSrcDensity, poWK->panUnifiedSrcValid,
    4873             :             poWK->pafDstDensity, poWK->panDstValid, poWK->dfXScale,
    4874             :             poWK->dfYScale, poWK->dfXFilter, poWK->dfYFilter, poWK->nXRadius,
    4875             :             poWK->nYRadius, poWK->nFiltInitX, poWK->nFiltInitY);
    4876             : 
    4877           0 :         if (err != CL_SUCCESS)
    4878             :         {
    4879           0 :             CPLError(CE_Failure, CPLE_AppDefined,
    4880             :                      "OpenCL routines reported failure (%d) on line %d.",
    4881             :                      static_cast<int>(err), __LINE__);
    4882           0 :             eErr = CE_Failure;
    4883           0 :             throw eErr;
    4884             :         }
    4885             : 
    4886             :         /* ====================================================================
    4887             :          */
    4888             :         /*      Loop over output lines. */
    4889             :         /* ====================================================================
    4890             :          */
    4891           0 :         for (int iDstY = 0; iDstY < nDstYSize && eErr == CE_None; iDstY++)
    4892             :         {
    4893           0 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    4894             :             {
    4895           0 :                 void *rowReal = nullptr;
    4896           0 :                 void *rowImag = nullptr;
    4897           0 :                 GByte *pabyDst = poWK->papabyDstImage[iBand];
    4898             : 
    4899           0 :                 err = GDALWarpKernelOpenCL_getRow(warper, &rowReal, &rowImag,
    4900             :                                                   iDstY, iBand);
    4901           0 :                 if (err != CL_SUCCESS)
    4902             :                 {
    4903           0 :                     CPLError(
    4904             :                         CE_Failure, CPLE_AppDefined,
    4905             :                         "OpenCL routines reported failure (%d) on line %d.",
    4906             :                         static_cast<int>(err), __LINE__);
    4907           0 :                     eErr = CE_Failure;
    4908           0 :                     throw eErr;
    4909             :                 }
    4910             : 
    4911             :                 // Copy the data from the warper to GDAL's memory.
    4912           0 :                 switch (poWK->eWorkingDataType)
    4913             :                 {
    4914           0 :                     case GDT_Byte:
    4915           0 :                         memcpy(&(pabyDst[iDstY * nDstXSize]), rowReal,
    4916             :                                sizeof(GByte) * nDstXSize);
    4917           0 :                         break;
    4918           0 :                     case GDT_Int16:
    4919           0 :                         memcpy(&(reinterpret_cast<GInt16 *>(
    4920           0 :                                    pabyDst)[iDstY * nDstXSize]),
    4921           0 :                                rowReal, sizeof(GInt16) * nDstXSize);
    4922           0 :                         break;
    4923           0 :                     case GDT_UInt16:
    4924           0 :                         memcpy(&(reinterpret_cast<GUInt16 *>(
    4925           0 :                                    pabyDst)[iDstY * nDstXSize]),
    4926           0 :                                rowReal, sizeof(GUInt16) * nDstXSize);
    4927           0 :                         break;
    4928           0 :                     case GDT_Float32:
    4929           0 :                         memcpy(&(reinterpret_cast<float *>(
    4930           0 :                                    pabyDst)[iDstY * nDstXSize]),
    4931           0 :                                rowReal, sizeof(float) * nDstXSize);
    4932           0 :                         break;
    4933           0 :                     case GDT_CInt16:
    4934             :                     {
    4935           0 :                         GInt16 *pabyDstI16 = &(reinterpret_cast<GInt16 *>(
    4936           0 :                             pabyDst)[iDstY * nDstXSize]);
    4937           0 :                         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    4938             :                         {
    4939           0 :                             pabyDstI16[iDstX * 2] =
    4940           0 :                                 static_cast<GInt16 *>(rowReal)[iDstX];
    4941           0 :                             pabyDstI16[iDstX * 2 + 1] =
    4942           0 :                                 static_cast<GInt16 *>(rowImag)[iDstX];
    4943             :                         }
    4944             :                     }
    4945           0 :                     break;
    4946           0 :                     case GDT_CFloat32:
    4947             :                     {
    4948           0 :                         float *pabyDstF32 = &(reinterpret_cast<float *>(
    4949           0 :                             pabyDst)[iDstY * nDstXSize]);
    4950           0 :                         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    4951             :                         {
    4952           0 :                             pabyDstF32[iDstX * 2] =
    4953           0 :                                 static_cast<float *>(rowReal)[iDstX];
    4954           0 :                             pabyDstF32[iDstX * 2 + 1] =
    4955           0 :                                 static_cast<float *>(rowImag)[iDstX];
    4956             :                         }
    4957             :                     }
    4958           0 :                     break;
    4959           0 :                     default:
    4960             :                         // No support for higher precision formats.
    4961           0 :                         CPLError(CE_Failure, CPLE_AppDefined,
    4962             :                                  "Unsupported resampling OpenCL data type %d.",
    4963           0 :                                  static_cast<int>(poWK->eWorkingDataType));
    4964           0 :                         eErr = CE_Failure;
    4965           0 :                         throw eErr;
    4966             :                 }
    4967             :             }
    4968             :         }
    4969             :     }
    4970           0 :     catch (const CPLErr &)
    4971             :     {
    4972             :     }
    4973             : 
    4974           0 :     if ((err = GDALWarpKernelOpenCL_deleteEnv(warper)) != CL_SUCCESS)
    4975             :     {
    4976           0 :         CPLError(CE_Failure, CPLE_AppDefined,
    4977             :                  "OpenCL routines reported failure (%d) on line %d.",
    4978             :                  static_cast<int>(err), __LINE__);
    4979           0 :         return CE_Failure;
    4980             :     }
    4981             : 
    4982           0 :     return eErr;
    4983             : }
    4984             : #endif /* defined(HAVE_OPENCL) */
    4985             : 
    4986             : /************************************************************************/
    4987             : /*                     GWKCheckAndComputeSrcOffsets()                   */
    4988             : /************************************************************************/
    4989             : static CPL_INLINE bool  // Checks the source coordinate held in _padfX/_padfY for destination pixel _iDstX.
    4990   108691000 : GWKCheckAndComputeSrcOffsets(GWKJobStruct *psJob, int *_pabSuccess, int _iDstX,
    4991             :                              int _iDstY, double *_padfX, double *_padfY,  // Returns false when the transform failed, a coordinate is NaN,
    4992             :                              int _nSrcXSize, int _nSrcYSize,  // or the coordinate is outside the source extent (a miss of less than one pixel is re-transformed once first).
    4993             :                              GPtrDiff_t &iSrcOffset)  // iSrcOffset is written only when true is returned.
    4994             : {
    4995   108691000 :     const GDALWarpKernel *_poWK = psJob->poWK;
    4996   108813000 :     for (int iTry = 0; iTry < 2; ++iTry)  // pass 0 tests the coordinates as supplied; pass 1 runs only after a near miss
    4997             :     {
    4998   108816000 :         if (iTry == 1)
    4999             :         {
    5000             :             // If the source coordinate is slightly outside of the source raster
    5001             :             // retry to transform it alone, so that the exact coordinate
    5002             :             // transformer is used.
    5003             : 
    5004      122163 :             _padfX[_iDstX] = _iDstX + 0.5 + _poWK->nDstXOff;  // reset to the destination pixel centre before re-transforming
    5005      122163 :             _padfY[_iDstX] = _iDstY + 0.5 + _poWK->nDstYOff;
    5006      122163 :             double dfZ = 0;  // scratch Z, not read after the call
    5007      122163 :             _poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1,
    5008      122163 :                                   _padfX + _iDstX, _padfY + _iDstX, &dfZ,
    5009      122163 :                                   _pabSuccess + _iDstX);
    5010             :         }
    5011   108816000 :         if (!_pabSuccess[_iDstX])
    5012     3593220 :             return false;
    5013             : 
    5014             :         // If this happens this is likely the symptom of a bug somewhere.
    5015   105223000 :         if (CPLIsNan(_padfX[_iDstX]) || CPLIsNan(_padfY[_iDstX]))
    5016             :         {
    5017             :             static bool bNanCoordFound = false;  // limits the CPLDebug below to the first occurrence; NOTE(review): plain static, no synchronization visible here
    5018        1680 :             if (!bNanCoordFound)
    5019             :             {
    5020           0 :                 CPLDebug("WARP",
    5021             :                          "GWKCheckAndComputeSrcOffsets(): "
    5022             :                          "NaN coordinate found on point %d.",
    5023             :                          _iDstX);
    5024           0 :                 bNanCoordFound = true;
    5025             :             }
    5026        1680 :             return false;
    5027             :         }
    5028             : 
    5029             :         /* --------------------------------------------------------------------
    5030             :          */
    5031             :         /*      Figure out what pixel we want in our source raster, and skip */
    5032             :         /*      further processing if it is well off the source image. */
    5033             :         /* --------------------------------------------------------------------
    5034             :          */
    5035             :         /* We test against the value before casting to avoid the */
    5036             :         /* problem of asymmetric truncation effects around zero.  That is */
    5037             :         /* -0.5 will be 0 when cast to an int. */
    5038   105221000 :         if (_padfX[_iDstX] < _poWK->nSrcXOff)
    5039             :         {
    5040             :             // If the source coordinate is slightly outside of the source raster
    5041             :             // retry to transform it alone, so that the exact coordinate
    5042             :             // transformer is used.
    5043     4108450 :             if (iTry == 0 && _padfX[_iDstX] > _poWK->nSrcXOff - 1)  // less than one pixel before the first column
    5044       20675 :                 continue;
    5045     4087780 :             return false;
    5046             :         }
    5047             : 
    5048   101113000 :         if (_padfY[_iDstX] < _poWK->nSrcYOff)
    5049             :         {
    5050             :             // If the source coordinate is slightly outside of the source raster
    5051             :             // retry to transform it alone, so that the exact coordinate
    5052             :             // transformer is used.
    5053     4778860 :             if (iTry == 0 && _padfY[_iDstX] > _poWK->nSrcYOff - 1)  // less than one pixel before the first row
    5054       37441 :                 continue;
    5055     4741420 :             return false;
    5056             :         }
    5057             : 
    5058             :         // Check for potential overflow when casting from float to int, (if
    5059             :         // operating outside natural projection area, padfX/Y can be a very huge
    5060             :         // positive number before doing the actual conversion), as such cast is
    5061             :         // undefined behavior that can trigger exception with some compilers
    5062             :         // (see #6753)
    5063    96334200 :         if (_padfX[_iDstX] + 1e-10 > _nSrcXSize + _poWK->nSrcXOff)  // same 1e-10 epsilon as the int casts after the loop
    5064             :         {
    5065             :             // If the source coordinate is slightly outside of the source raster
    5066             :             // retry to transform it alone, so that the exact coordinate
    5067             :             // transformer is used.
    5068     3466190 :             if (iTry == 0 && _padfX[_iDstX] < _nSrcXSize + _poWK->nSrcXOff + 1)
    5069       32342 :                 continue;
    5070     3433850 :             return false;
    5071             :         }
    5072    92868000 :         if (_padfY[_iDstX] + 1e-10 > _nSrcYSize + _poWK->nSrcYOff)
    5073             :         {
    5074             :             // If the source coordinate is slightly outside of the source raster
    5075             :             // retry to transform it alone, so that the exact coordinate
    5076             :             // transformer is used.
    5077     3693150 :             if (iTry == 0 && _padfY[_iDstX] < _nSrcYSize + _poWK->nSrcYOff + 1)
    5078       31705 :                 continue;
    5079     3661450 :             return false;
    5080             :         }
    5081             : 
    5082    89174800 :         break;  // all four bounds checks passed
    5083             :     }
    5084             : 
    5085    89171700 :     int iSrcX = static_cast<int>(_padfX[_iDstX] + 1.0e-10) - _poWK->nSrcXOff;
    5086    89171700 :     int iSrcY = static_cast<int>(_padfY[_iDstX] + 1.0e-10) - _poWK->nSrcYOff;
    5087    89171700 :     if (iSrcX == _nSrcXSize)  // a coordinate exactly on the far edge maps to the last column
    5088           0 :         iSrcX--;
    5089    89171700 :     if (iSrcY == _nSrcYSize)  // likewise for the last row
    5090           0 :         iSrcY--;
    5091             : 
    5092             :     // Those checks should normally be OK given the previous ones.
    5093    89171700 :     CPLAssert(iSrcX >= 0);
    5094    89171700 :     CPLAssert(iSrcY >= 0);
    5095    89171700 :     CPLAssert(iSrcX < _nSrcXSize);
    5096    89171700 :     CPLAssert(iSrcY < _nSrcYSize);
    5097             : 
    5098    89171700 :     iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * _nSrcXSize;  // linear index: x + y * width, widened before the multiply
    5099             : 
    5100    89171700 :     return true;
    5101             : }
    5102             : 
    5103             : /************************************************************************/
    5104             : /*                   GWKOneSourceCornerFailsToReproject()               */
    5105             : /************************************************************************/
    5106             : 
    5107         704 : static bool GWKOneSourceCornerFailsToReproject(GWKJobStruct *psJob)  // Returns true if pfnTransformer reports failure for any of the four corners of the source window.
    5108             : {
    5109         704 :     GDALWarpKernel *poWK = psJob->poWK;
    5110        2102 :     for (int iY = 0; iY <= 1; ++iY)
    5111             :     {
    5112        4200 :         for (int iX = 0; iX <= 1; ++iX)
    5113             :         {
    5114        2802 :             double dfXTmp = poWK->nSrcXOff + iX * poWK->nSrcXSize;  // iX/iY of 0 or 1 select the near or far edge of the window
    5115        2802 :             double dfYTmp = poWK->nSrcYOff + iY * poWK->nSrcYSize;
    5116        2802 :             double dfZTmp = 0;
    5117        2802 :             int nSuccess = FALSE;
    5118        2802 :             poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp,  // one point per call; second argument is FALSE here, unlike the TRUE used when mapping destination pixels
    5119             :                                  &dfYTmp, &dfZTmp, &nSuccess);
    5120        2802 :             if (!nSuccess)
    5121           6 :                 return true;  // stop at the first failing corner
    5122             :         }
    5123             :     }
    5124         698 :     return false;  // all four corners transformed successfully
    5125             : }
    5126             : 
    5127             : /************************************************************************/
    5128             : /*                       GWKAdjustSrcOffsetOnEdge()                     */
    5129             : /************************************************************************/
    5130             : 
    5131        9714 : static bool GWKAdjustSrcOffsetOnEdge(GWKJobStruct *psJob,  // Moves iSrcOffset to an adjacent source pixel for which CPLMaskGet(panUnifiedSrcValid, ...) is non-zero,
    5132             :                                      GPtrDiff_t &iSrcOffset)  // but only if one of the probes below fails to transform. Returns true if iSrcOffset was changed.
    5133             : {
    5134        9714 :     GDALWarpKernel *poWK = psJob->poWK;
    5135        9714 :     const int nSrcXSize = poWK->nSrcXSize;
    5136        9714 :     const int nSrcYSize = poWK->nSrcYSize;
    5137             : 
    5138             :     // Check if the computed source position slightly altered
    5139             :     // fails to reproject. If so, then we are at the edge of
    5140             :     // the validity area, and it is worth checking neighbour
    5141             :     // source pixels for validity.
    5142        9714 :     int nSuccess = FALSE;  // outcome of the latest probe
    5143             :     {  // probe (x, y): iSrcOffset % width gives x, iSrcOffset / width gives y
    5144        9714 :         double dfXTmp =
    5145        9714 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5146        9714 :         double dfYTmp =
    5147        9714 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5148        9714 :         double dfZTmp = 0;
    5149        9714 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5150             :                              &dfZTmp, &nSuccess);
    5151             :     }
    5152        9714 :     if (nSuccess)  // probe (x, y + 1), only if the previous probe succeeded
    5153             :     {
    5154        6996 :         double dfXTmp =
    5155        6996 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5156        6996 :         double dfYTmp =
    5157        6996 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
    5158        6996 :         double dfZTmp = 0;
    5159        6996 :         nSuccess = FALSE;
    5160        6996 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5161             :                              &dfZTmp, &nSuccess);
    5162             :     }
    5163        9714 :     if (nSuccess)  // probe (x + 1, y), only if the previous probe succeeded
    5164             :     {
    5165        5624 :         double dfXTmp =
    5166        5624 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
    5167        5624 :         double dfYTmp =
    5168        5624 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5169        5624 :         double dfZTmp = 0;
    5170        5624 :         nSuccess = FALSE;
    5171        5624 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5172             :                              &dfZTmp, &nSuccess);
    5173             :     }
    5174             :     // Neighbours are tried in the order x + 1, y + 1, x - 1, y - 1; each branch requires !nSuccess and an in-bounds neighbour.
    5175       14166 :     if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
    5176        4452 :         CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + 1))
    5177             :     {
    5178        1860 :         iSrcOffset++;
    5179        1860 :         return true;
    5180             :     }
    5181       10290 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
    5182        2436 :              CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + nSrcXSize))
    5183             :     {
    5184        1334 :         iSrcOffset += nSrcXSize;
    5185        1334 :         return true;
    5186             :     }
    5187        7838 :     else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
    5188        1318 :              CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - 1))
    5189             :     {
    5190         956 :         iSrcOffset--;
    5191         956 :         return true;
    5192             :     }
    5193        5924 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
    5194         360 :              CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - nSrcXSize))
    5195             :     {
    5196         340 :         iSrcOffset -= nSrcXSize;
    5197         340 :         return true;
    5198             :     }
    5199             : 
    5200        5224 :     return false;  // every probe succeeded, or no neighbour qualified; iSrcOffset is unchanged
    5201             : }
    5202             : 
    5203             : /************************************************************************/
    5204             : /*                 GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity()          */
    5205             : /************************************************************************/
    5206             : 
    5207           0 : static bool GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(GWKJobStruct *psJob,  // Moves iSrcOffset to an adjacent source pixel whose pafUnifiedSrcDensity value is >= SRC_DENSITY_THRESHOLD,
    5208             :                                                       GPtrDiff_t &iSrcOffset)  // but only if one of the probes below fails to transform. Returns true if iSrcOffset was changed.
    5209             : {
    5210           0 :     GDALWarpKernel *poWK = psJob->poWK;
    5211           0 :     const int nSrcXSize = poWK->nSrcXSize;
    5212           0 :     const int nSrcYSize = poWK->nSrcYSize;
    5213             : 
    5214             :     // Check if the computed source position slightly altered
    5215             :     // fails to reproject. If so, then we are at the edge of
    5216             :     // the validity area, and it is worth checking neighbour
    5217             :     // source pixels for validity.
    5218           0 :     int nSuccess = FALSE;  // outcome of the latest probe
    5219             :     {  // probe (x, y): iSrcOffset % width gives x, iSrcOffset / width gives y
    5220           0 :         double dfXTmp =
    5221           0 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5222           0 :         double dfYTmp =
    5223           0 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5224           0 :         double dfZTmp = 0;
    5225           0 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5226             :                              &dfZTmp, &nSuccess);
    5227             :     }
    5228           0 :     if (nSuccess)  // probe (x, y + 1), only if the previous probe succeeded
    5229             :     {
    5230           0 :         double dfXTmp =
    5231           0 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5232           0 :         double dfYTmp =
    5233           0 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
    5234           0 :         double dfZTmp = 0;
    5235           0 :         nSuccess = FALSE;
    5236           0 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5237             :                              &dfZTmp, &nSuccess);
    5238             :     }
    5239           0 :     if (nSuccess)  // probe (x + 1, y), only if the previous probe succeeded
    5240             :     {
    5241           0 :         double dfXTmp =
    5242           0 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
    5243           0 :         double dfYTmp =
    5244           0 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5245           0 :         double dfZTmp = 0;
    5246           0 :         nSuccess = FALSE;
    5247           0 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5248             :                              &dfZTmp, &nSuccess);
    5249             :     }
    5250             :     // Neighbours are tried in the order x + 1, y + 1, x - 1, y - 1. NOTE(review): pafUnifiedSrcDensity is indexed without a null check in this function - presumably guaranteed by the caller; confirm.
    5251           0 :     if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
    5252           0 :         poWK->pafUnifiedSrcDensity[iSrcOffset + 1] >= SRC_DENSITY_THRESHOLD)
    5253             :     {
    5254           0 :         iSrcOffset++;
    5255           0 :         return true;
    5256             :     }
    5257           0 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
    5258           0 :              poWK->pafUnifiedSrcDensity[iSrcOffset + nSrcXSize] >=
    5259             :                  SRC_DENSITY_THRESHOLD)
    5260             :     {
    5261           0 :         iSrcOffset += nSrcXSize;
    5262           0 :         return true;
    5263             :     }
    5264           0 :     else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
    5265           0 :              poWK->pafUnifiedSrcDensity[iSrcOffset - 1] >=
    5266             :                  SRC_DENSITY_THRESHOLD)
    5267             :     {
    5268           0 :         iSrcOffset--;
    5269           0 :         return true;
    5270             :     }
    5271           0 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
    5272           0 :              poWK->pafUnifiedSrcDensity[iSrcOffset - nSrcXSize] >=
    5273             :                  SRC_DENSITY_THRESHOLD)
    5274             :     {
    5275           0 :         iSrcOffset -= nSrcXSize;
    5276           0 :         return true;
    5277             :     }
    5278             : 
    5279           0 :     return false;  // every probe succeeded, or no neighbour qualified; iSrcOffset is unchanged
    5280             : }
    5281             : 
    5282             : /************************************************************************/
    5283             : /*                           GWKGeneralCase()                           */
    5284             : /*                                                                      */
    5285             : /*      This is the most general case.  It attempts to handle all       */
    5286             : /*      possible features with relatively little concern for            */
    5287             : /*      efficiency.                                                     */
    5288             : /************************************************************************/
    5289             : 
    5290         243 : static void GWKGeneralCaseThread(void *pData)
    5291             : {
    5292         243 :     GWKJobStruct *psJob = reinterpret_cast<GWKJobStruct *>(pData);
    5293         243 :     GDALWarpKernel *poWK = psJob->poWK;
    5294         243 :     const int iYMin = psJob->iYMin;
    5295         243 :     const int iYMax = psJob->iYMax;
    5296             :     const double dfMultFactorVerticalShiftPipeline =
    5297         243 :         poWK->bApplyVerticalShift
    5298         243 :             ? CPLAtof(CSLFetchNameValueDef(
    5299           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    5300             :                   "1.0"))
    5301         243 :             : 0.0;
    5302             : 
    5303         243 :     int nDstXSize = poWK->nDstXSize;
    5304         243 :     int nSrcXSize = poWK->nSrcXSize;
    5305         243 :     int nSrcYSize = poWK->nSrcYSize;
    5306             : 
    5307             :     /* -------------------------------------------------------------------- */
    5308             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    5309             :     /*      scanlines worth of positions.                                   */
    5310             :     /* -------------------------------------------------------------------- */
    5311             :     // For x, 2 *, because we cache the precomputed values at the end.
    5312             :     double *padfX =
    5313         243 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    5314             :     double *padfY =
    5315         243 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5316             :     double *padfZ =
    5317         243 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5318         243 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    5319             : 
    5320         243 :     const bool bUse4SamplesFormula =
    5321         243 :         poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
    5322             : 
    5323         243 :     GWKResampleWrkStruct *psWrkStruct = nullptr;
    5324         243 :     if (poWK->eResample != GRA_NearestNeighbour)
    5325             :     {
    5326         224 :         psWrkStruct = GWKResampleCreateWrkStruct(poWK);
    5327             :     }
    5328         243 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    5329         243 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    5330         243 :     const double dfErrorThreshold = CPLAtof(
    5331         243 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    5332             : 
    5333             :     const bool bOneSourceCornerFailsToReproject =
    5334         243 :         GWKOneSourceCornerFailsToReproject(psJob);
    5335             : 
    5336             :     // Precompute values.
    5337        6513 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5338        6270 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    5339             : 
    5340             :     /* ==================================================================== */
    5341             :     /*      Loop over output lines.                                         */
    5342             :     /* ==================================================================== */
    5343        6513 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    5344             :     {
    5345             :         /* --------------------------------------------------------------------
    5346             :          */
    5347             :         /*      Setup points to transform to source image space. */
    5348             :         /* --------------------------------------------------------------------
    5349             :          */
    5350        6270 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    5351        6270 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    5352      242830 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5353      236560 :             padfY[iDstX] = dfY;
    5354        6270 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    5355             : 
    5356             :         /* --------------------------------------------------------------------
    5357             :          */
    5358             :         /*      Transform the points from destination pixel/line coordinates */
    5359             :         /*      to source pixel/line coordinates. */
    5360             :         /* --------------------------------------------------------------------
    5361             :          */
    5362        6270 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    5363             :                              padfY, padfZ, pabSuccess);
    5364        6270 :         if (dfSrcCoordPrecision > 0.0)
    5365             :         {
    5366           0 :             GWKRoundSourceCoordinates(
    5367             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    5368             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    5369           0 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    5370             :         }
    5371             : 
    5372             :         /* ====================================================================
    5373             :          */
    5374             :         /*      Loop over pixels in output scanline. */
    5375             :         /* ====================================================================
    5376             :          */
    5377      242830 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5378             :         {
    5379      236560 :             GPtrDiff_t iSrcOffset = 0;
    5380      236560 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    5381             :                                               padfX, padfY, nSrcXSize,
    5382             :                                               nSrcYSize, iSrcOffset))
    5383           0 :                 continue;
    5384             : 
    5385             :             /* --------------------------------------------------------------------
    5386             :              */
    5387             :             /*      Do not try to apply transparent/invalid source pixels to the
    5388             :              */
    5389             :             /*      destination.  This currently ignores the multi-pixel input
    5390             :              */
    5391             :             /*      of bilinear and cubic resamples. */
    5392             :             /* --------------------------------------------------------------------
    5393             :              */
    5394      236560 :             double dfDensity = 1.0;
    5395             : 
    5396      236560 :             if (poWK->pafUnifiedSrcDensity != nullptr)
    5397             :             {
    5398        1200 :                 dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    5399        1200 :                 if (dfDensity < SRC_DENSITY_THRESHOLD)
    5400             :                 {
    5401           0 :                     if (!bOneSourceCornerFailsToReproject)
    5402             :                     {
    5403           0 :                         continue;
    5404             :                     }
    5405           0 :                     else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
    5406             :                                  psJob, iSrcOffset))
    5407             :                     {
    5408           0 :                         dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    5409             :                     }
    5410             :                     else
    5411             :                     {
    5412           0 :                         continue;
    5413             :                     }
    5414             :                 }
    5415             :             }
    5416             : 
    5417      236560 :             if (poWK->panUnifiedSrcValid != nullptr &&
    5418           0 :                 !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    5419             :             {
    5420           0 :                 if (!bOneSourceCornerFailsToReproject)
    5421             :                 {
    5422           0 :                     continue;
    5423             :                 }
    5424           0 :                 else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
    5425             :                 {
    5426           0 :                     continue;
    5427             :                 }
    5428             :             }
    5429             : 
    5430             :             /* ====================================================================
    5431             :              */
    5432             :             /*      Loop processing each band. */
    5433             :             /* ====================================================================
    5434             :              */
    5435      236560 :             bool bHasFoundDensity = false;
    5436             : 
    5437      236560 :             const GPtrDiff_t iDstOffset =
    5438      236560 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    5439      473120 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    5440             :             {
    5441      236560 :                 double dfBandDensity = 0.0;
    5442      236560 :                 double dfValueReal = 0.0;
    5443      236560 :                 double dfValueImag = 0.0;
    5444             : 
    5445             :                 /* --------------------------------------------------------------------
    5446             :                  */
    5447             :                 /*      Collect the source value. */
    5448             :                 /* --------------------------------------------------------------------
    5449             :                  */
    5450      236560 :                 if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
    5451             :                     nSrcYSize == 1)
    5452             :                 {
    5453             :                     // FALSE is returned if dfBandDensity == 0, which is
    5454             :                     // checked below.
    5455         568 :                     CPL_IGNORE_RET_VAL(GWKGetPixelValue(
    5456             :                         poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal,
    5457             :                         &dfValueImag));
    5458             :                 }
    5459      235992 :                 else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
    5460             :                 {
    5461         648 :                     GWKBilinearResample4Sample(
    5462         648 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5463         648 :                         padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5464             :                         &dfValueReal, &dfValueImag);
    5465             :                 }
    5466      235344 :                 else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
    5467             :                 {
    5468         248 :                     GWKCubicResample4Sample(
    5469         248 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5470         248 :                         padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5471             :                         &dfValueReal, &dfValueImag);
    5472             :                 }
    5473             :                 else
    5474             : #ifdef DEBUG
    5475             :                     // Only useful for clang static analyzer.
    5476      235096 :                     if (psWrkStruct != nullptr)
    5477             : #endif
    5478             :                     {
    5479      235096 :                         psWrkStruct->pfnGWKResample(
    5480      235096 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5481      235096 :                             padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5482             :                             &dfValueReal, &dfValueImag, psWrkStruct);
    5483             :                     }
    5484             : 
    5485             :                 // If we didn't find any valid inputs skip to next band.
    5486      236560 :                 if (dfBandDensity < BAND_DENSITY_THRESHOLD)
    5487           0 :                     continue;
    5488             : 
    5489      236560 :                 if (poWK->bApplyVerticalShift)
    5490             :                 {
    5491           0 :                     if (!std::isfinite(padfZ[iDstX]))
    5492           0 :                         continue;
    5493             :                     // Subtract padfZ[] since the coordinate transformation is
    5494             :                     // from target to source
    5495           0 :                     dfValueReal =
    5496           0 :                         dfValueReal * poWK->dfMultFactorVerticalShift -
    5497           0 :                         padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
    5498             :                 }
    5499             : 
    5500      236560 :                 bHasFoundDensity = true;
    5501             : 
    5502             :                 /* --------------------------------------------------------------------
    5503             :                  */
    5504             :                 /*      We have a computed value from the source.  Now apply it
    5505             :                  * to      */
    5506             :                 /*      the destination pixel. */
    5507             :                 /* --------------------------------------------------------------------
    5508             :                  */
    5509      236560 :                 GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    5510             :                                  dfValueReal, dfValueImag);
    5511             :             }
    5512             : 
    5513      236560 :             if (!bHasFoundDensity)
    5514           0 :                 continue;
    5515             : 
    5516             :             /* --------------------------------------------------------------------
    5517             :              */
    5518             :             /*      Update destination density/validity masks. */
    5519             :             /* --------------------------------------------------------------------
    5520             :              */
    5521      236560 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    5522             : 
    5523      236560 :             if (poWK->panDstValid != nullptr)
    5524             :             {
    5525           0 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    5526             :             }
    5527             :         } /* Next iDstX */
    5528             : 
    5529             :         /* --------------------------------------------------------------------
    5530             :          */
    5531             :         /*      Report progress to the user, and optionally cancel out. */
    5532             :         /* --------------------------------------------------------------------
    5533             :          */
    5534        6270 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    5535           0 :             break;
    5536             :     }
    5537             : 
    5538             :     /* -------------------------------------------------------------------- */
    5539             :     /*      Cleanup and return.                                             */
    5540             :     /* -------------------------------------------------------------------- */
    5541         243 :     CPLFree(padfX);
    5542         243 :     CPLFree(padfY);
    5543         243 :     CPLFree(padfZ);
    5544         243 :     CPLFree(pabSuccess);
    5545         243 :     if (psWrkStruct)
    5546         224 :         GWKResampleDeleteWrkStruct(psWrkStruct);
    5547         243 : }
    5548             : // General-case entry point: hands GWKGeneralCaseThread to GWKRun (defined outside this view) and returns its CPLErr.
    5549         243 : static CPLErr GWKGeneralCase(GDALWarpKernel *poWK)
    5550             : {
    5551         243 :     return GWKRun(poWK, "GWKGeneralCase", GWKGeneralCaseThread);
    5552             : }
    5553             : 
    5554             : /************************************************************************/
    5555             : /*                            GWKRealCase()                             */
    5556             : /*      General case for non-complex data types.  Thread worker that    */
    5557             : /*      warps destination lines [iYMin, iYMax) of one GWKJobStruct.     */
    5558             : /************************************************************************/
    5559             : // pData must point to a GWKJobStruct (it is static_cast below); nothing is returned.
    5560         135 : static void GWKRealCaseThread(void *pData)
    5561             : 
    5562             : {
    5563         135 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    5564         135 :     GDALWarpKernel *poWK = psJob->poWK;
    5565         135 :     const int iYMin = psJob->iYMin;
    5566         135 :     const int iYMax = psJob->iYMax;
    5567             : 
    5568         135 :     const int nDstXSize = poWK->nDstXSize;
    5569         135 :     const int nSrcXSize = poWK->nSrcXSize;
    5570         135 :     const int nSrcYSize = poWK->nSrcYSize;
    5571             :     const double dfMultFactorVerticalShiftPipeline =
    5572         135 :         poWK->bApplyVerticalShift
    5573         135 :             ? CPLAtof(CSLFetchNameValueDef(
    5574           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    5575             :                   "1.0"))
    5576         135 :             : 0.0;  // only read when bApplyVerticalShift is set
    5577             : 
    5578             :     /* -------------------------------------------------------------------- */
    5579             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    5580             :     /*      scanline's worth of positions.                                  */
    5581             :     /* -------------------------------------------------------------------- */
    5582             : 
    5583             :     // padfX holds 2 * nDstXSize doubles: the second half caches the precomputed X values.
    5584             :     double *padfX =
    5585         135 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    5586             :     double *padfY =
    5587         135 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5588             :     double *padfZ =
    5589         135 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5590         135 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    5591             : 
    5592         135 :     const bool bUse4SamplesFormula =
    5593         135 :         poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;  // gates the 4-sample bilinear/cubic paths below
    5594             : 
    5595         135 :     GWKResampleWrkStruct *psWrkStruct = nullptr;
    5596         135 :     if (poWK->eResample != GRA_NearestNeighbour)
    5597             :     {
    5598         119 :         psWrkStruct = GWKResampleCreateWrkStruct(poWK);
    5599             :     }
    5600         135 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    5601         135 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    5602         135 :     const double dfErrorThreshold = CPLAtof(
    5603         135 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    5604             : 
    5605         390 :     const bool bSrcMaskIsDensity = poWK->panUnifiedSrcValid == nullptr &&
    5606         255 :                                    poWK->papanBandSrcValid == nullptr &&
    5607         120 :                                    poWK->pafUnifiedSrcDensity != nullptr;  // density is the only one of these three masks present
    5608             : 
    5609             :     const bool bOneSourceCornerFailsToReproject =
    5610         135 :         GWKOneSourceCornerFailsToReproject(psJob);
    5611             : 
    5612             :     // Precompute the destination X pixel centers into the second half of padfX.
    5613       20216 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5614       20081 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    5615             : 
    5616             :     /* ==================================================================== */
    5617             :     /*      Loop over output lines.                                         */
    5618             :     /* ==================================================================== */
    5619       21242 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    5620             :     {
    5621             :         /* --------------------------------------------------------------------
    5622             :          */
    5623             :         /*      Set up the points to transform to source image space: X restored from the cached half of padfX, Y = center of this line, Z = 0. */
    5624             :         /* --------------------------------------------------------------------
    5625             :          */
    5626       21107 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    5627       21107 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    5628    43372400 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5629    43351300 :             padfY[iDstX] = dfY;
    5630       21107 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    5631             : 
    5632             :         /* --------------------------------------------------------------------
    5633             :          */
    5634             :         /*      Transform the points from destination pixel/line coordinates */
    5635             :         /*      to source pixel/line coordinates (padfX/padfY are read as source coordinates below). */
    5636             :         /* --------------------------------------------------------------------
    5637             :          */
    5638       21107 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    5639             :                              padfY, padfZ, pabSuccess);
    5640       21107 :         if (dfSrcCoordPrecision > 0.0)
    5641             :         {
    5642           0 :             GWKRoundSourceCoordinates(
    5643             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    5644             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    5645           0 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    5646             :         }
    5647             : 
    5648             :         /* ====================================================================
    5649             :          */
    5650             :         /*      Loop over pixels in output scanline. */
    5651             :         /* ====================================================================
    5652             :          */
    5653    43372400 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5654             :         {
    5655    43351300 :             GPtrDiff_t iSrcOffset = 0;
    5656    43351300 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    5657             :                                               padfX, padfY, nSrcXSize,
    5658             :                                               nSrcYSize, iSrcOffset))
    5659    42842700 :                 continue;
    5660             : 
    5661             :             /* --------------------------------------------------------------------
    5662             :              */
    5663             :             /*      Do not try to apply transparent/invalid source pixels to the
    5664             :              */
    5665             :             /*      destination.  This currently ignores the multi-pixel input
    5666             :              */
    5667             :             /*      of bilinear and cubic resamples. */
    5668             :             /* --------------------------------------------------------------------
    5669             :              */
    5670    31298900 :             double dfDensity = 1.0;
    5671             : 
    5672    31298900 :             if (poWK->pafUnifiedSrcDensity != nullptr)
    5673             :             {
    5674     1262880 :                 dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    5675     1262880 :                 if (dfDensity < SRC_DENSITY_THRESHOLD)
    5676             :                 {
    5677     1261590 :                     if (!bOneSourceCornerFailsToReproject)
    5678             :                     {
    5679     1261590 :                         continue;
    5680             :                     }
    5681           0 :                     else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
    5682             :                                  psJob, iSrcOffset))
    5683             :                     {
    5684           0 :                         dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    5685             :                     }
    5686             :                     else
    5687             :                     {
    5688           0 :                         continue;
    5689             :                     }
    5690             :                 }
    5691             :             }
    5692             : 
    5693    59665900 :             if (poWK->panUnifiedSrcValid != nullptr &&
    5694    29628600 :                 !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    5695             :             {
    5696    29531000 :                 if (!bOneSourceCornerFailsToReproject)
    5697             :                 {
    5698    29528700 :                     continue;
    5699             :                 }
    5700        2229 :                 else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
    5701             :                 {
    5702           0 :                     continue;
    5703             :                 }
    5704             :             }
    5705             : 
    5706             :             /* ====================================================================
    5707             :              */
    5708             :             /*      Loop processing each band. */
    5709             :             /* ====================================================================
    5710             :              */
    5711      508550 :             bool bHasFoundDensity = false;
    5712             : 
    5713      508550 :             const GPtrDiff_t iDstOffset =
    5714      508550 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;  // row-major index, computed in GPtrDiff_t
    5715     1348560 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    5716             :             {
    5717      840011 :                 double dfBandDensity = 0.0;
    5718      840011 :                 double dfValueReal = 0.0;
    5719             : 
    5720             :                 /* --------------------------------------------------------------------
    5721             :                  */
    5722             :                 /*      Collect the source value. */
    5723             :                 /* --------------------------------------------------------------------
    5724             :                  */
    5725      840011 :                 if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
    5726             :                     nSrcYSize == 1)
    5727             :                 {
    5728             :                     // FALSE is returned if dfBandDensity == 0, which is
    5729             :                     // checked below.
    5730        1012 :                     CPL_IGNORE_RET_VAL(GWKGetPixelValueReal(
    5731             :                         poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal));
    5732             :                 }
    5733      838999 :                 else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
    5734             :                 {
    5735        1326 :                     double dfValueImagIgnored = 0.0;
    5736        1326 :                     GWKBilinearResample4Sample(
    5737        1326 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5738        1326 :                         padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5739        1326 :                         &dfValueReal, &dfValueImagIgnored);
    5740             :                 }
    5741      837673 :                 else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
    5742             :                 {
    5743      299992 :                     if (bSrcMaskIsDensity)
    5744             :                     {
    5745         361 :                         if (poWK->eWorkingDataType == GDT_Byte)
    5746             :                         {
    5747         361 :                             GWKCubicResampleSrcMaskIsDensity4SampleRealT<GByte>(
    5748         361 :                                 poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5749         361 :                                 padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5750             :                                 &dfValueReal);
    5751             :                         }
    5752           0 :                         else if (poWK->eWorkingDataType == GDT_UInt16)
    5753             :                         {
    5754             :                             GWKCubicResampleSrcMaskIsDensity4SampleRealT<
    5755           0 :                                 GUInt16>(poWK, iBand,
    5756           0 :                                          padfX[iDstX] - poWK->nSrcXOff,
    5757           0 :                                          padfY[iDstX] - poWK->nSrcYOff,
    5758             :                                          &dfBandDensity, &dfValueReal);
    5759             :                         }
    5760             :                         else
    5761             :                         {
    5762           0 :                             GWKCubicResampleSrcMaskIsDensity4SampleReal(
    5763           0 :                                 poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5764           0 :                                 padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5765             :                                 &dfValueReal);
    5766             :                         }
    5767             :                     }
    5768             :                     else
    5769             :                     {
    5770      299631 :                         double dfValueImagIgnored = 0.0;
    5771      299631 :                         GWKCubicResample4Sample(
    5772      299631 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5773      299631 :                             padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5774             :                             &dfValueReal, &dfValueImagIgnored);
    5775      299992 :                     }
    5776             :                 }
    5777             :                 else
    5778             : #ifdef DEBUG
    5779             :                     // Only useful for clang static analyzer.
    5780      537681 :                     if (psWrkStruct != nullptr)
    5781             : #endif
    5782             :                     {
    5783      537681 :                         double dfValueImagIgnored = 0.0;
    5784      537681 :                         psWrkStruct->pfnGWKResample(
    5785      537681 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5786      537681 :                             padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5787             :                             &dfValueReal, &dfValueImagIgnored, psWrkStruct);
    5788             :                     }
    5789             : 
    5790             :                 // If we didn't find any valid inputs skip to next band.
    5791      840011 :                 if (dfBandDensity < BAND_DENSITY_THRESHOLD)
    5792           0 :                     continue;
    5793             : 
    5794      840011 :                 if (poWK->bApplyVerticalShift)
    5795             :                 {
    5796           0 :                     if (!std::isfinite(padfZ[iDstX]))
    5797           0 :                         continue;
    5798             :                     // Subtract padfZ[] since the coordinate transformation is
    5799             :                     // from target to source
    5800           0 :                     dfValueReal =
    5801           0 :                         dfValueReal * poWK->dfMultFactorVerticalShift -
    5802           0 :                         padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
    5803             :                 }
    5804             : 
    5805      840011 :                 bHasFoundDensity = true;
    5806             : 
    5807             :                 /* --------------------------------------------------------------------
    5808             :                  */
    5809             :                 /*      We have a computed value from the source.  Now apply it
    5810             :                  * to the destination pixel. */
    5811             :                 /*      Only the real component is written (GWKSetPixelValueReal). */
    5812             :                 /* --------------------------------------------------------------------
    5813             :                  */
    5814      840011 :                 GWKSetPixelValueReal(poWK, iBand, iDstOffset, dfBandDensity,
    5815             :                                      dfValueReal);
    5816             :             }
    5817             : 
    5818      508550 :             if (!bHasFoundDensity)
    5819           0 :                 continue;
    5820             : 
    5821             :             /* --------------------------------------------------------------------
    5822             :              */
    5823             :             /*      Update destination density/validity masks. */
    5824             :             /* --------------------------------------------------------------------
    5825             :              */
    5826      508550 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    5827             : 
    5828      508550 :             if (poWK->panDstValid != nullptr)
    5829             :             {
    5830      101460 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    5831             :             }
    5832             :         }  // Next iDstX.
    5833             : 
    5834             :         /* --------------------------------------------------------------------
    5835             :          */
    5836             :         /*      Report progress to the user; a true return from pfnProgress stops the line loop. */
    5837             :         /* --------------------------------------------------------------------
    5838             :          */
    5839       21107 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    5840           0 :             break;
    5841             :     }
    5842             : 
    5843             :     /* -------------------------------------------------------------------- */
    5844             :     /*      Cleanup and return.                                             */
    5845             :     /* -------------------------------------------------------------------- */
    5846         135 :     CPLFree(padfX);
    5847         135 :     CPLFree(padfY);
    5848         135 :     CPLFree(padfZ);
    5849         135 :     CPLFree(pabSuccess);
    5850         135 :     if (psWrkStruct)
    5851         119 :         GWKResampleDeleteWrkStruct(psWrkStruct);
    5852         135 : }
    5853             : // Real (non-complex) entry point: hands GWKRealCaseThread to GWKRun (defined outside this view) and returns its CPLErr.
    5854         135 : static CPLErr GWKRealCase(GDALWarpKernel *poWK)
    5855             : {
    5856         135 :     return GWKRun(poWK, "GWKRealCase", GWKRealCaseThread);
    5857             : }
    5858             : 
    5859             : /************************************************************************/
    5860             : /*                GWKResampleNoMasksOrDstDensityOnlyThreadInternal()    */
    5861             : /************************************************************************/
    5862             : 
    5863             : template <class T, GDALResampleAlg eResample, int bUse4SamplesFormula>
    5864        1222 : static void GWKResampleNoMasksOrDstDensityOnlyThreadInternal(void *pData)
    5865             : 
    5866             : {
    5867        1222 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    5868        1222 :     GDALWarpKernel *poWK = psJob->poWK;
    5869        1222 :     const int iYMin = psJob->iYMin;
    5870        1222 :     const int iYMax = psJob->iYMax;
    5871        1204 :     const double dfMultFactorVerticalShiftPipeline =
    5872        1222 :         poWK->bApplyVerticalShift
    5873          18 :             ? CPLAtof(CSLFetchNameValueDef(
    5874          18 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    5875             :                   "1.0"))
    5876             :             : 0.0;
    5877             : 
    5878        1222 :     const int nDstXSize = poWK->nDstXSize;
    5879        1222 :     const int nSrcXSize = poWK->nSrcXSize;
    5880        1222 :     const int nSrcYSize = poWK->nSrcYSize;
    5881             : 
    5882             :     /* -------------------------------------------------------------------- */
    5883             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    5884             :     /*      scanlines worth of positions.                                   */
    5885             :     /* -------------------------------------------------------------------- */
    5886             : 
    5887             :     // For x, 2 *, because we cache the precomputed values at the end.
    5888             :     double *padfX =
    5889        1222 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    5890             :     double *padfY =
    5891        1222 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5892             :     double *padfZ =
    5893        1222 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5894        1222 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    5895             : 
    5896        1222 :     const int nXRadius = poWK->nXRadius;
    5897             :     double *padfWeight =
    5898        1222 :         static_cast<double *>(CPLCalloc(1 + nXRadius * 2, sizeof(double)));
    5899        1222 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    5900        1222 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    5901        1222 :     const double dfErrorThreshold = CPLAtof(
    5902        1222 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    5903             : 
    5904             :     // Precompute values.
    5905      276172 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5906      274950 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    5907             : 
    5908             :     /* ==================================================================== */
    5909             :     /*      Loop over output lines.                                         */
    5910             :     /* ==================================================================== */
    5911      131338 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    5912             :     {
    5913             :         /* --------------------------------------------------------------------
    5914             :          */
    5915             :         /*      Setup points to transform to source image space. */
    5916             :         /* --------------------------------------------------------------------
    5917             :          */
    5918      130117 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    5919      130117 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    5920    57682838 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5921    57552683 :             padfY[iDstX] = dfY;
    5922      130117 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    5923             : 
    5924             :         /* --------------------------------------------------------------------
    5925             :          */
    5926             :         /*      Transform the points from destination pixel/line coordinates */
    5927             :         /*      to source pixel/line coordinates. */
    5928             :         /* --------------------------------------------------------------------
    5929             :          */
    5930      130117 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    5931             :                              padfY, padfZ, pabSuccess);
    5932      130117 :         if (dfSrcCoordPrecision > 0.0)
    5933             :         {
    5934        2500 :             GWKRoundSourceCoordinates(
    5935             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    5936             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    5937        2500 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    5938             :         }
    5939             : 
    5940             :         /* ====================================================================
    5941             :          */
    5942             :         /*      Loop over pixels in output scanline. */
    5943             :         /* ====================================================================
    5944             :          */
    5945    57848948 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5946             :         {
    5947    57718793 :             GPtrDiff_t iSrcOffset = 0;
    5948    57718793 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    5949             :                                               padfX, padfY, nSrcXSize,
    5950             :                                               nSrcYSize, iSrcOffset))
    5951     6456527 :                 continue;
    5952             : 
    5953             :             /* ====================================================================
    5954             :              */
    5955             :             /*      Loop processing each band. */
    5956             :             /* ====================================================================
    5957             :              */
    5958    51181771 :             const GPtrDiff_t iDstOffset =
    5959    51181771 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    5960             : 
    5961   143410586 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    5962             :             {
    5963    92148223 :                 T value = 0;
    5964             :                 if constexpr (eResample == GRA_NearestNeighbour)
    5965             :                 {
    5966    76417749 :                     value = reinterpret_cast<T *>(
    5967    76417749 :                         poWK->papabySrcImage[iBand])[iSrcOffset];
    5968             :                 }
    5969             :                 else if constexpr (bUse4SamplesFormula)
    5970             :                 {
    5971             :                     if constexpr (eResample == GRA_Bilinear)
    5972     4726176 :                         GWKBilinearResampleNoMasks4SampleT(
    5973     4726176 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5974     4726176 :                             padfY[iDstX] - poWK->nSrcYOff, &value);
    5975             :                     else
    5976     1826603 :                         GWKCubicResampleNoMasks4SampleT(
    5977     1826603 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5978     1826603 :                             padfY[iDstX] - poWK->nSrcYOff, &value);
    5979             :                 }
    5980             :                 else
    5981             :                 {
    5982     9177695 :                     GWKResampleNoMasksT(
    5983     9177695 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5984     9177695 :                         padfY[iDstX] - poWK->nSrcYOff, &value, padfWeight);
    5985             :                 }
    5986             : 
    5987    92145773 :                 if (poWK->bApplyVerticalShift)
    5988             :                 {
    5989         818 :                     if (!std::isfinite(padfZ[iDstX]))
    5990           0 :                         continue;
    5991             :                     // Subtract padfZ[] since the coordinate transformation is
    5992             :                     // from target to source
    5993       84086 :                     value = GWKClampValueT<T>(
    5994         818 :                         value * poWK->dfMultFactorVerticalShift -
    5995         818 :                         padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
    5996             :                 }
    5997             : 
    5998    92228793 :                 if (poWK->pafDstDensity)
    5999    11712305 :                     poWK->pafDstDensity[iDstOffset] = 1.0f;
    6000             : 
    6001    92228793 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
    6002             :                     value;
    6003             :             }
    6004             :         }
    6005             : 
    6006             :         /* --------------------------------------------------------------------
    6007             :          */
    6008             :         /*      Report progress to the user, and optionally cancel out. */
    6009             :         /* --------------------------------------------------------------------
    6010             :          */
    6011      130117 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    6012           1 :             break;
    6013             :     }
    6014             : 
    6015             :     /* -------------------------------------------------------------------- */
    6016             :     /*      Cleanup and return.                                             */
    6017             :     /* -------------------------------------------------------------------- */
    6018        1222 :     CPLFree(padfX);
    6019        1222 :     CPLFree(padfY);
    6020        1222 :     CPLFree(padfZ);
    6021        1222 :     CPLFree(pabSuccess);
    6022        1222 :     CPLFree(padfWeight);
    6023        1222 : }
    6024             : 
    6025             : template <class T, GDALResampleAlg eResample>
    6026         923 : static void GWKResampleNoMasksOrDstDensityOnlyThread(void *pData)
    6027             : {
    6028         923 :     GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
    6029             :         pData);
    6030         923 : }
    6031             : 
    6032             : template <class T, GDALResampleAlg eResample>
    6033         299 : static void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread(void *pData)
    6034             : 
    6035             : {
    6036         299 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    6037         299 :     GDALWarpKernel *poWK = psJob->poWK;
    6038             :     static_assert(eResample == GRA_Bilinear || eResample == GRA_Cubic);
    6039         299 :     const bool bUse4SamplesFormula =
    6040         299 :         poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
    6041         299 :     if (bUse4SamplesFormula)
    6042         199 :         GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, TRUE>(
    6043             :             pData);
    6044             :     else
    6045         100 :         GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
    6046             :             pData);
    6047         299 : }
    6048             : 
    6049         849 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6050             : {
    6051         849 :     return GWKRun(
    6052             :         poWK, "GWKNearestNoMasksOrDstDensityOnlyByte",
    6053         849 :         GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_NearestNeighbour>);
    6054             : }
    6055             : 
    6056         128 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6057             : {
    6058         128 :     return GWKRun(
    6059             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyByte",
    6060             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte,
    6061         128 :                                                            GRA_Bilinear>);
    6062             : }
    6063             : 
    6064          72 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6065             : {
    6066          72 :     return GWKRun(
    6067             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyByte",
    6068          72 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte, GRA_Cubic>);
    6069             : }
    6070             : 
    6071          39 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
    6072             : {
    6073          39 :     return GWKRun(
    6074             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyFloat",
    6075          39 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, GRA_Cubic>);
    6076             : }
    6077             : 
    6078             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    6079             : 
    6080             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
    6081             : {
    6082             :     return GWKRun(
    6083             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyDouble",
    6084             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double, GRA_Cubic>);
    6085             : }
    6086             : #endif
    6087             : 
    6088          16 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6089             : {
    6090          16 :     return GWKRun(
    6091             :         poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyByte",
    6092          16 :         GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_CubicSpline>);
    6093             : }
    6094             : 
    6095             : /************************************************************************/
    6096             : /*                          GWKNearestByte()                            */
    6097             : /*                                                                      */
    6098             : /*      Case for 8bit input data with nearest neighbour resampling      */
    6099             : /*      using valid flags. Should be as fast as possible for this       */
    6100             : /*      particular transformation type.                                 */
    6101             : /************************************************************************/
    6102             : 
    6103         326 : template <class T> static void GWKNearestThread(void *pData)
    6104             : 
    6105             : {
    6106         326 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    6107         326 :     GDALWarpKernel *poWK = psJob->poWK;
    6108         326 :     const int iYMin = psJob->iYMin;
    6109         326 :     const int iYMax = psJob->iYMax;
    6110         326 :     const double dfMultFactorVerticalShiftPipeline =
    6111         326 :         poWK->bApplyVerticalShift
    6112           0 :             ? CPLAtof(CSLFetchNameValueDef(
    6113           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    6114             :                   "1.0"))
    6115             :             : 0.0;
    6116             : 
    6117         326 :     const int nDstXSize = poWK->nDstXSize;
    6118         326 :     const int nSrcXSize = poWK->nSrcXSize;
    6119         326 :     const int nSrcYSize = poWK->nSrcYSize;
    6120             : 
    6121             :     /* -------------------------------------------------------------------- */
    6122             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    6123             :     /*      scanlines worth of positions.                                   */
    6124             :     /* -------------------------------------------------------------------- */
    6125             : 
    6126             :     // For x, 2 *, because we cache the precomputed values at the end.
    6127             :     double *padfX =
    6128         326 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    6129             :     double *padfY =
    6130         326 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6131             :     double *padfZ =
    6132         326 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6133         326 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    6134             : 
    6135         326 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    6136         326 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    6137         326 :     const double dfErrorThreshold = CPLAtof(
    6138         326 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    6139             : 
    6140             :     const bool bOneSourceCornerFailsToReproject =
    6141         326 :         GWKOneSourceCornerFailsToReproject(psJob);
    6142             : 
    6143             :     // Precompute values.
    6144       48735 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6145       48409 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    6146             : 
    6147             :     /* ==================================================================== */
    6148             :     /*      Loop over output lines.                                         */
    6149             :     /* ==================================================================== */
    6150       35224 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    6151             :     {
    6152             : 
    6153             :         /* --------------------------------------------------------------------
    6154             :          */
    6155             :         /*      Setup points to transform to source image space. */
    6156             :         /* --------------------------------------------------------------------
    6157             :          */
    6158       34898 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    6159       34898 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    6160     7418613 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6161     7383713 :             padfY[iDstX] = dfY;
    6162       34898 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    6163             : 
    6164             :         /* --------------------------------------------------------------------
    6165             :          */
    6166             :         /*      Transform the points from destination pixel/line coordinates */
    6167             :         /*      to source pixel/line coordinates. */
    6168             :         /* --------------------------------------------------------------------
    6169             :          */
    6170       34898 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    6171             :                              padfY, padfZ, pabSuccess);
    6172       34898 :         if (dfSrcCoordPrecision > 0.0)
    6173             :         {
    6174           0 :             GWKRoundSourceCoordinates(
    6175             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    6176             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    6177           0 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    6178             :         }
    6179             :         /* ====================================================================
    6180             :          */
    6181             :         /*      Loop over pixels in output scanline. */
    6182             :         /* ====================================================================
    6183             :          */
    6184     7418613 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6185             :         {
    6186     7383713 :             GPtrDiff_t iSrcOffset = 0;
    6187     7383713 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    6188             :                                               padfX, padfY, nSrcXSize,
    6189             :                                               nSrcYSize, iSrcOffset))
    6190     2060555 :                 continue;
    6191             : 
    6192             :             /* --------------------------------------------------------------------
    6193             :              */
    6194             :             /*      Do not try to apply invalid source pixels to the dest. */
    6195             :             /* --------------------------------------------------------------------
    6196             :              */
    6197     7305772 :             if (poWK->panUnifiedSrcValid != nullptr &&
    6198      930841 :                 !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    6199             :             {
    6200       49669 :                 if (!bOneSourceCornerFailsToReproject)
    6201             :                 {
    6202       42184 :                     continue;
    6203             :                 }
    6204        7485 :                 else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
    6205             :                 {
    6206        5224 :                     continue;
    6207             :                 }
    6208             :             }
    6209             : 
    6210             :             /* --------------------------------------------------------------------
    6211             :              */
    6212             :             /*      Do not try to apply transparent source pixels to the
    6213             :              * destination.*/
    6214             :             /* --------------------------------------------------------------------
    6215             :              */
    6216     6327520 :             double dfDensity = 1.0;
    6217             : 
    6218     6327520 :             if (poWK->pafUnifiedSrcDensity != nullptr)
    6219             :             {
    6220     1162245 :                 dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
    6221     1162245 :                 if (dfDensity < SRC_DENSITY_THRESHOLD)
    6222     1004371 :                     continue;
    6223             :             }
    6224             : 
    6225             :             /* ====================================================================
    6226             :              */
    6227             :             /*      Loop processing each band. */
    6228             :             /* ====================================================================
    6229             :              */
    6230             : 
    6231     5323148 :             const GPtrDiff_t iDstOffset =
    6232     5323148 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    6233             : 
    6234    12436786 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6235             :             {
    6236     7113618 :                 T value = 0;
    6237     7113618 :                 double dfBandDensity = 0.0;
    6238             : 
    6239             :                 /* --------------------------------------------------------------------
    6240             :                  */
    6241             :                 /*      Collect the source value. */
    6242             :                 /* --------------------------------------------------------------------
    6243             :                  */
    6244     7113618 :                 if (GWKGetPixelT(poWK, iBand, iSrcOffset, &dfBandDensity,
    6245             :                                  &value))
    6246             :                 {
    6247             : 
    6248     7113608 :                     if (poWK->bApplyVerticalShift)
    6249             :                     {
    6250           0 :                         if (!std::isfinite(padfZ[iDstX]))
    6251           0 :                             continue;
    6252             :                         // Subtract padfZ[] since the coordinate transformation
    6253             :                         // is from target to source
    6254           0 :                         value = GWKClampValueT<T>(
    6255           0 :                             value * poWK->dfMultFactorVerticalShift -
    6256           0 :                             padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
    6257             :                     }
    6258             : 
    6259     7113608 :                     if (dfBandDensity < 1.0)
    6260             :                     {
    6261      159076 :                         if (dfBandDensity == 0.0)
    6262             :                         {
    6263             :                             // Do nothing.
    6264             :                         }
    6265             :                         else
    6266             :                         {
    6267             :                             // Let the general code take care of mixing.
    6268      159076 :                             GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
    6269             :                                                   dfBandDensity, value);
    6270             :                         }
    6271             :                     }
    6272             :                     else
    6273             :                     {
    6274     6954527 :                         reinterpret_cast<T *>(
    6275     6954527 :                             poWK->papabyDstImage[iBand])[iDstOffset] = value;
    6276             :                     }
    6277             :                 }
    6278             :             }
    6279             : 
    6280             :             /* --------------------------------------------------------------------
    6281             :              */
    6282             :             /*      Mark this pixel valid/opaque in the output. */
    6283             :             /* --------------------------------------------------------------------
    6284             :              */
    6285     5323148 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    6286             : 
    6287     5323148 :             if (poWK->panDstValid != nullptr)
    6288             :             {
    6289     4643710 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    6290             :             }
    6291             :         } /* Next iDstX */
    6292             : 
    6293             :         /* --------------------------------------------------------------------
    6294             :          */
    6295             :         /*      Report progress to the user, and optionally cancel out. */
    6296             :         /* --------------------------------------------------------------------
    6297             :          */
    6298       34898 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    6299           0 :             break;
    6300             :     }
    6301             : 
    6302             :     /* -------------------------------------------------------------------- */
    6303             :     /*      Cleanup and return.                                             */
    6304             :     /* -------------------------------------------------------------------- */
    6305         326 :     CPLFree(padfX);
    6306         326 :     CPLFree(padfY);
    6307         326 :     CPLFree(padfZ);
    6308         326 :     CPLFree(pabSuccess);
    6309         326 : }
    6310             : 
    6311         269 : static CPLErr GWKNearestByte(GDALWarpKernel *poWK)
    6312             : {
    6313         269 :     return GWKRun(poWK, "GWKNearestByte", GWKNearestThread<GByte>);
    6314             : }
    6315             : 
    6316          24 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6317             : {
    6318          24 :     return GWKRun(
    6319             :         poWK, "GWKNearestNoMasksOrDstDensityOnlyShort",
    6320          24 :         GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_NearestNeighbour>);
    6321             : }
    6322             : 
    6323          21 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6324             : {
    6325          21 :     return GWKRun(
    6326             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyShort",
    6327             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16,
    6328          21 :                                                            GRA_Bilinear>);
    6329             : }
    6330             : 
    6331           9 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
    6332             : {
    6333           9 :     return GWKRun(
    6334             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyUShort",
    6335             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16,
    6336           9 :                                                            GRA_Bilinear>);
    6337             : }
    6338             : 
    6339           5 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
    6340             : {
    6341           5 :     return GWKRun(
    6342             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyFloat",
    6343             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float,
    6344           5 :                                                            GRA_Bilinear>);
    6345             : }
    6346             : 
    6347             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    6348             : 
    6349             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
    6350             : {
    6351             :     return GWKRun(
    6352             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyDouble",
    6353             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double,
    6354             :                                                            GRA_Bilinear>);
    6355             : }
    6356             : #endif
    6357             : 
    6358           8 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6359             : {
    6360           8 :     return GWKRun(
    6361             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyShort",
    6362           8 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16, GRA_Cubic>);
    6363             : }
    6364             : 
    6365          14 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
    6366             : {
    6367          14 :     return GWKRun(
    6368             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyUShort",
    6369          14 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16, GRA_Cubic>);
    6370             : }
    6371             : 
    6372           9 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6373             : {
    6374           9 :     return GWKRun(
    6375             :         poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyShort",
    6376           9 :         GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_CubicSpline>);
    6377             : }
    6378             : 
    6379           8 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
    6380             : {
    6381           8 :     return GWKRun(
    6382             :         poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyUShort",
    6383           8 :         GWKResampleNoMasksOrDstDensityOnlyThread<GUInt16, GRA_CubicSpline>);
    6384             : }
    6385             : 
    6386          22 : static CPLErr GWKNearestShort(GDALWarpKernel *poWK)
    6387             : {
    6388          22 :     return GWKRun(poWK, "GWKNearestShort", GWKNearestThread<GInt16>);
    6389             : }
    6390             : 
    6391          14 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
    6392             : {
    6393          14 :     return GWKRun(
    6394             :         poWK, "GWKNearestNoMasksOrDstDensityOnlyFloat",
    6395          14 :         GWKResampleNoMasksOrDstDensityOnlyThread<float, GRA_NearestNeighbour>);
    6396             : }
    6397             : 
    6398          31 : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK)
    6399             : {
    6400          31 :     return GWKRun(poWK, "GWKNearestFloat", GWKNearestThread<float>);
    6401             : }
    6402             : 
    6403             : /************************************************************************/
    6404             : /*                           GWKAverageOrMode()                         */
    6405             : /*                                                                      */
    6406             : /************************************************************************/
    6407             : 
    6408             : static void GWKAverageOrModeThread(void *pData);
    6409             : 
    6410         117 : static CPLErr GWKAverageOrMode(GDALWarpKernel *poWK)
    6411             : {
    6412         117 :     return GWKRun(poWK, "GWKAverageOrMode", GWKAverageOrModeThread);
    6413             : }
    6414             : 
    6415             : // Overall logic based on GWKGeneralCaseThread().
    6416         117 : static void GWKAverageOrModeThread(void *pData)
    6417             : {
    6418         117 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    6419         117 :     GDALWarpKernel *poWK = psJob->poWK;
    6420         117 :     const int iYMin = psJob->iYMin;
    6421         117 :     const int iYMax = psJob->iYMax;
    6422             :     const double dfMultFactorVerticalShiftPipeline =
    6423         117 :         poWK->bApplyVerticalShift
    6424         117 :             ? CPLAtof(CSLFetchNameValueDef(
    6425           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    6426             :                   "1.0"))
    6427         117 :             : 0.0;
    6428             : 
    6429         117 :     const int nDstXSize = poWK->nDstXSize;
    6430         117 :     const int nSrcXSize = poWK->nSrcXSize;
    6431         117 :     const int nSrcYSize = poWK->nSrcYSize;
    6432             : 
    6433             :     /* -------------------------------------------------------------------- */
    6434             :     /*      Find out which algorithm to use (small optim.)                  */
    6435             :     /* -------------------------------------------------------------------- */
    6436         117 :     int nAlgo = 0;
    6437             : 
    6438             :     // These vars are only used with nAlgo == GWKAOM_Imode.
    6439         117 :     int *panVals = nullptr;
    6440         117 :     int nBins = 0;
    6441         117 :     int nBinsOffset = 0;
    6442             : 
    6443             :     // Only used with nAlgo == GWKAOM_Fmode.
    6444         117 :     float *pafRealVals = nullptr;
    6445         117 :     float *pafImagVals = nullptr;
    6446         117 :     int *panRealSums = nullptr;
    6447         117 :     int *panImagSums = nullptr;
    6448             : 
    6449             :     // Only used with nAlgo == GWKAOM_Quant.
    6450         117 :     float quant = 0.5;
    6451             : 
    6452             :     // To control array allocation only when data type is complex
    6453         117 :     const bool bIsComplex = GDALDataTypeIsComplex(poWK->eWorkingDataType) != 0;
    6454             : 
    6455         117 :     if (poWK->eResample == GRA_Average)
    6456             :     {
    6457          70 :         nAlgo = GWKAOM_Average;
    6458             :     }
    6459          47 :     else if (poWK->eResample == GRA_RMS)
    6460             :     {
    6461           9 :         nAlgo = GWKAOM_RMS;
    6462             :     }
    6463          38 :     else if (poWK->eResample == GRA_Mode)
    6464             :     {
    6465             :         // TODO check color table count > 256.
    6466          11 :         if (poWK->eWorkingDataType == GDT_Byte ||
    6467           5 :             poWK->eWorkingDataType == GDT_UInt16 ||
    6468           5 :             poWK->eWorkingDataType == GDT_Int16)
    6469             :         {
    6470           9 :             nAlgo = GWKAOM_Imode;
    6471             : 
    6472             :             // In the case of a paletted or non-paletted byte band,
    6473             :             // Input values are between 0 and 255.
    6474           9 :             if (poWK->eWorkingDataType == GDT_Byte)
    6475             :             {
    6476           6 :                 nBins = 256;
    6477             :             }
    6478             :             // In the case of Int8, input values are between -128 and 127.
    6479           3 :             else if (poWK->eWorkingDataType == GDT_Int8)
    6480             :             {
    6481           0 :                 nBins = 256;
    6482           0 :                 nBinsOffset = 128;
    6483             :             }
    6484             :             // In the case of Int16, input values are between -32768 and 32767.
    6485           3 :             else if (poWK->eWorkingDataType == GDT_Int16)
    6486             :             {
    6487           3 :                 nBins = 65536;
    6488           3 :                 nBinsOffset = 32768;
    6489             :             }
    6490             :             // In the case of UInt16, input values are between 0 and 65535.
    6491           0 :             else if (poWK->eWorkingDataType == GDT_UInt16)
    6492             :             {
    6493           0 :                 nBins = 65536;
    6494             :             }
    6495             :             panVals =
    6496           9 :                 static_cast<int *>(VSI_MALLOC_VERBOSE(nBins * sizeof(int)));
    6497           9 :             if (panVals == nullptr)
    6498           0 :                 return;
    6499             :         }
    6500             :         else
    6501             :         {
    6502           2 :             nAlgo = GWKAOM_Fmode;
    6503             : 
    6504           2 :             if (nSrcXSize > 0 && nSrcYSize > 0)
    6505             :             {
    6506             :                 pafRealVals = static_cast<float *>(
    6507           2 :                     VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
    6508             :                 panRealSums = static_cast<int *>(
    6509           2 :                     VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(int)));
    6510           2 :                 if (pafRealVals == nullptr || panRealSums == nullptr)
    6511             :                 {
    6512           0 :                     VSIFree(pafRealVals);
    6513           0 :                     VSIFree(panRealSums);
    6514           0 :                     return;
    6515             :                 }
    6516             :             }
    6517             :         }
    6518             :     }
    6519          27 :     else if (poWK->eResample == GRA_Max)
    6520             :     {
    6521           6 :         nAlgo = GWKAOM_Max;
    6522             :     }
    6523          21 :     else if (poWK->eResample == GRA_Min)
    6524             :     {
    6525           5 :         nAlgo = GWKAOM_Min;
    6526             :     }
    6527          16 :     else if (poWK->eResample == GRA_Med)
    6528             :     {
    6529           6 :         nAlgo = GWKAOM_Quant;
    6530           6 :         quant = 0.5;
    6531             :     }
    6532          10 :     else if (poWK->eResample == GRA_Q1)
    6533             :     {
    6534           5 :         nAlgo = GWKAOM_Quant;
    6535           5 :         quant = 0.25;
    6536             :     }
    6537           5 :     else if (poWK->eResample == GRA_Q3)
    6538             :     {
    6539           5 :         nAlgo = GWKAOM_Quant;
    6540           5 :         quant = 0.75;
    6541             :     }
    6542             : #ifdef disabled
    6543             :     else if (poWK->eResample == GRA_Sum)
    6544             :     {
    6545             :         nAlgo = GWKAOM_Sum;
    6546             :     }
    6547             : #endif
    6548             :     else
    6549             :     {
    6550             :         // Other resample algorithms not permitted here.
    6551           0 :         CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread() ERROR, "
    6552             :                          "illegal resample");
    6553           0 :         return;
    6554             :     }
    6555             : 
    6556         117 :     CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread() using algo %d",
    6557             :              nAlgo);
    6558             : 
    6559             :     /* -------------------------------------------------------------------- */
    6560             :     /*      Allocate x,y,z coordinate arrays for transformation ... two     */
    6561             :     /*      scanlines worth of positions.                                   */
    6562             :     /* -------------------------------------------------------------------- */
    6563             : 
    6564             :     double *padfX =
    6565         117 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6566             :     double *padfY =
    6567         117 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6568             :     double *padfZ =
    6569         117 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6570             :     double *padfX2 =
    6571         117 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6572             :     double *padfY2 =
    6573         117 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6574             :     double *padfZ2 =
    6575         117 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6576         117 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    6577         117 :     int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    6578             : 
    6579         117 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    6580         117 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    6581         117 :     const double dfErrorThreshold = CPLAtof(
    6582         117 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    6583             : 
    6584             :     const double dfExcludedValuesThreshold =
    6585         117 :         CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
    6586             :                                      "EXCLUDED_VALUES_PCT_THRESHOLD", "50")) /
    6587         117 :         100.0;
    6588             :     const double dfNodataValuesThreshold =
    6589         117 :         CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
    6590             :                                      "NODATA_VALUES_PCT_THRESHOLD", "100")) /
    6591         117 :         100.0;
    6592             : 
    6593             :     const int nXMargin =
    6594         117 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
    6595             :     const int nYMargin =
    6596         117 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
    6597             : 
    6598             :     /* ==================================================================== */
    6599             :     /*      Loop over output lines.                                         */
    6600             :     /* ==================================================================== */
    6601        6001 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    6602             :     {
    6603             : 
    6604             :         /* --------------------------------------------------------------------
    6605             :          */
    6606             :         /*      Setup points to transform to source image space. */
    6607             :         /* --------------------------------------------------------------------
    6608             :          */
    6609     1429210 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6610             :         {
    6611     1423320 :             padfX[iDstX] = iDstX + poWK->nDstXOff;
    6612     1423320 :             padfY[iDstX] = iDstY + poWK->nDstYOff;
    6613     1423320 :             padfZ[iDstX] = 0.0;
    6614     1423320 :             padfX2[iDstX] = iDstX + 1.0 + poWK->nDstXOff;
    6615     1423320 :             padfY2[iDstX] = iDstY + 1.0 + poWK->nDstYOff;
    6616     1423320 :             padfZ2[iDstX] = 0.0;
    6617             :         }
    6618             : 
    6619             :         /* --------------------------------------------------------------------
    6620             :          */
    6621             :         /*      Transform the points from destination pixel/line coordinates */
    6622             :         /*      to source pixel/line coordinates. */
    6623             :         /* --------------------------------------------------------------------
    6624             :          */
    6625        5884 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    6626             :                              padfY, padfZ, pabSuccess);
    6627        5884 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX2,
    6628             :                              padfY2, padfZ2, pabSuccess2);
    6629             : 
    6630        5884 :         if (dfSrcCoordPrecision > 0.0)
    6631             :         {
    6632           0 :             GWKRoundSourceCoordinates(
    6633             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    6634             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    6635           0 :                 poWK->nDstXOff, iDstY + poWK->nDstYOff);
    6636           0 :             GWKRoundSourceCoordinates(
    6637             :                 nDstXSize, padfX2, padfY2, padfZ2, pabSuccess2,
    6638             :                 dfSrcCoordPrecision, dfErrorThreshold, poWK->pfnTransformer,
    6639           0 :                 psJob->pTransformerArg, 1.0 + poWK->nDstXOff,
    6640           0 :                 iDstY + 1.0 + poWK->nDstYOff);
    6641             :         }
    6642             : 
    6643             :         /* ====================================================================
    6644             :          */
    6645             :         /*      Loop over pixels in output scanline. */
    6646             :         /* ====================================================================
    6647             :          */
    6648     1429210 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6649             :         {
    6650     1423320 :             GPtrDiff_t iSrcOffset = 0;
    6651     1423320 :             double dfDensity = 1.0;
    6652     1423320 :             bool bHasFoundDensity = false;
    6653             : 
    6654     1423320 :             if (!pabSuccess[iDstX] || !pabSuccess2[iDstX])
    6655      311460 :                 continue;
    6656             : 
    6657             :             // Add some checks so that padfX[iDstX] - poWK->nSrcXOff is in
    6658             :             // reasonable range (https://github.com/OSGeo/gdal/issues/2365)
    6659     1423320 :             if (!(padfX[iDstX] - poWK->nSrcXOff >= -nXMargin &&
    6660     1423310 :                   padfX2[iDstX] - poWK->nSrcXOff >= -nXMargin &&
    6661     1423310 :                   padfY[iDstX] - poWK->nSrcYOff >= -nYMargin &&
    6662     1423290 :                   padfY2[iDstX] - poWK->nSrcYOff >= -nYMargin &&
    6663     1423290 :                   padfX[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
    6664     1423290 :                   padfX2[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
    6665     1423280 :                   padfY[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin &&
    6666     1423280 :                   padfY2[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin))
    6667             :             {
    6668          62 :                 continue;
    6669             :             }
    6670             : 
    6671     1423260 :             const GPtrDiff_t iDstOffset =
    6672     1423260 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    6673             : 
    6674             :             // Compute corners in source crs.
    6675             : 
    6676             :             // The transformation might not have preserved ordering of
    6677             :             // coordinates so do the necessary swapping (#5433).
    6678             :             // NOTE: this is really an approximate fix. To do something
    6679             :             // more precise we would, for example, need to compute the
    6680             :             // transformation of coordinates in the
    6681             :             // [iDstX,iDstY]x[iDstX+1,iDstY+1] square back to source
    6682             :             // coordinates, and take the bounding box of the resulting
    6683             :             // source coordinates.
    6684             : 
    6685     1423260 :             if (padfX[iDstX] > padfX2[iDstX])
    6686      268744 :                 std::swap(padfX[iDstX], padfX2[iDstX]);
    6687             : 
    6688             :             // Detect situations where the target pixel is close to the
    6689             :             // antimeridian and when padfX[iDstX] and padfX2[iDstX] are very
    6690             :             // close to the left-most and right-most columns of the source
    6691             :             // raster. The 2 value below was experimentally determined to
    6692             :             // avoid false-positives and false-negatives.
    6693             :             // Addresses https://github.com/OSGeo/gdal/issues/6478
    6694     1423260 :             bool bWrapOverX = false;
    6695     1423260 :             const int nThresholdWrapOverX = std::min(2, nSrcXSize / 10);
    6696     1423260 :             if (poWK->nSrcXOff == 0 &&
    6697     1423260 :                 padfX[iDstX] * poWK->dfXScale < nThresholdWrapOverX &&
    6698       13274 :                 (nSrcXSize - padfX2[iDstX]) * poWK->dfXScale <
    6699             :                     nThresholdWrapOverX)
    6700             :             {
    6701        1040 :                 bWrapOverX = true;
    6702        1040 :                 std::swap(padfX[iDstX], padfX2[iDstX]);
    6703        1040 :                 padfX2[iDstX] += nSrcXSize;
    6704             :             }
    6705             : 
    6706     1423260 :             const double dfXMin = padfX[iDstX] - poWK->nSrcXOff;
    6707     1423260 :             const double dfXMax = padfX2[iDstX] - poWK->nSrcXOff;
    6708     1423260 :             constexpr double EPS = 1e-10;
    6709             :             // Check that [dfXMin, dfXMax] intersects with [0,nSrcXSize] with a tolerance
    6710     1423260 :             if (!(dfXMax > -EPS && dfXMin < nSrcXSize + EPS))
    6711          72 :                 continue;
    6712     1423190 :             int iSrcXMin = static_cast<int>(std::max(floor(dfXMin + EPS), 0.0));
    6713     1423190 :             int iSrcXMax = static_cast<int>(
    6714     1423190 :                 std::min(ceil(dfXMax - EPS), static_cast<double>(INT_MAX)));
    6715     1423190 :             if (!bWrapOverX)
    6716     1422150 :                 iSrcXMax = std::min(iSrcXMax, nSrcXSize);
    6717     1423190 :             if (iSrcXMin == iSrcXMax && iSrcXMax < nSrcXSize)
    6718         472 :                 iSrcXMax++;
    6719             : 
    6720     1423190 :             if (padfY[iDstX] > padfY2[iDstX])
    6721      270107 :                 std::swap(padfY[iDstX], padfY2[iDstX]);
    6722     1423190 :             const double dfYMin = padfY[iDstX] - poWK->nSrcYOff;
    6723     1423190 :             const double dfYMax = padfY2[iDstX] - poWK->nSrcYOff;
    6724             :             // Check that [dfYMin, dfYMax] intersects with [0,nSrcYSize] with a tolerance
    6725     1423190 :             if (!(dfYMax > -EPS && dfYMin < nSrcYSize + EPS))
    6726          36 :                 continue;
    6727     1423160 :             int iSrcYMin = static_cast<int>(std::max(floor(dfYMin + EPS), 0.0));
    6728             :             int iSrcYMax =
    6729     1423160 :                 std::min(static_cast<int>(ceil(dfYMax - EPS)), nSrcYSize);
    6730     1423160 :             if (iSrcYMin == iSrcYMax && iSrcYMax < nSrcYSize)
    6731           0 :                 iSrcYMax++;
    6732             : 
    6733             : #define COMPUTE_WEIGHT_Y(iSrcY)                                                \
    6734             :     ((iSrcY == iSrcYMin)                                                       \
    6735             :          ? ((iSrcYMin + 1 == iSrcYMax) ? 1.0 : 1 - (dfYMin - iSrcYMin))        \
    6736             :      : (iSrcY + 1 == iSrcYMax) ? 1 - (iSrcYMax - dfYMax)                       \
    6737             :                                : 1.0)
    6738             : 
    6739             : #define COMPUTE_WEIGHT(iSrcX, dfWeightY)                                       \
    6740             :     ((iSrcX == iSrcXMin)       ? ((iSrcXMin + 1 == iSrcXMax)                   \
    6741             :                                       ? dfWeightY                              \
    6742             :                                       : dfWeightY * (1 - (dfXMin - iSrcXMin))) \
    6743             :      : (iSrcX + 1 == iSrcXMax) ? dfWeightY * (1 - (iSrcXMax - dfXMax))         \
    6744             :                                : dfWeightY)
    6745             : 
    6746     1423160 :             bool bDone = false;
    6747             : 
    6748             :             // Special Average mode where we process all bands together,
    6749             :             // to avoid averaging tuples that match an entry of m_aadfExcludedValues
    6750     1947230 :             if (nAlgo == GWKAOM_Average &&
    6751      524072 :                 (!poWK->m_aadfExcludedValues.empty() ||
    6752      393224 :                  dfNodataValuesThreshold < 1 - EPS) &&
    6753     1947230 :                 !poWK->bApplyVerticalShift && !bIsComplex)
    6754             :             {
    6755      393224 :                 double dfTotalWeightInvalid = 0.0;
    6756      393224 :                 double dfTotalWeightExcluded = 0.0;
    6757      393224 :                 double dfTotalWeightRegular = 0.0;
    6758      786448 :                 std::vector<double> adfValueReal(poWK->nBands, 0);
    6759      786448 :                 std::vector<double> adfValueAveraged(poWK->nBands, 0);
    6760             :                 std::vector<int> anCountExcludedValues(
    6761      393224 :                     poWK->m_aadfExcludedValues.size(), 0);
    6762             : 
    6763     1572890 :                 for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    6764             :                 {
    6765     1179660 :                     const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    6766     1179660 :                     iSrcOffset =
    6767     1179660 :                         iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    6768     5111860 :                     for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    6769             :                          iSrcX++, iSrcOffset++)
    6770             :                     {
    6771     3932190 :                         if (bWrapOverX)
    6772           0 :                             iSrcOffset =
    6773           0 :                                 (iSrcX % nSrcXSize) +
    6774           0 :                                 static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    6775             : 
    6776     3932190 :                         const double dfWeight =
    6777     3932190 :                             COMPUTE_WEIGHT(iSrcX, dfWeightY);
    6778     3932190 :                         if (dfWeight <= 0)
    6779           0 :                             continue;
    6780             : 
    6781     3932200 :                         if (poWK->panUnifiedSrcValid != nullptr &&
    6782          12 :                             !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    6783             :                         {
    6784           3 :                             dfTotalWeightInvalid += dfWeight;
    6785           3 :                             continue;
    6786             :                         }
    6787             : 
    6788     3932190 :                         bool bAllValid = true;
    6789     7274700 :                         for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6790             :                         {
    6791     6160450 :                             double dfBandDensity = 0;
    6792     6160450 :                             double dfValueImagTmp = 0;
    6793     9502960 :                             if (!(GWKGetPixelValue(
    6794             :                                       poWK, iBand, iSrcOffset, &dfBandDensity,
    6795     6160450 :                                       &adfValueReal[iBand], &dfValueImagTmp) &&
    6796     3342510 :                                   dfBandDensity > BAND_DENSITY_THRESHOLD))
    6797             :                             {
    6798     2817950 :                                 bAllValid = false;
    6799     2817950 :                                 break;
    6800             :                             }
    6801             :                         }
    6802             : 
    6803     3932190 :                         if (!bAllValid)
    6804             :                         {
    6805     2817950 :                             dfTotalWeightInvalid += dfWeight;
    6806     2817950 :                             continue;
    6807             :                         }
    6808             : 
    6809     1114240 :                         bool bExcludedValueFound = false;
    6810     2228460 :                         for (size_t i = 0;
    6811     2228460 :                              i < poWK->m_aadfExcludedValues.size(); ++i)
    6812             :                         {
    6813     1114230 :                             if (poWK->m_aadfExcludedValues[i] == adfValueReal)
    6814             :                             {
    6815          21 :                                 bExcludedValueFound = true;
    6816          21 :                                 ++anCountExcludedValues[i];
    6817          21 :                                 dfTotalWeightExcluded += dfWeight;
    6818          21 :                                 break;
    6819             :                             }
    6820             :                         }
    6821     1114240 :                         if (!bExcludedValueFound)
    6822             :                         {
    6823             :                             // Weighted incremental algorithm mean
    6824             :                             // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
    6825     1114220 :                             dfTotalWeightRegular += dfWeight;
    6826     4456670 :                             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6827             :                             {
    6828     3342440 :                                 adfValueAveraged[iBand] +=
    6829     6684890 :                                     (dfWeight / dfTotalWeightRegular) *
    6830     6684890 :                                     (adfValueReal[iBand] -
    6831     3342440 :                                      adfValueAveraged[iBand]);
    6832             :                             }
    6833             :                         }
    6834             :                     }
    6835             :                 }
    6836             : 
    6837      393224 :                 const double dfTotalWeight = dfTotalWeightInvalid +
    6838             :                                              dfTotalWeightExcluded +
    6839             :                                              dfTotalWeightRegular;
    6840      393224 :                 if (dfTotalWeightInvalid > 0 &&
    6841             :                     dfTotalWeightInvalid >=
    6842      311293 :                         dfNodataValuesThreshold * dfTotalWeight)
    6843             :                 {
    6844             :                     // Do nothing. Leave bHasFoundDensity set to false.
    6845             :                 }
    6846       81934 :                 else if (dfTotalWeightExcluded > 0 &&
    6847             :                          dfTotalWeightExcluded >=
    6848           6 :                              dfExcludedValuesThreshold * dfTotalWeight)
    6849             :                 {
    6850             :                     // Find the most represented excluded value tuple
    6851           3 :                     size_t iExcludedValue = 0;
    6852           3 :                     int nExcludedValueCount = 0;
    6853           6 :                     for (size_t i = 0; i < poWK->m_aadfExcludedValues.size();
    6854             :                          ++i)
    6855             :                     {
    6856           3 :                         if (anCountExcludedValues[i] > nExcludedValueCount)
    6857             :                         {
    6858           3 :                             iExcludedValue = i;
    6859           3 :                             nExcludedValueCount = anCountExcludedValues[i];
    6860             :                         }
    6861             :                     }
    6862             : 
    6863           3 :                     bHasFoundDensity = true;
    6864             : 
    6865          12 :                     for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6866             :                     {
    6867           9 :                         GWKSetPixelValue(
    6868             :                             poWK, iBand, iDstOffset, /* dfBandDensity = */ 1.0,
    6869           9 :                             poWK->m_aadfExcludedValues[iExcludedValue][iBand],
    6870             :                             0);
    6871           3 :                     }
    6872             :                 }
    6873       81931 :                 else if (dfTotalWeightRegular > 0)
    6874             :                 {
    6875       81931 :                     bHasFoundDensity = true;
    6876             : 
    6877      327706 :                     for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6878             :                     {
    6879      245775 :                         GWKSetPixelValue(poWK, iBand, iDstOffset,
    6880             :                                          /* dfBandDensity = */ 1.0,
    6881      245775 :                                          adfValueAveraged[iBand], 0);
    6882             :                     }
    6883             :                 }
    6884             : 
    6885             :                 // Skip below loop on bands
    6886      393224 :                 bDone = true;
    6887             :             }
    6888             : 
    6889             :             /* ====================================================================
    6890             :              */
    6891             :             /*      Loop processing each band. */
    6892             :             /* ====================================================================
    6893             :              */
    6894             : 
    6895     3959520 :             for (int iBand = 0; !bDone && iBand < poWK->nBands; iBand++)
    6896             :             {
    6897     2536360 :                 double dfBandDensity = 0.0;
    6898     2536360 :                 double dfValueReal = 0.0;
    6899     2536360 :                 double dfValueImag = 0.0;
    6900     2536360 :                 double dfValueRealTmp = 0.0;
    6901     2536360 :                 double dfValueImagTmp = 0.0;
    6902             : 
    6903             :                 /* --------------------------------------------------------------------
    6904             :                  */
    6905             :                 /*      Collect the source value. */
    6906             :                 /* --------------------------------------------------------------------
    6907             :                  */
    6908             : 
    6909             :                 // Loop over source lines and pixels - 3 possible algorithms.
    6910             : 
    6911             :                 // poWK->eResample == GRA_Average.
    6912     2536360 :                 if (nAlgo == GWKAOM_Average)
    6913             :                 {
    6914      220848 :                     double dfTotalWeight = 0.0;
    6915             : 
    6916             :                     // This code adapted from GDALDownsampleChunk32R_AverageT()
    6917             :                     // in gcore/overview.cpp.
    6918      471297 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    6919             :                     {
    6920      250449 :                         const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    6921      250449 :                         iSrcOffset = iSrcXMin +
    6922      250449 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    6923      643090 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    6924             :                              iSrcX++, iSrcOffset++)
    6925             :                         {
    6926      392641 :                             if (bWrapOverX)
    6927        1030 :                                 iSrcOffset =
    6928        1030 :                                     (iSrcX % nSrcXSize) +
    6929        1030 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    6930             : 
    6931      392645 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    6932           4 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    6933             :                                             iSrcOffset))
    6934             :                             {
    6935           1 :                                 continue;
    6936             :                             }
    6937             : 
    6938      392640 :                             if (GWKGetPixelValue(
    6939             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    6940      785280 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    6941      392640 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    6942             :                             {
    6943      392640 :                                 const double dfWeight =
    6944      392640 :                                     COMPUTE_WEIGHT(iSrcX, dfWeightY);
    6945      392640 :                                 if (dfWeight > 0)
    6946             :                                 {
    6947             :                                     // Weighted incremental algorithm mean
    6948             :                                     // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
    6949      392640 :                                     dfTotalWeight += dfWeight;
    6950      392640 :                                     dfValueReal +=
    6951      392640 :                                         (dfWeight / dfTotalWeight) *
    6952      392640 :                                         (dfValueRealTmp - dfValueReal);
    6953      392640 :                                     if (bIsComplex)
    6954             :                                     {
    6955         252 :                                         dfValueImag +=
    6956         252 :                                             (dfWeight / dfTotalWeight) *
    6957         252 :                                             (dfValueImagTmp - dfValueImag);
    6958             :                                     }
    6959             :                                 }
    6960             :                             }
    6961             :                         }
    6962             :                     }
    6963             : 
    6964      220848 :                     if (dfTotalWeight > 0)
    6965             :                     {
    6966      220848 :                         if (poWK->bApplyVerticalShift)
    6967             :                         {
    6968           0 :                             if (!std::isfinite(padfZ[iDstX]))
    6969           0 :                                 continue;
    6970             :                             // Subtract padfZ[] since the coordinate
    6971             :                             // transformation is from target to source
    6972           0 :                             dfValueReal =
    6973           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    6974           0 :                                 padfZ[iDstX] *
    6975             :                                     dfMultFactorVerticalShiftPipeline;
    6976             :                         }
    6977             : 
    6978      220848 :                         dfBandDensity = 1;
    6979      220848 :                         bHasFoundDensity = true;
    6980             :                     }
    6981             :                 }  // GRA_Average.
    6982             :                 // poWK->eResample == GRA_RMS.
    6983     2536360 :                 if (nAlgo == GWKAOM_RMS)
    6984             :                 {
    6985      220416 :                     double dfTotalReal = 0.0;
    6986      220416 :                     double dfTotalImag = 0.0;
    6987      220416 :                     double dfTotalWeight = 0.0;
    6988             :                     // This code adapted from GDALDownsampleChunk32R_AverageT()
    6989             :                     // in gcore/overview.cpp.
    6990      470578 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    6991             :                     {
    6992      250162 :                         const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    6993      250162 :                         iSrcOffset = iSrcXMin +
    6994      250162 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    6995      642723 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    6996             :                              iSrcX++, iSrcOffset++)
    6997             :                         {
    6998      392561 :                             if (bWrapOverX)
    6999        1030 :                                 iSrcOffset =
    7000        1030 :                                     (iSrcX % nSrcXSize) +
    7001        1030 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7002             : 
    7003      392561 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7004           0 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7005             :                                             iSrcOffset))
    7006             :                             {
    7007           0 :                                 continue;
    7008             :                             }
    7009             : 
    7010      392561 :                             if (GWKGetPixelValue(
    7011             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7012      785122 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7013      392561 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7014             :                             {
    7015      392561 :                                 const double dfWeight =
    7016      392561 :                                     COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7017      392561 :                                 dfTotalWeight += dfWeight;
    7018      392561 :                                 dfTotalReal +=
    7019      392561 :                                     dfValueRealTmp * dfValueRealTmp * dfWeight;
    7020      392561 :                                 if (bIsComplex)
    7021          48 :                                     dfTotalImag += dfValueImagTmp *
    7022          48 :                                                    dfValueImagTmp * dfWeight;
    7023             :                             }
    7024             :                         }
    7025             :                     }
    7026             : 
    7027      220416 :                     if (dfTotalWeight > 0)
    7028             :                     {
    7029      220416 :                         dfValueReal = sqrt(dfTotalReal / dfTotalWeight);
    7030             : 
    7031      220416 :                         if (poWK->bApplyVerticalShift)
    7032             :                         {
    7033           0 :                             if (!std::isfinite(padfZ[iDstX]))
    7034           0 :                                 continue;
    7035             :                             // Subtract padfZ[] since the coordinate
    7036             :                             // transformation is from target to source
    7037           0 :                             dfValueReal =
    7038           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7039           0 :                                 padfZ[iDstX] *
    7040             :                                     dfMultFactorVerticalShiftPipeline;
    7041             :                         }
    7042             : 
    7043      220416 :                         if (bIsComplex)
    7044          12 :                             dfValueImag = sqrt(dfTotalImag / dfTotalWeight);
    7045             : 
    7046      220416 :                         dfBandDensity = 1;
    7047      220416 :                         bHasFoundDensity = true;
    7048             :                     }
    7049             :                 }  // GRA_RMS.
    7050             : #ifdef disabled
    7051             :                 else if (nAlgo == GWKAOM_Sum)
    7052             :                 // poWK->eResample == GRA_Sum
    7053             :                 {
    7054             :                     double dfTotalReal = 0.0;
    7055             :                     double dfTotalImag = 0.0;
    7056             :                     bool bFoundValid = false;
    7057             : 
    7058             :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7059             :                     {
    7060             :                         const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7061             :                         iSrcOffset = iSrcXMin +
    7062             :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7063             :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7064             :                              iSrcX++, iSrcOffset++)
    7065             :                         {
    7066             :                             if (bWrapOverX)
    7067             :                                 iSrcOffset =
    7068             :                                     (iSrcX % nSrcXSize) +
    7069             :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7070             : 
    7071             :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7072             :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7073             :                                             iSrcOffset))
    7074             :                             {
    7075             :                                 continue;
    7076             :                             }
    7077             : 
    7078             :                             if (GWKGetPixelValue(
    7079             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7080             :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7081             :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7082             :                             {
    7083             :                                 const double dfWeight =
    7084             :                                     COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7085             :                                 bFoundValid = true;
    7086             :                                 dfTotalReal += dfValueRealTmp * dfWeight;
    7087             :                                 if (bIsComplex)
    7088             :                                 {
    7089             :                                     dfTotalImag += dfValueImagTmp * dfWeight;
    7090             :                                 }
    7091             :                             }
    7092             :                         }
    7093             :                     }
    7094             : 
    7095             :                     if (bFoundValid)
    7096             :                     {
    7097             :                         dfValueReal = dfTotalReal;
    7098             : 
    7099             :                         if (poWK->bApplyVerticalShift)
    7100             :                         {
    7101             :                             if (!std::isfinite(padfZ[iDstX]))
    7102             :                                 continue;
    7103             :                             // Subtract padfZ[] since the coordinate
    7104             :                             // transformation is from target to source
    7105             :                             dfValueReal =
    7106             :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7107             :                                 padfZ[iDstX] *
    7108             :                                     dfMultFactorVerticalShiftPipeline;
    7109             :                         }
    7110             : 
    7111             :                         if (bIsComplex)
    7112             :                         {
    7113             :                             dfValueImag = dfTotalImag;
    7114             :                         }
    7115             :                         dfBandDensity = 1;
    7116             :                         bHasFoundDensity = true;
    7117             :                     }
    7118             :                 }  // GRA_Sum.
    7119             : #endif
    7120     2315950 :                 else if (nAlgo == GWKAOM_Imode || nAlgo == GWKAOM_Fmode)
    7121             :                 // poWK->eResample == GRA_Mode
    7122             :                 {
    7123             :                     // This code adapted from GDALDownsampleChunk32R_Mode() in
    7124             :                     // gcore/overview.cpp.
    7125      420014 :                     if (nAlgo == GWKAOM_Fmode)  // int32 or float.
    7126             :                     {
    7127             :                         // Does it make sense to run a majority filter on
    7128             :                         // floating point data? Regardless, it is here for the
    7129             :                         // sake of compatibility. It won't look right on RGB
    7130             :                         // images by the nature of the filter.
    7131        3400 :                         int iMaxInd = 0;
    7132        3400 :                         int iMaxVal = -1;
    7133        3400 :                         int i = 0;
    7134             : 
    7135       10200 :                         for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7136             :                         {
    7137        6800 :                             iSrcOffset =
    7138        6800 :                                 iSrcXMin +
    7139        6800 :                                 static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7140       20400 :                             for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7141             :                                  iSrcX++, iSrcOffset++)
    7142             :                             {
    7143       13600 :                                 if (bWrapOverX)
    7144           0 :                                     iSrcOffset =
    7145           0 :                                         (iSrcX % nSrcXSize) +
    7146           0 :                                         static_cast<GPtrDiff_t>(iSrcY) *
    7147           0 :                                             nSrcXSize;
    7148             : 
    7149       13600 :                                 if (poWK->panUnifiedSrcValid != nullptr &&
    7150           0 :                                     !CPLMaskGet(poWK->panUnifiedSrcValid,
    7151             :                                                 iSrcOffset))
    7152           0 :                                     continue;
    7153             : 
    7154       13600 :                                 if (GWKGetPixelValue(
    7155             :                                         poWK, iBand, iSrcOffset, &dfBandDensity,
    7156       27200 :                                         &dfValueRealTmp, &dfValueImagTmp) &&
    7157       13600 :                                     dfBandDensity > BAND_DENSITY_THRESHOLD)
    7158             :                                 {
    7159       13600 :                                     const float fVal =
    7160       13600 :                                         static_cast<float>(dfValueRealTmp);
    7161             : 
    7162             :                                     // Check array for existing entry.
    7163       32685 :                                     for (i = 0; i < iMaxInd; ++i)
    7164       22512 :                                         if (pafRealVals[i] == fVal &&
    7165        2626 :                                             ++panRealSums[i] >
    7166        2626 :                                                 panRealSums[iMaxVal])
    7167             :                                         {
    7168         801 :                                             iMaxVal = i;
    7169         801 :                                             break;
    7170             :                                         }
    7171             : 
    7172             :                                     // Add to arr if entry not already there.
    7173       13600 :                                     if (i == iMaxInd)
    7174             :                                     {
    7175       12799 :                                         pafRealVals[iMaxInd] = fVal;
    7176       12799 :                                         panRealSums[iMaxInd] = 1;
    7177             : 
    7178       12799 :                                         if (iMaxVal < 0)
    7179        3400 :                                             iMaxVal = iMaxInd;
    7180             : 
    7181       12799 :                                         ++iMaxInd;
    7182             :                                     }
    7183             :                                 }
    7184             :                             }
    7185             :                         }
    7186             : 
    7187        3400 :                         if (iMaxVal != -1)
    7188             :                         {
    7189        3400 :                             dfValueReal = pafRealVals[iMaxVal];
    7190             : 
    7191        3400 :                             if (poWK->bApplyVerticalShift)
    7192             :                             {
    7193           0 :                                 if (!std::isfinite(padfZ[iDstX]))
    7194           0 :                                     continue;
    7195             :                                 // Subtract padfZ[] since the coordinate
    7196             :                                 // transformation is from target to source
    7197           0 :                                 dfValueReal =
    7198           0 :                                     dfValueReal *
    7199           0 :                                         poWK->dfMultFactorVerticalShift -
    7200           0 :                                     padfZ[iDstX] *
    7201             :                                         dfMultFactorVerticalShiftPipeline;
    7202             :                             }
    7203             : 
    7204        3400 :                             dfBandDensity = 1;
    7205        3400 :                             bHasFoundDensity = true;
    7206             :                         }
    7207             :                     }
    7208             :                     else  // byte or int16.
    7209             :                     {
    7210      416614 :                         int nMaxVal = 0;
    7211      416614 :                         int iMaxInd = -1;
    7212             : 
    7213      416614 :                         memset(panVals, 0, nBins * sizeof(int));
    7214             : 
    7215     1452530 :                         for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7216             :                         {
    7217     1035920 :                             iSrcOffset =
    7218     1035920 :                                 iSrcXMin +
    7219     1035920 :                                 static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7220     4573090 :                             for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7221             :                                  iSrcX++, iSrcOffset++)
    7222             :                             {
    7223     3537170 :                                 if (bWrapOverX)
    7224        1030 :                                     iSrcOffset =
    7225        1030 :                                         (iSrcX % nSrcXSize) +
    7226        1030 :                                         static_cast<GPtrDiff_t>(iSrcY) *
    7227        1030 :                                             nSrcXSize;
    7228             : 
    7229     3537170 :                                 if (poWK->panUnifiedSrcValid != nullptr &&
    7230           0 :                                     !CPLMaskGet(poWK->panUnifiedSrcValid,
    7231             :                                                 iSrcOffset))
    7232           0 :                                     continue;
    7233             : 
    7234     3537170 :                                 if (GWKGetPixelValue(
    7235             :                                         poWK, iBand, iSrcOffset, &dfBandDensity,
    7236     7074340 :                                         &dfValueRealTmp, &dfValueImagTmp) &&
    7237     3537170 :                                     dfBandDensity > BAND_DENSITY_THRESHOLD)
    7238             :                                 {
    7239     3537170 :                                     const int nVal =
    7240     3537170 :                                         static_cast<int>(dfValueRealTmp);
    7241     3537170 :                                     if (++panVals[nVal + nBinsOffset] > nMaxVal)
    7242             :                                     {
    7243             :                                         // Sum the density.
    7244             :                                         // Is it the most common value so far?
    7245     2732830 :                                         iMaxInd = nVal;
    7246     2732830 :                                         nMaxVal = panVals[nVal + nBinsOffset];
    7247             :                                     }
    7248             :                                 }
    7249             :                             }
    7250             :                         }
    7251             : 
    7252      416614 :                         if (iMaxInd != -1)
    7253             :                         {
    7254      416614 :                             dfValueReal = iMaxInd;
    7255             : 
    7256      416614 :                             if (poWK->bApplyVerticalShift)
    7257             :                             {
    7258           0 :                                 if (!std::isfinite(padfZ[iDstX]))
    7259           0 :                                     continue;
    7260             :                                 // Subtract padfZ[] since the coordinate
    7261             :                                 // transformation is from target to source
    7262           0 :                                 dfValueReal =
    7263           0 :                                     dfValueReal *
    7264           0 :                                         poWK->dfMultFactorVerticalShift -
    7265           0 :                                     padfZ[iDstX] *
    7266             :                                         dfMultFactorVerticalShiftPipeline;
    7267             :                             }
    7268             : 
    7269      416614 :                             dfBandDensity = 1;
    7270      416614 :                             bHasFoundDensity = true;
    7271             :                         }
    7272      420014 :                     }
    7273             :                 }  // GRA_Mode.
    7274     1895930 :                 else if (nAlgo == GWKAOM_Max)
    7275             :                 // poWK->eResample == GRA_Max.
    7276             :                 {
    7277      335037 :                     bool bFoundValid = false;
    7278      335037 :                     double dfTotalReal = std::numeric_limits<double>::lowest();
    7279             :                     // This code adapted from nAlgo 1 method, GRA_Average.
    7280     1288010 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7281             :                     {
    7282      952975 :                         iSrcOffset = iSrcXMin +
    7283      952975 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7284     4406540 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7285             :                              iSrcX++, iSrcOffset++)
    7286             :                         {
    7287     3453560 :                             if (bWrapOverX)
    7288        1030 :                                 iSrcOffset =
    7289        1030 :                                     (iSrcX % nSrcXSize) +
    7290        1030 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7291             : 
    7292     3456370 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7293        2809 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7294             :                                             iSrcOffset))
    7295             :                             {
    7296        2446 :                                 continue;
    7297             :                             }
    7298             : 
    7299             :                             // Returns pixel value if it is not no data.
    7300     3451120 :                             if (GWKGetPixelValue(
    7301             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7302     6902230 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7303     3451120 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7304             :                             {
    7305     3451120 :                                 bFoundValid = true;
    7306     3451120 :                                 if (dfTotalReal < dfValueRealTmp)
    7307             :                                 {
    7308      442642 :                                     dfTotalReal = dfValueRealTmp;
    7309             :                                 }
    7310             :                             }
    7311             :                         }
    7312             :                     }
    7313             : 
    7314      335037 :                     if (bFoundValid)
    7315             :                     {
    7316      335037 :                         dfValueReal = dfTotalReal;
    7317             : 
    7318      335037 :                         if (poWK->bApplyVerticalShift)
    7319             :                         {
    7320           0 :                             if (!std::isfinite(padfZ[iDstX]))
    7321           0 :                                 continue;
    7322             :                             // Subtract padfZ[] since the coordinate
    7323             :                             // transformation is from target to source
    7324           0 :                             dfValueReal =
    7325           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7326           0 :                                 padfZ[iDstX] *
    7327             :                                     dfMultFactorVerticalShiftPipeline;
    7328             :                         }
    7329             : 
    7330      335037 :                         dfBandDensity = 1;
    7331      335037 :                         bHasFoundDensity = true;
    7332             :                     }
    7333             :                 }  // GRA_Max.
    7334     1560900 :                 else if (nAlgo == GWKAOM_Min)
    7335             :                 // poWK->eResample == GRA_Min.
    7336             :                 {
    7337      335012 :                     bool bFoundValid = false;
    7338      335012 :                     double dfTotalReal = std::numeric_limits<double>::max();
    7339             :                     // This code adapted from nAlgo 1 method, GRA_Average.
    7340     1287720 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7341             :                     {
    7342      952710 :                         iSrcOffset = iSrcXMin +
    7343      952710 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7344     4403460 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7345             :                              iSrcX++, iSrcOffset++)
    7346             :                         {
    7347     3450750 :                             if (bWrapOverX)
    7348        1030 :                                 iSrcOffset =
    7349        1030 :                                     (iSrcX % nSrcXSize) +
    7350        1030 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7351             : 
    7352     3450750 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7353           0 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7354             :                                             iSrcOffset))
    7355             :                             {
    7356           0 :                                 continue;
    7357             :                             }
    7358             : 
    7359             :                             // Returns pixel value if it is not no data.
    7360     3450750 :                             if (GWKGetPixelValue(
    7361             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7362     6901500 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7363     3450750 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7364             :                             {
    7365     3450750 :                                 bFoundValid = true;
    7366     3450750 :                                 if (dfTotalReal > dfValueRealTmp)
    7367             :                                 {
    7368      443069 :                                     dfTotalReal = dfValueRealTmp;
    7369             :                                 }
    7370             :                             }
    7371             :                         }
    7372             :                     }
    7373             : 
    7374      335012 :                     if (bFoundValid)
    7375             :                     {
    7376      335012 :                         dfValueReal = dfTotalReal;
    7377             : 
    7378      335012 :                         if (poWK->bApplyVerticalShift)
    7379             :                         {
    7380           0 :                             if (!std::isfinite(padfZ[iDstX]))
    7381           0 :                                 continue;
    7382             :                             // Subtract padfZ[] since the coordinate
    7383             :                             // transformation is from target to source
    7384           0 :                             dfValueReal =
    7385           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7386           0 :                                 padfZ[iDstX] *
    7387             :                                     dfMultFactorVerticalShiftPipeline;
    7388             :                         }
    7389             : 
    7390      335012 :                         dfBandDensity = 1;
    7391      335012 :                         bHasFoundDensity = true;
    7392             :                     }
    7393             :                 }  // GRA_Min.
    7394     1225880 :                 else if (nAlgo == GWKAOM_Quant)
    7395             :                 // poWK->eResample == GRA_Med | GRA_Q1 | GRA_Q3.
    7396             :                 {
    7397     1005040 :                     bool bFoundValid = false;
    7398     1005040 :                     std::vector<double> dfRealValuesTmp;
    7399             : 
    7400             :                     // This code adapted from nAlgo 1 method, GRA_Average.
    7401     3863170 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7402             :                     {
    7403     2858130 :                         iSrcOffset = iSrcXMin +
    7404     2858130 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7405    13210400 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7406             :                              iSrcX++, iSrcOffset++)
    7407             :                         {
    7408    10352300 :                             if (bWrapOverX)
    7409        3090 :                                 iSrcOffset =
    7410        3090 :                                     (iSrcX % nSrcXSize) +
    7411        3090 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7412             : 
    7413    10352300 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7414           0 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7415             :                                             iSrcOffset))
    7416             :                             {
    7417           0 :                                 continue;
    7418             :                             }
    7419             : 
    7420             :                             // Returns pixel value if it is not no data.
    7421    10352300 :                             if (GWKGetPixelValue(
    7422             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7423    20704500 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7424    10352300 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7425             :                             {
    7426    10352300 :                                 bFoundValid = true;
    7427    10352300 :                                 dfRealValuesTmp.push_back(dfValueRealTmp);
    7428             :                             }
    7429             :                         }
    7430             :                     }
    7431             : 
    7432     1005040 :                     if (bFoundValid)
    7433             :                     {
    7434     1005040 :                         std::sort(dfRealValuesTmp.begin(),
    7435             :                                   dfRealValuesTmp.end());
    7436             :                         int quantIdx = static_cast<int>(
    7437     1005040 :                             std::ceil(quant * dfRealValuesTmp.size() - 1));
    7438     1005040 :                         dfValueReal = dfRealValuesTmp[quantIdx];
    7439             : 
    7440     1005040 :                         if (poWK->bApplyVerticalShift)
    7441             :                         {
    7442           0 :                             if (!std::isfinite(padfZ[iDstX]))
    7443           0 :                                 continue;
    7444             :                             // Subtract padfZ[] since the coordinate
    7445             :                             // transformation is from target to source
    7446           0 :                             dfValueReal =
    7447           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    7448           0 :                                 padfZ[iDstX] *
    7449             :                                     dfMultFactorVerticalShiftPipeline;
    7450             :                         }
    7451             : 
    7452     1005040 :                         dfBandDensity = 1;
    7453     1005040 :                         bHasFoundDensity = true;
    7454     1005040 :                         dfRealValuesTmp.clear();
    7455             :                     }
    7456             :                 }  // Quantile.
    7457             : 
    7458             :                 /* --------------------------------------------------------------------
    7459             :                  */
    7460             :                 /*      We have a computed value from the source.  Now apply it
    7461             :                  * to      */
    7462             :                 /*      the destination pixel. */
    7463             :                 /* --------------------------------------------------------------------
    7464             :                  */
    7465     2536360 :                 if (bHasFoundDensity)
    7466             :                 {
    7467             :                     // TODO: Should we compute dfBandDensity in fct of
    7468             :                     // nCount/nCount2, or use as a threshold to set the dest
    7469             :                     // value?
    7470             :                     // dfBandDensity = (float) nCount / nCount2;
    7471             :                     // if( (float) nCount / nCount2 > 0.1 )
    7472             :                     // or fix gdalwarp crop_to_cutline to crop partially
    7473             :                     // overlapping pixels.
    7474     2536360 :                     GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    7475             :                                      dfValueReal, dfValueImag);
    7476             :                 }
    7477             :             }
    7478             : 
    7479     1423160 :             if (!bHasFoundDensity)
    7480      311290 :                 continue;
    7481             : 
    7482             :             /* --------------------------------------------------------------------
    7483             :              */
    7484             :             /*      Update destination density/validity masks. */
    7485             :             /* --------------------------------------------------------------------
    7486             :              */
    7487     1111860 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    7488             : 
    7489     1111860 :             if (poWK->panDstValid != nullptr)
    7490             :             {
    7491          74 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    7492             :             }
    7493             :         } /* Next iDstX */
    7494             : 
    7495             :         /* --------------------------------------------------------------------
    7496             :          */
    7497             :         /*      Report progress to the user, and optionally cancel out. */
    7498             :         /* --------------------------------------------------------------------
    7499             :          */
    7500        5884 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    7501           0 :             break;
    7502             :     }
    7503             : 
    7504             :     /* -------------------------------------------------------------------- */
    7505             :     /*      Cleanup and return.                                             */
    7506             :     /* -------------------------------------------------------------------- */
    7507         117 :     CPLFree(padfX);
    7508         117 :     CPLFree(padfY);
    7509         117 :     CPLFree(padfZ);
    7510         117 :     CPLFree(padfX2);
    7511         117 :     CPLFree(padfY2);
    7512         117 :     CPLFree(padfZ2);
    7513         117 :     CPLFree(pabSuccess);
    7514         117 :     CPLFree(pabSuccess2);
    7515         117 :     VSIFree(panVals);
    7516         117 :     VSIFree(pafRealVals);
    7517         117 :     VSIFree(panRealSums);
    7518         117 :     if (bIsComplex)
    7519             :     {
    7520          18 :         VSIFree(pafImagVals);
    7521          18 :         VSIFree(panImagSums);
    7522             :     }
    7523             : }
    7524             : 
    7525             : /************************************************************************/
    7526             : /*                         getOrientation()                             */
    7527             : /************************************************************************/
    7528             : 
    7529             : typedef std::pair<double, double> XYPair;
    7530             : 
    7531             : // Returns 1 whether (p1,p2,p3) is clockwise oriented,
    7532             : // -1 if it is counter-clockwise oriented,
    7533             : // or 0 if it is colinear.
    7534     2355910 : static int getOrientation(const XYPair &p1, const XYPair &p2, const XYPair &p3)
    7535             : {
    7536     2355910 :     const double p1x = p1.first;
    7537     2355910 :     const double p1y = p1.second;
    7538     2355910 :     const double p2x = p2.first;
    7539     2355910 :     const double p2y = p2.second;
    7540     2355910 :     const double p3x = p3.first;
    7541     2355910 :     const double p3y = p3.second;
    7542     2355910 :     const double val = (p2y - p1y) * (p3x - p2x) - (p2x - p1x) * (p3y - p2y);
    7543     2355910 :     if (std::abs(val) < 1e-20)
    7544        2690 :         return 0;
    7545     2353220 :     else if (val > 0)
    7546           0 :         return 1;
    7547             :     else
    7548     2353220 :         return -1;
    7549             : }
    7550             : 
    7551             : /************************************************************************/
    7552             : /*                          isConvex()                                  */
    7553             : /************************************************************************/
    7554             : 
    7555             : typedef std::vector<XYPair> XYPoly;
    7556             : 
    7557             : // poly must be closed
    7558      785302 : static bool isConvex(const XYPoly &poly)
    7559             : {
    7560      785302 :     const size_t n = poly.size();
    7561      785302 :     size_t i = 0;
    7562      785302 :     int last_orientation = getOrientation(poly[i], poly[i + 1], poly[i + 2]);
    7563      785302 :     ++i;
    7564     2355910 :     for (; i < n - 2; ++i)
    7565             :     {
    7566             :         const int orientation =
    7567     1570600 :             getOrientation(poly[i], poly[i + 1], poly[i + 2]);
    7568     1570600 :         if (orientation != 0)
    7569             :         {
    7570     1567910 :             if (last_orientation == 0)
    7571           0 :                 last_orientation = orientation;
    7572     1567910 :             else if (orientation != last_orientation)
    7573           0 :                 return false;
    7574             :         }
    7575             :     }
    7576      785302 :     return true;
    7577             : }
    7578             : 
    7579             : /************************************************************************/
    7580             : /*                     pointIntersectsConvexPoly()                      */
    7581             : /************************************************************************/
    7582             : 
    7583             : // Returns whether xy intersects poly, that must be closed and convex.
    7584     6049100 : static bool pointIntersectsConvexPoly(const XYPair &xy, const XYPoly &poly)
    7585             : {
    7586     6049100 :     const size_t n = poly.size();
    7587     6049100 :     double dx1 = xy.first - poly[0].first;
    7588     6049100 :     double dy1 = xy.second - poly[0].second;
    7589     6049100 :     double dx2 = poly[1].first - poly[0].first;
    7590     6049100 :     double dy2 = poly[1].second - poly[0].second;
    7591     6049100 :     double prevCrossProduct = dx1 * dy2 - dx2 * dy1;
    7592             : 
    7593             :     // Check if the point remains on the same side (left/right) of all edges
    7594    14556400 :     for (size_t i = 2; i < n; i++)
    7595             :     {
    7596    12793100 :         dx1 = xy.first - poly[i - 1].first;
    7597    12793100 :         dy1 = xy.second - poly[i - 1].second;
    7598             : 
    7599    12793100 :         dx2 = poly[i].first - poly[i - 1].first;
    7600    12793100 :         dy2 = poly[i].second - poly[i - 1].second;
    7601             : 
    7602    12793100 :         double crossProduct = dx1 * dy2 - dx2 * dy1;
    7603    12793100 :         if (std::abs(prevCrossProduct) < 1e-20)
    7604      725558 :             prevCrossProduct = crossProduct;
    7605    12067500 :         else if (prevCrossProduct * crossProduct < 0)
    7606     4285760 :             return false;
    7607             :     }
    7608             : 
    7609     1763340 :     return true;
    7610             : }
    7611             : 
    7612             : /************************************************************************/
    7613             : /*                     getIntersection()                                */
    7614             : /************************************************************************/
    7615             : 
    7616             : /* Returns intersection of [p1,p2] with [p3,p4], if
    7617             :  * it is a single point, and the 2 segments are not colinear.
    7618             :  */
    7619    11811000 : static bool getIntersection(const XYPair &p1, const XYPair &p2,
    7620             :                             const XYPair &p3, const XYPair &p4, XYPair &xy)
    7621             : {
    7622    11811000 :     const double x1 = p1.first;
    7623    11811000 :     const double y1 = p1.second;
    7624    11811000 :     const double x2 = p2.first;
    7625    11811000 :     const double y2 = p2.second;
    7626    11811000 :     const double x3 = p3.first;
    7627    11811000 :     const double y3 = p3.second;
    7628    11811000 :     const double x4 = p4.first;
    7629    11811000 :     const double y4 = p4.second;
    7630    11811000 :     const double t_num = (x1 - x3) * (y3 - y4) - (y1 - y3) * (x3 - x4);
    7631    11811000 :     const double denom = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4);
    7632    11811000 :     if (t_num * denom < 0 || std::abs(t_num) > std::abs(denom) || denom == 0)
    7633     9260780 :         return false;
    7634             : 
    7635     2550260 :     const double u_num = (x1 - x3) * (y1 - y2) - (y1 - y3) * (x1 - x2);
    7636     2550260 :     if (u_num * denom < 0 || std::abs(u_num) > std::abs(denom))
    7637      973924 :         return false;
    7638             : 
    7639     1576340 :     const double t = t_num / denom;
    7640     1576340 :     xy.first = x1 + t * (x2 - x1);
    7641     1576340 :     xy.second = y1 + t * (y2 - y1);
    7642     1576340 :     return true;
    7643             : }
    7644             : 
    7645             : /************************************************************************/
    7646             : /*                     getConvexPolyIntersection()                      */
    7647             : /************************************************************************/
    7648             : 
    7649             : // poly1 and poly2 must be closed and convex.
    7650             : // The returned intersection will not necessary be closed.
    7651      785302 : static void getConvexPolyIntersection(const XYPoly &poly1, const XYPoly &poly2,
    7652             :                                       XYPoly &intersection)
    7653             : {
    7654      785302 :     intersection.clear();
    7655             : 
    7656             :     // Add all points of poly1 inside poly2
    7657     3926510 :     for (size_t i = 0; i < poly1.size() - 1; ++i)
    7658             :     {
    7659     3141210 :         if (pointIntersectsConvexPoly(poly1[i], poly2))
    7660     1187430 :             intersection.push_back(poly1[i]);
    7661             :     }
    7662      785302 :     if (intersection.size() == poly1.size() - 1)
    7663             :     {
    7664             :         // poly1 is inside poly2
    7665      119100 :         return;
    7666             :     }
    7667             : 
    7668             :     // Add all points of poly2 inside poly1
    7669     3634860 :     for (size_t i = 0; i < poly2.size() - 1; ++i)
    7670             :     {
    7671     2907890 :         if (pointIntersectsConvexPoly(poly2[i], poly1))
    7672      575904 :             intersection.push_back(poly2[i]);
    7673             :     }
    7674             : 
    7675             :     // Compute the intersection of all edges of both polygons
    7676      726972 :     XYPair xy;
    7677     3634860 :     for (size_t i1 = 0; i1 < poly1.size() - 1; ++i1)
    7678             :     {
    7679    14539400 :         for (size_t i2 = 0; i2 < poly2.size() - 1; ++i2)
    7680             :         {
    7681    11631600 :             if (getIntersection(poly1[i1], poly1[i1 + 1], poly2[i2],
    7682    11631600 :                                 poly2[i2 + 1], xy))
    7683             :             {
    7684     1576230 :                 intersection.push_back(xy);
    7685             :             }
    7686             :         }
    7687             :     }
    7688             : 
    7689      726972 :     if (intersection.empty())
    7690       60770 :         return;
    7691             : 
    7692             :     // Find lowest-left point in intersection set
    7693      666202 :     double lowest_x = std::numeric_limits<double>::max();
    7694      666202 :     double lowest_y = std::numeric_limits<double>::max();
    7695     3772450 :     for (const auto &pair : intersection)
    7696             :     {
    7697     3106240 :         const double x = pair.first;
    7698     3106240 :         const double y = pair.second;
    7699     3106240 :         if (y < lowest_y || (y == lowest_y && x < lowest_x))
    7700             :         {
    7701     1096040 :             lowest_x = x;
    7702     1096040 :             lowest_y = y;
    7703             :         }
    7704             :     }
    7705             : 
    7706     5737980 :     const auto sortFunc = [&](const XYPair &p1, const XYPair &p2)
    7707             :     {
    7708     5737980 :         const double p1x_diff = p1.first - lowest_x;
    7709     5737980 :         const double p1y_diff = p1.second - lowest_y;
    7710     5737980 :         const double p2x_diff = p2.first - lowest_x;
    7711     5737980 :         const double p2y_diff = p2.second - lowest_y;
    7712     5737980 :         if (p2y_diff == 0.0 && p1y_diff == 0.0)
    7713             :         {
    7714     2655420 :             if (p1x_diff >= 0)
    7715             :             {
    7716     2655420 :                 if (p2x_diff >= 0)
    7717     2655420 :                     return p1.first < p2.first;
    7718           0 :                 return true;
    7719             :             }
    7720             :             else
    7721             :             {
    7722           0 :                 if (p2x_diff >= 0)
    7723           0 :                     return false;
    7724           0 :                 return p1.first < p2.first;
    7725             :             }
    7726             :         }
    7727             : 
    7728     3082560 :         if (p2x_diff == 0.0 && p1x_diff == 0.0)
    7729     1046960 :             return p1.second < p2.second;
    7730             : 
    7731             :         double tan_p1;
    7732     2035600 :         if (p1x_diff == 0.0)
    7733      464622 :             tan_p1 = p1y_diff == 0.0 ? 0.0 : std::numeric_limits<double>::max();
    7734             :         else
    7735     1570980 :             tan_p1 = p1y_diff / p1x_diff;
    7736             : 
    7737             :         double tan_p2;
    7738     2035600 :         if (p2x_diff == 0.0)
    7739      839515 :             tan_p2 = p2y_diff == 0.0 ? 0.0 : std::numeric_limits<double>::max();
    7740             :         else
    7741     1196080 :             tan_p2 = p2y_diff / p2x_diff;
    7742             : 
    7743     2035600 :         if (tan_p1 >= 0)
    7744             :         {
    7745     1904790 :             if (tan_p2 >= 0)
    7746     1881590 :                 return tan_p1 < tan_p2;
    7747             :             else
    7748       23199 :                 return true;
    7749             :         }
    7750             :         else
    7751             :         {
    7752      130806 :             if (tan_p2 >= 0)
    7753      103900 :                 return false;
    7754             :             else
    7755       26906 :                 return tan_p1 < tan_p2;
    7756             :         }
    7757      666202 :     };
    7758             : 
    7759             :     // Sort points by increasing atan2(y-lowest_y, x-lowest_x) to form a convex
    7760             :     // hull
    7761      666202 :     std::sort(intersection.begin(), intersection.end(), sortFunc);
    7762             : 
    7763             :     // Remove duplicated points
    7764      666202 :     size_t j = 1;
    7765     3106240 :     for (size_t i = 1; i < intersection.size(); ++i)
    7766             :     {
    7767     2440040 :         if (intersection[i] != intersection[i - 1])
    7768             :         {
    7769     1452560 :             if (j < i)
    7770      545275 :                 intersection[j] = intersection[i];
    7771     1452560 :             ++j;
    7772             :         }
    7773             :     }
    7774      666202 :     intersection.resize(j);
    7775             : }
    7776             : 
    7777             : /************************************************************************/
    7778             : /*                            getArea()                                 */
    7779             : /************************************************************************/
    7780             : 
    7781             : // poly may or may not be closed.
    7782      558521 : static double getArea(const XYPoly &poly)
    7783             : {
    7784             :     // CPLAssert(poly.size() >= 2);
    7785      558521 :     const size_t nPointCount = poly.size();
    7786             :     double dfAreaSum =
    7787      558521 :         poly[0].first * (poly[1].second - poly[nPointCount - 1].second);
    7788             : 
    7789     1765140 :     for (size_t i = 1; i < nPointCount - 1; i++)
    7790             :     {
    7791     1206610 :         dfAreaSum += poly[i].first * (poly[i + 1].second - poly[i - 1].second);
    7792             :     }
    7793             : 
    7794      558521 :     dfAreaSum += poly[nPointCount - 1].first *
    7795      558521 :                  (poly[0].second - poly[nPointCount - 2].second);
    7796             : 
    7797      558521 :     return 0.5 * std::fabs(dfAreaSum);
    7798             : }
    7799             : 
    7800             : /************************************************************************/
    7801             : /*                           GWKSumPreserving()                         */
    7802             : /************************************************************************/
    7803             : 
    7804             : static void GWKSumPreservingThread(void *pData);
    7805             : 
    7806          18 : static CPLErr GWKSumPreserving(GDALWarpKernel *poWK)
    7807             : {
    7808          18 :     return GWKRun(poWK, "GWKSumPreserving", GWKSumPreservingThread);
    7809             : }
    7810             : 
    7811          18 : static void GWKSumPreservingThread(void *pData)
    7812             : {
    7813          18 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    7814          18 :     GDALWarpKernel *poWK = psJob->poWK;
    7815          18 :     const int iYMin = psJob->iYMin;
    7816          18 :     const int iYMax = psJob->iYMax;
    7817             :     const bool bIsAffineNoRotation =
    7818          18 :         GDALTransformIsAffineNoRotation(poWK->pfnTransformer,
    7819          26 :                                         poWK->pTransformerArg) &&
    7820             :         // for debug/testing purposes
    7821           8 :         CPLTestBool(
    7822          18 :             CPLGetConfigOption("GDAL_WARP_USE_AFFINE_OPTIMIZATION", "YES"));
    7823             : 
    7824          18 :     const int nDstXSize = poWK->nDstXSize;
    7825          18 :     const int nSrcXSize = poWK->nSrcXSize;
    7826          18 :     const int nSrcYSize = poWK->nSrcYSize;
    7827             : 
    7828          36 :     std::vector<double> adfX0(nSrcXSize + 1);
    7829          36 :     std::vector<double> adfY0(nSrcXSize + 1);
    7830          36 :     std::vector<double> adfZ0(nSrcXSize + 1);
    7831          36 :     std::vector<double> adfX1(nSrcXSize + 1);
    7832          36 :     std::vector<double> adfY1(nSrcXSize + 1);
    7833          36 :     std::vector<double> adfZ1(nSrcXSize + 1);
    7834          36 :     std::vector<int> abSuccess0(nSrcXSize + 1);
    7835          36 :     std::vector<int> abSuccess1(nSrcXSize + 1);
    7836             : 
    7837             :     CPLRectObj sGlobalBounds;
    7838          18 :     sGlobalBounds.minx = -2 * poWK->dfXScale;
    7839          18 :     sGlobalBounds.miny = iYMin - 2 * poWK->dfYScale;
    7840          18 :     sGlobalBounds.maxx = nDstXSize + 2 * poWK->dfXScale;
    7841          18 :     sGlobalBounds.maxy = iYMax + 2 * poWK->dfYScale;
    7842          18 :     CPLQuadTree *hQuadTree = CPLQuadTreeCreate(&sGlobalBounds, nullptr);
    7843             : 
    7844             :     struct SourcePixel
    7845             :     {
                               // One candidate contributing source pixel, described by the
                               // quadrilateral formed by its 4 corners once transformed to
                               // target pixel coordinates. Instances are stored in
                               // sourcePixels and referenced by index from the quad tree.

                               // Column (iSrcX) and row (iSrcY) of the pixel inside the source
                               // window, i.e. the iX / iY loop indices of the first pass.
    7846             :         int iSrcX;
    7847             :         int iSrcY;
    7848             : 
    7849             :         // Coordinates of source pixel in target pixel coordinates.
                               // Corner order, as filled by the first pass:
                               //   0 = (iSrcX,     iSrcY)      1 = (iSrcX + 1, iSrcY)
                               //   3 = (iSrcX,     iSrcY + 1)  2 = (iSrcX + 1, iSrcY + 1)
                               // When a discontinuity is detected, corners 1 and 2 or corners
                               // 0 and 3 are replaced by the points found by FindDiscontinuity.
    7850             :         double dfDstX0;
    7851             :         double dfDstY0;
    7852             :         double dfDstX1;
    7853             :         double dfDstY1;
    7854             :         double dfDstX2;
    7855             :         double dfDstY2;
    7856             :         double dfDstX3;
    7857             :         double dfDstY3;
    7858             : 
    7859             :         // Source pixel total area (might be larger than the one described
    7860             :         // by above coordinates, if the pixel was crossing the antimeridian
    7861             :         // and split).
                               // Initialized to 0 by the first pass, where 0 means "not computed
                               // yet": the non-affine weighting code then fills it from the
                               // corner polygon the first time a non-zero weight is found.
    7862             :         double dfArea;
    7863             :     };
    7864             : 
    7865          36 :     std::vector<SourcePixel> sourcePixels;
    7866             : 
    7867          36 :     XYPoly discontinuityLeft(5);
    7868          36 :     XYPoly discontinuityRight(5);
    7869             : 
    7870             :     /* ==================================================================== */
    7871             :     /*      First pass: transform the 4 corners of each potential           */
    7872             :     /*      contributing source pixel to target pixel coordinates.          */
    7873             :     /* ==================================================================== */
    7874             : 
    7875             :     // Special case for top line
    7876             :     {
    7877          18 :         int iY = 0;
    7878        1130 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    7879             :         {
    7880        1112 :             adfX1[iX] = iX + poWK->nSrcXOff;
    7881        1112 :             adfY1[iX] = iY + poWK->nSrcYOff;
    7882        1112 :             adfZ1[iX] = 0;
    7883             :         }
    7884             : 
    7885          18 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
    7886             :                              adfX1.data(), adfY1.data(), adfZ1.data(),
    7887             :                              abSuccess1.data());
    7888             : 
    7889        1130 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    7890             :         {
    7891        1112 :             if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
    7892           0 :                 abSuccess1[iX] = FALSE;
    7893             :             else
    7894             :             {
    7895        1112 :                 adfX1[iX] -= poWK->nDstXOff;
    7896        1112 :                 adfY1[iX] -= poWK->nDstYOff;
    7897             :             }
    7898             :         }
    7899             :     }
    7900             : 
    7901      413412 :     const auto getInsideXSign = [poWK, nDstXSize](double dfX)
    7902             :     {
                               // Classifies an X coordinate: returns 1 when dfX, once
                               // poWK->nDstXOff is subtracted, lies within
                               // [-2 * dfXScale, nDstXSize + 2 * dfXScale] (the same X extent
                               // as sGlobalBounds), and -1 otherwise.
                               // NOTE(review): the pixel loop passes adfX0/adfX1 values from
                               // which nDstXOff was already subtracted, whereas
                               // FindDiscontinuity passes the raw transformer output. Confirm
                               // that this difference in offset handling is intended.
    7903      413412 :         return dfX - poWK->nDstXOff >= -2 * poWK->dfXScale &&
    7904      205344 :                        dfX - poWK->nDstXOff <= nDstXSize + 2 * poWK->dfXScale
    7905      413412 :                    ? 1
    7906      208068 :                    : -1;
    7907          18 :     };
    7908             : 
    7909             :     const auto FindDiscontinuity =
    7910          80 :         [poWK, psJob, getInsideXSign](
    7911             :             double dfXLeft, double dfXRight, double dfY,
    7912             :             int XLeftReprojectedInsideSign, double &dfXMidReprojectedLeft,
    7913         800 :             double &dfXMidReprojectedRight, double &dfYMidReprojected)
    7914             :     {
                               // Bisection along one source row: narrows the source X interval
                               // [dfXLeft, dfXRight] (at source Y = dfY) towards the position
                               // where the sign returned by getInsideXSign() for the
                               // reprojected X stops being XLeftReprojectedInsideSign.
                               //
                               // Outputs:
                               //  - dfXMidReprojectedLeft / dfXMidReprojectedRight: reprojected
                               //    X of the last midpoint classified on the left / right side
                               //    of the change. Each one is left untouched when no midpoint
                               //    falls on its side (callers pre-initialize them to 0).
                               //  - dfYMidReprojected: reprojected Y of the last midpoint.
                               // The reprojected values are stored as returned by the
                               // transformer; poWK->nDstXOff / nDstYOff are not subtracted here.
                               //
                               // At most 10 halvings are performed, stopping earlier once the
                               // interval is no wider than 1e-8.
    7915         880 :         for (int i = 0; i < 10 && dfXRight - dfXLeft > 1e-8; ++i)
    7916             :         {
    7917         800 :             double dfXMid = (dfXLeft + dfXRight) / 2;
    7918         800 :             double dfXMidReprojected = dfXMid;
    7919         800 :             dfYMidReprojected = dfY;
    7920         800 :             double dfZ = 0;
    7921         800 :             int nSuccess = 0;
                                       // Transform the midpoint in place, with the same direction
                                       // flag (FALSE) as the pixel corner transforms. The success
                                       // flag is not examined afterwards.
    7922         800 :             poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1,
    7923             :                                  &dfXMidReprojected, &dfYMidReprojected, &dfZ,
    7924             :                                  &nSuccess);
    7925         800 :             if (XLeftReprojectedInsideSign != getInsideXSign(dfXMidReprojected))
    7926             :             {
                                           // Midpoint is on the other side of the change: it
                                           // becomes the new right bound.
    7927         456 :                 dfXRight = dfXMid;
    7928         456 :                 dfXMidReprojectedRight = dfXMidReprojected;
    7929             :             }
    7930             :             else
    7931             :             {
                                           // Midpoint is on the same side as the left end: it
                                           // becomes the new left bound.
    7932         344 :                 dfXLeft = dfXMid;
    7933         344 :                 dfXMidReprojectedLeft = dfXMidReprojected;
    7934             :             }
    7935             :         }
    7936          80 :     };
    7937             : 
    7938         566 :     for (int iY = 0; iY < nSrcYSize; ++iY)
    7939             :     {
    7940         548 :         std::swap(adfX0, adfX1);
    7941         548 :         std::swap(adfY0, adfY1);
    7942         548 :         std::swap(adfZ0, adfZ1);
    7943         548 :         std::swap(abSuccess0, abSuccess1);
    7944             : 
    7945      104512 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    7946             :         {
    7947      103964 :             adfX1[iX] = iX + poWK->nSrcXOff;
    7948      103964 :             adfY1[iX] = iY + 1 + poWK->nSrcYOff;
    7949      103964 :             adfZ1[iX] = 0;
    7950             :         }
    7951             : 
    7952         548 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
    7953             :                              adfX1.data(), adfY1.data(), adfZ1.data(),
    7954             :                              abSuccess1.data());
    7955             : 
    7956      104512 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    7957             :         {
    7958      103964 :             if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
    7959           0 :                 abSuccess1[iX] = FALSE;
    7960             :             else
    7961             :             {
    7962      103964 :                 adfX1[iX] -= poWK->nDstXOff;
    7963      103964 :                 adfY1[iX] -= poWK->nDstYOff;
    7964             :             }
    7965             :         }
    7966             : 
    7967      103964 :         for (int iX = 0; iX < nSrcXSize; ++iX)
    7968             :         {
    7969      206832 :             if (abSuccess0[iX] && abSuccess0[iX + 1] && abSuccess1[iX] &&
    7970      103416 :                 abSuccess1[iX + 1])
    7971             :             {
    7972             :                 /* --------------------------------------------------------------------
    7973             :                  */
    7974             :                 /*      Do not try to apply transparent source pixels to the
    7975             :                  * destination.*/
    7976             :                 /* --------------------------------------------------------------------
    7977             :                  */
    7978      103416 :                 const auto iSrcOffset =
    7979      103416 :                     iX + static_cast<GPtrDiff_t>(iY) * nSrcXSize;
    7980      105816 :                 if (poWK->panUnifiedSrcValid != nullptr &&
    7981        2400 :                     !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    7982             :                 {
    7983       10971 :                     continue;
    7984             :                 }
    7985             : 
    7986      103410 :                 if (poWK->pafUnifiedSrcDensity != nullptr)
    7987             :                 {
    7988           0 :                     if (poWK->pafUnifiedSrcDensity[iSrcOffset] <
    7989             :                         SRC_DENSITY_THRESHOLD)
    7990           0 :                         continue;
    7991             :                 }
    7992             : 
    7993             :                 SourcePixel sp;
    7994      103410 :                 sp.dfArea = 0;
    7995      103410 :                 sp.dfDstX0 = adfX0[iX];
    7996      103410 :                 sp.dfDstY0 = adfY0[iX];
    7997      103410 :                 sp.dfDstX1 = adfX0[iX + 1];
    7998      103410 :                 sp.dfDstY1 = adfY0[iX + 1];
    7999      103410 :                 sp.dfDstX2 = adfX1[iX + 1];
    8000      103410 :                 sp.dfDstY2 = adfY1[iX + 1];
    8001      103410 :                 sp.dfDstX3 = adfX1[iX];
    8002      103410 :                 sp.dfDstY3 = adfY1[iX];
    8003             : 
    8004             :                 // Detect pixels that likely cross the anti-meridian and
    8005             :                 // introduce a discontinuity when reprojected.
    8006             : 
    8007      103410 :                 if (getInsideXSign(adfX0[iX]) !=
    8008      103506 :                         getInsideXSign(adfX0[iX + 1]) &&
    8009         164 :                     getInsideXSign(adfX0[iX]) == getInsideXSign(adfX1[iX]) &&
    8010          68 :                     getInsideXSign(adfX0[iX + 1]) ==
    8011      103574 :                         getInsideXSign(adfX1[iX + 1]) &&
    8012          40 :                     (adfY1[iX] - adfY0[iX]) * (adfY1[iX + 1] - adfY0[iX + 1]) >
    8013             :                         0)
    8014             :                 {
    8015          40 :                     double dfXMidReprojectedLeftTop = 0;
    8016          40 :                     double dfXMidReprojectedRightTop = 0;
    8017          40 :                     double dfYMidReprojectedTop = 0;
    8018          40 :                     FindDiscontinuity(
    8019          40 :                         iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
    8020          80 :                         iY + poWK->nSrcYOff, getInsideXSign(adfX0[iX]),
    8021             :                         dfXMidReprojectedLeftTop, dfXMidReprojectedRightTop,
    8022             :                         dfYMidReprojectedTop);
    8023          40 :                     double dfXMidReprojectedLeftBottom = 0;
    8024          40 :                     double dfXMidReprojectedRightBottom = 0;
    8025          40 :                     double dfYMidReprojectedBottom = 0;
    8026          40 :                     FindDiscontinuity(
    8027          40 :                         iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
    8028          80 :                         iY + poWK->nSrcYOff + 1, getInsideXSign(adfX1[iX]),
    8029             :                         dfXMidReprojectedLeftBottom,
    8030             :                         dfXMidReprojectedRightBottom, dfYMidReprojectedBottom);
    8031             : 
    8032          40 :                     discontinuityLeft[0] = XYPair(adfX0[iX], adfY0[iX]);
    8033          40 :                     discontinuityLeft[1] =
    8034          80 :                         XYPair(dfXMidReprojectedLeftTop, dfYMidReprojectedTop);
    8035          40 :                     discontinuityLeft[2] = XYPair(dfXMidReprojectedLeftBottom,
    8036          40 :                                                   dfYMidReprojectedBottom);
    8037          40 :                     discontinuityLeft[3] = XYPair(adfX1[iX], adfY1[iX]);
    8038          40 :                     discontinuityLeft[4] = XYPair(adfX0[iX], adfY0[iX]);
    8039             : 
    8040          40 :                     discontinuityRight[0] =
    8041          80 :                         XYPair(adfX0[iX + 1], adfY0[iX + 1]);
    8042          40 :                     discontinuityRight[1] =
    8043          80 :                         XYPair(dfXMidReprojectedRightTop, dfYMidReprojectedTop);
    8044          40 :                     discontinuityRight[2] = XYPair(dfXMidReprojectedRightBottom,
    8045          40 :                                                    dfYMidReprojectedBottom);
    8046          40 :                     discontinuityRight[3] =
    8047          80 :                         XYPair(adfX1[iX + 1], adfY1[iX + 1]);
    8048          40 :                     discontinuityRight[4] =
    8049          80 :                         XYPair(adfX0[iX + 1], adfY0[iX + 1]);
    8050             : 
    8051          40 :                     sp.dfArea = getArea(discontinuityLeft) +
    8052          40 :                                 getArea(discontinuityRight);
    8053          40 :                     if (getInsideXSign(adfX0[iX]) >= 1)
    8054             :                     {
    8055          20 :                         sp.dfDstX1 = dfXMidReprojectedLeftTop;
    8056          20 :                         sp.dfDstY1 = dfYMidReprojectedTop;
    8057          20 :                         sp.dfDstX2 = dfXMidReprojectedLeftBottom;
    8058          20 :                         sp.dfDstY2 = dfYMidReprojectedBottom;
    8059             :                     }
    8060             :                     else
    8061             :                     {
    8062          20 :                         sp.dfDstX0 = dfXMidReprojectedRightTop;
    8063          20 :                         sp.dfDstY0 = dfYMidReprojectedTop;
    8064          20 :                         sp.dfDstX3 = dfXMidReprojectedRightBottom;
    8065          20 :                         sp.dfDstY3 = dfYMidReprojectedBottom;
    8066             :                     }
    8067             :                 }
    8068             : 
    8069             :                 // Bounding box of source pixel (expressed in target pixel
    8070             :                 // coordinates)
    8071             :                 CPLRectObj sRect;
    8072      103410 :                 sRect.minx = std::min(std::min(sp.dfDstX0, sp.dfDstX1),
    8073      103410 :                                       std::min(sp.dfDstX2, sp.dfDstX3));
    8074      103410 :                 sRect.miny = std::min(std::min(sp.dfDstY0, sp.dfDstY1),
    8075      103410 :                                       std::min(sp.dfDstY2, sp.dfDstY3));
    8076      103410 :                 sRect.maxx = std::max(std::max(sp.dfDstX0, sp.dfDstX1),
    8077      103410 :                                       std::max(sp.dfDstX2, sp.dfDstX3));
    8078      103410 :                 sRect.maxy = std::max(std::max(sp.dfDstY0, sp.dfDstY1),
    8079      103410 :                                       std::max(sp.dfDstY2, sp.dfDstY3));
    8080      103410 :                 if (!(sRect.minx < nDstXSize && sRect.maxx > 0 &&
    8081      101350 :                       sRect.miny < iYMax && sRect.maxy > iYMin))
    8082             :                 {
    8083       10852 :                     continue;
    8084             :                 }
    8085             : 
    8086       92558 :                 sp.iSrcX = iX;
    8087       92558 :                 sp.iSrcY = iY;
    8088             : 
    8089       92558 :                 if (!bIsAffineNoRotation)
    8090             :                 {
    8091             :                     // Check polygon validity (no self-crossing)
    8092       89745 :                     XYPair xy;
    8093       89745 :                     if (getIntersection(XYPair(sp.dfDstX0, sp.dfDstY0),
    8094       89745 :                                         XYPair(sp.dfDstX1, sp.dfDstY1),
    8095       89745 :                                         XYPair(sp.dfDstX2, sp.dfDstY2),
    8096      269235 :                                         XYPair(sp.dfDstX3, sp.dfDstY3), xy) ||
    8097       89745 :                         getIntersection(XYPair(sp.dfDstX1, sp.dfDstY1),
    8098       89745 :                                         XYPair(sp.dfDstX2, sp.dfDstY2),
    8099       89745 :                                         XYPair(sp.dfDstX0, sp.dfDstY0),
    8100      179490 :                                         XYPair(sp.dfDstX3, sp.dfDstY3), xy))
    8101             :                     {
    8102         113 :                         continue;
    8103             :                     }
    8104             :                 }
    8105             : 
    8106       92445 :                 CPLQuadTreeInsertWithBounds(
    8107             :                     hQuadTree,
    8108             :                     reinterpret_cast<void *>(
    8109       92445 :                         static_cast<uintptr_t>(sourcePixels.size())),
    8110             :                     &sRect);
    8111             : 
    8112       92445 :                 sourcePixels.push_back(sp);
    8113             :             }
    8114             :         }
    8115             :     }
    8116             : 
    8117          36 :     std::vector<double> adfRealValue(poWK->nBands);
    8118          36 :     std::vector<double> adfImagValue(poWK->nBands);
    8119          36 :     std::vector<double> adfBandDensity(poWK->nBands);
    8120          36 :     std::vector<double> adfWeight(poWK->nBands);
    8121             : 
    8122             : #ifdef CHECK_SUM_WITH_GEOS
    8123             :     auto hGEOSContext = OGRGeometry::createGEOSContext();
    8124             :     auto seq1 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
    8125             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 0, 0.0, 0.0);
    8126             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 1, 1.0, 0.0);
    8127             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 2, 1.0, 1.0);
    8128             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 3, 0.0, 1.0);
    8129             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 4, 0.0, 0.0);
    8130             :     auto hLR1 = GEOSGeom_createLinearRing_r(hGEOSContext, seq1);
    8131             :     auto hP1 = GEOSGeom_createPolygon_r(hGEOSContext, hLR1, nullptr, 0);
    8132             : 
    8133             :     auto seq2 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
    8134             :     auto hLR2 = GEOSGeom_createLinearRing_r(hGEOSContext, seq2);
    8135             :     auto hP2 = GEOSGeom_createPolygon_r(hGEOSContext, hLR2, nullptr, 0);
    8136             : #endif
    8137             : 
    8138             :     const XYPoly xy1{
    8139          36 :         {0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}};
    8140          36 :     XYPoly xy2(5);
    8141          36 :     XYPoly xy2_triangle(4);
    8142          36 :     XYPoly intersection;
    8143             : 
    8144             :     /* ==================================================================== */
    8145             :     /*      Loop over output lines.                                         */
    8146             :     /* ==================================================================== */
    8147         891 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    8148             :     {
    8149             :         CPLRectObj sRect;
    8150         873 :         sRect.miny = iDstY;
    8151         873 :         sRect.maxy = iDstY + 1;
    8152             : 
    8153             :         /* ====================================================================
    8154             :          */
    8155             :         /*      Loop over pixels in output scanline. */
    8156             :         /* ====================================================================
    8157             :          */
    8158      221042 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    8159             :         {
    8160      220169 :             sRect.minx = iDstX;
    8161      220169 :             sRect.maxx = iDstX + 1;
    8162      220169 :             int nSourcePixels = 0;
    8163             :             void **pahSourcePixel =
    8164      220169 :                 CPLQuadTreeSearch(hQuadTree, &sRect, &nSourcePixels);
    8165      220169 :             if (nSourcePixels == 0)
    8166             :             {
    8167        1258 :                 CPLFree(pahSourcePixel);
    8168        1262 :                 continue;
    8169             :             }
    8170             : 
    8171      218911 :             std::fill(adfRealValue.begin(), adfRealValue.end(), 0);
    8172      218911 :             std::fill(adfImagValue.begin(), adfImagValue.end(), 0);
    8173      218911 :             std::fill(adfBandDensity.begin(), adfBandDensity.end(), 0);
    8174      218911 :             std::fill(adfWeight.begin(), adfWeight.end(), 0);
    8175      218911 :             double dfDensity = 0;
    8176      218911 :             double dfTotalWeight = 0;
    8177             : 
    8178             :             /* ====================================================================
    8179             :              */
    8180             :             /*          Iterate over each contributing source pixel to add its
    8181             :              */
    8182             :             /*          value weighted by the ratio of the area of its
    8183             :              * intersection  */
    8184             :             /*          with the target pixel divided by the area of the source
    8185             :              */
    8186             :             /*          pixel. */
    8187             :             /* ====================================================================
    8188             :              */
    8189     1020520 :             for (int i = 0; i < nSourcePixels; ++i)
    8190             :             {
    8191      801614 :                 const int iSourcePixel = static_cast<int>(
    8192      801614 :                     reinterpret_cast<uintptr_t>(pahSourcePixel[i]));
    8193      801614 :                 auto &sp = sourcePixels[iSourcePixel];
    8194             : 
    8195      801614 :                 double dfWeight = 0.0;
    8196      801614 :                 if (bIsAffineNoRotation)
    8197             :                 {
    8198             :                     // Optimization since the source pixel is a rectangle in
    8199             :                     // target pixel coordinates
    8200       16312 :                     double dfSrcMinX = std::min(sp.dfDstX0, sp.dfDstX2);
    8201       16312 :                     double dfSrcMaxX = std::max(sp.dfDstX0, sp.dfDstX2);
    8202       16312 :                     double dfSrcMinY = std::min(sp.dfDstY0, sp.dfDstY2);
    8203       16312 :                     double dfSrcMaxY = std::max(sp.dfDstY0, sp.dfDstY2);
    8204       16312 :                     double dfIntersMinX = std::max<double>(dfSrcMinX, iDstX);
    8205       16312 :                     double dfIntersMaxX = std::min(dfSrcMaxX, iDstX + 1.0);
    8206       16312 :                     double dfIntersMinY = std::max<double>(dfSrcMinY, iDstY);
    8207       16312 :                     double dfIntersMaxY = std::min(dfSrcMaxY, iDstY + 1.0);
    8208       16312 :                     dfWeight =
    8209       16312 :                         ((dfIntersMaxX - dfIntersMinX) *
    8210       16312 :                          (dfIntersMaxY - dfIntersMinY)) /
    8211       16312 :                         ((dfSrcMaxX - dfSrcMinX) * (dfSrcMaxY - dfSrcMinY));
    8212             :                 }
    8213             :                 else
    8214             :                 {
    8215             :                     // Compute the polygon of the source pixel in target pixel
    8216             :                     // coordinates, and shifted to the target pixel (unit square
    8217             :                     // coordinates)
    8218             : 
    8219      785302 :                     xy2[0] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
    8220      785302 :                     xy2[1] = {sp.dfDstX1 - iDstX, sp.dfDstY1 - iDstY};
    8221      785302 :                     xy2[2] = {sp.dfDstX2 - iDstX, sp.dfDstY2 - iDstY};
    8222      785302 :                     xy2[3] = {sp.dfDstX3 - iDstX, sp.dfDstY3 - iDstY};
    8223      785302 :                     xy2[4] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
    8224             : 
    8225      785302 :                     if (isConvex(xy2))
    8226             :                     {
    8227      785302 :                         getConvexPolyIntersection(xy1, xy2, intersection);
    8228      785302 :                         if (intersection.size() >= 3)
    8229             :                         {
    8230      468849 :                             dfWeight = getArea(intersection);
    8231             :                         }
    8232             :                     }
    8233             :                     else
    8234             :                     {
    8235             :                         // Split xy2 into 2 triangles.
    8236           0 :                         xy2_triangle[0] = xy2[0];
    8237           0 :                         xy2_triangle[1] = xy2[1];
    8238           0 :                         xy2_triangle[2] = xy2[2];
    8239           0 :                         xy2_triangle[3] = xy2[0];
    8240           0 :                         getConvexPolyIntersection(xy1, xy2_triangle,
    8241             :                                                   intersection);
    8242           0 :                         if (intersection.size() >= 3)
    8243             :                         {
    8244           0 :                             dfWeight = getArea(intersection);
    8245             :                         }
    8246             : 
    8247           0 :                         xy2_triangle[1] = xy2[2];
    8248           0 :                         xy2_triangle[2] = xy2[3];
    8249           0 :                         getConvexPolyIntersection(xy1, xy2_triangle,
    8250             :                                                   intersection);
    8251           0 :                         if (intersection.size() >= 3)
    8252             :                         {
    8253           0 :                             dfWeight += getArea(intersection);
    8254             :                         }
    8255             :                     }
    8256      785302 :                     if (dfWeight > 0.0)
    8257             :                     {
    8258      468828 :                         if (sp.dfArea == 0)
    8259       89592 :                             sp.dfArea = getArea(xy2);
    8260      468828 :                         dfWeight /= sp.dfArea;
    8261             :                     }
    8262             : 
    8263             : #ifdef CHECK_SUM_WITH_GEOS
    8264             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 0,
    8265             :                                          sp.dfDstX0 - iDstX,
    8266             :                                          sp.dfDstY0 - iDstY);
    8267             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 1,
    8268             :                                          sp.dfDstX1 - iDstX,
    8269             :                                          sp.dfDstY1 - iDstY);
    8270             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 2,
    8271             :                                          sp.dfDstX2 - iDstX,
    8272             :                                          sp.dfDstY2 - iDstY);
    8273             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 3,
    8274             :                                          sp.dfDstX3 - iDstX,
    8275             :                                          sp.dfDstY3 - iDstY);
    8276             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 4,
    8277             :                                          sp.dfDstX0 - iDstX,
    8278             :                                          sp.dfDstY0 - iDstY);
    8279             : 
    8280             :                     double dfWeightGEOS = 0.0;
    8281             :                     auto hIntersection =
    8282             :                         GEOSIntersection_r(hGEOSContext, hP1, hP2);
    8283             :                     if (hIntersection)
    8284             :                     {
    8285             :                         double dfIntersArea = 0.0;
    8286             :                         if (GEOSArea_r(hGEOSContext, hIntersection,
    8287             :                                        &dfIntersArea) &&
    8288             :                             dfIntersArea > 0)
    8289             :                         {
    8290             :                             double dfSourceArea = 0.0;
    8291             :                             if (GEOSArea_r(hGEOSContext, hP2, &dfSourceArea))
    8292             :                             {
    8293             :                                 dfWeightGEOS = dfIntersArea / dfSourceArea;
    8294             :                             }
    8295             :                         }
    8296             :                         GEOSGeom_destroy_r(hGEOSContext, hIntersection);
    8297             :                     }
    8298             :                     if (fabs(dfWeight - dfWeightGEOS) > 1e-5 * dfWeightGEOS)
    8299             :                     {
    8300             :                         /* ok */ printf("dfWeight=%f dfWeightGEOS=%f\n",
    8301             :                                         dfWeight, dfWeightGEOS);
    8302             :                         printf("xy2: ");  // ok
    8303             :                         for (const auto &xy : xy2)
    8304             :                             printf("[%f, %f], ", xy.first, xy.second);  // ok
    8305             :                         printf("\n");                                   // ok
    8306             :                         printf("intersection: ");                       // ok
    8307             :                         for (const auto &xy : intersection)
    8308             :                             printf("[%f, %f], ", xy.first, xy.second);  // ok
    8309             :                         printf("\n");                                   // ok
    8310             :                     }
    8311             : #endif
    8312             :                 }
    8313      801614 :                 if (dfWeight > 0.0)
    8314             :                 {
    8315      474099 :                     const GPtrDiff_t iSrcOffset =
    8316      474099 :                         sp.iSrcX +
    8317      474099 :                         static_cast<GPtrDiff_t>(sp.iSrcY) * nSrcXSize;
    8318      474099 :                     dfTotalWeight += dfWeight;
    8319             : 
    8320      474099 :                     if (poWK->pafUnifiedSrcDensity != nullptr)
    8321             :                     {
    8322           0 :                         dfDensity +=
    8323           0 :                             dfWeight * poWK->pafUnifiedSrcDensity[iSrcOffset];
    8324             :                     }
    8325             :                     else
    8326             :                     {
    8327      474099 :                         dfDensity += dfWeight;
    8328             :                     }
    8329             : 
    8330     1818720 :                     for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    8331             :                     {
    8332             :                         // Returns pixel value if it is not no data.
    8333             :                         double dfBandDensity;
    8334             :                         double dfRealValue;
    8335             :                         double dfImagValue;
    8336     2689240 :                         if (!(GWKGetPixelValue(poWK, iBand, iSrcOffset,
    8337             :                                                &dfBandDensity, &dfRealValue,
    8338             :                                                &dfImagValue) &&
    8339     1344620 :                               dfBandDensity > BAND_DENSITY_THRESHOLD))
    8340             :                         {
    8341           0 :                             continue;
    8342             :                         }
    8343             : 
    8344     1344620 :                         adfRealValue[iBand] += dfRealValue * dfWeight;
    8345     1344620 :                         adfImagValue[iBand] += dfImagValue * dfWeight;
    8346     1344620 :                         adfBandDensity[iBand] += dfBandDensity * dfWeight;
    8347     1344620 :                         adfWeight[iBand] += dfWeight;
    8348             :                     }
    8349             :                 }
    8350             :             }
    8351             : 
    8352      218911 :             CPLFree(pahSourcePixel);
    8353             : 
    8354             :             /* --------------------------------------------------------------------
    8355             :              */
    8356             :             /*          Update destination pixel value. */
    8357             :             /* --------------------------------------------------------------------
    8358             :              */
    8359      218911 :             bool bHasFoundDensity = false;
    8360      218911 :             const GPtrDiff_t iDstOffset =
    8361      218911 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    8362      827822 :             for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    8363             :             {
    8364      608911 :                 if (adfWeight[iBand] > 0)
    8365             :                 {
    8366             :                     const double dfBandDensity =
    8367      608907 :                         adfBandDensity[iBand] / adfWeight[iBand];
    8368      608907 :                     if (dfBandDensity > BAND_DENSITY_THRESHOLD)
    8369             :                     {
    8370      608907 :                         bHasFoundDensity = true;
    8371      608907 :                         GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    8372      608907 :                                          adfRealValue[iBand],
    8373      608907 :                                          adfImagValue[iBand]);
    8374             :                     }
    8375             :                 }
    8376             :             }
    8377             : 
    8378      218911 :             if (!bHasFoundDensity)
    8379           4 :                 continue;
    8380             : 
    8381             :             /* --------------------------------------------------------------------
    8382             :              */
    8383             :             /*          Update destination density/validity masks. */
    8384             :             /* --------------------------------------------------------------------
    8385             :              */
    8386      218907 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity / dfTotalWeight);
    8387             : 
    8388      218907 :             if (poWK->panDstValid != nullptr)
    8389             :             {
    8390       11750 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    8391             :             }
    8392             :         }
    8393             : 
    8394             :         /* --------------------------------------------------------------------
    8395             :          */
    8396             :         /*      Report progress to the user, and optionally cancel out. */
    8397             :         /* --------------------------------------------------------------------
    8398             :          */
    8399         873 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    8400           0 :             break;
    8401             :     }
    8402             : 
    8403             : #ifdef CHECK_SUM_WITH_GEOS
    8404             :     GEOSGeom_destroy_r(hGEOSContext, hP1);
    8405             :     GEOSGeom_destroy_r(hGEOSContext, hP2);
    8406             :     OGRGeometry::freeGEOSContext(hGEOSContext);
    8407             : #endif
    8408          18 :     CPLQuadTreeDestroy(hQuadTree);
    8409          18 : }

Generated by: LCOV version 1.14