Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: High Performance Image Reprojector
4 : * Purpose: Implementation of the GDALWarpKernel class. Implements the actual
5 : * image warping for a "chunk" of input and output imagery already
6 : * loaded into memory.
7 : * Author: Frank Warmerdam, warmerdam@pobox.com
8 : *
9 : ******************************************************************************
10 : * Copyright (c) 2003, Frank Warmerdam <warmerdam@pobox.com>
11 : * Copyright (c) 2008-2013, Even Rouault <even dot rouault at spatialys.com>
12 : *
13 : * SPDX-License-Identifier: MIT
14 : ****************************************************************************/
15 :
16 : #include "cpl_port.h"
17 : #include "gdalwarper.h"
18 :
19 : #include <cfloat>
20 : #include <cmath>
21 : #include <cstddef>
22 : #include <cstdlib>
23 : #include <cstring>
24 :
25 : #include <algorithm>
26 : #include <limits>
27 : #include <mutex>
28 : #include <new>
29 : #include <utility>
30 : #include <vector>
31 :
32 : #include "cpl_atomic_ops.h"
33 : #include "cpl_conv.h"
34 : #include "cpl_error.h"
35 : #include "cpl_float.h"
36 : #include "cpl_mask.h"
37 : #include "cpl_multiproc.h"
38 : #include "cpl_progress.h"
39 : #include "cpl_string.h"
40 : #include "cpl_vsi.h"
41 : #include "cpl_worker_thread_pool.h"
42 : #include "cpl_quad_tree.h"
43 : #include "gdal.h"
44 : #include "gdal_alg.h"
45 : #include "gdal_alg_priv.h"
46 : #include "gdal_thread_pool.h"
47 : #include "gdalresamplingkernels.h"
48 :
49 : // #define CHECK_SUM_WITH_GEOS
50 : #ifdef CHECK_SUM_WITH_GEOS
51 : #include "ogr_geometry.h"
52 : #include "ogr_geos.h"
53 : #endif
54 :
55 : #ifdef USE_NEON_OPTIMIZATIONS
56 : #include "include_sse2neon.h"
57 : #define USE_SSE2
58 :
59 : #include "gdalsse_priv.h"
60 :
61 : // We restrict to 64bit processors because they are guaranteed to have SSE2.
62 : // Could possibly be used too on 32bit, but we would need to check at runtime.
63 : #elif defined(__x86_64) || defined(_M_X64)
64 : #define USE_SSE2
65 :
66 : #include "gdalsse_priv.h"
67 :
68 : #if __SSE4_1__
69 : #include <smmintrin.h>
70 : #endif
71 :
72 : #if __SSE3__
73 : #include <pmmintrin.h>
74 : #endif
75 :
76 : #endif
77 :
78 : constexpr double BAND_DENSITY_THRESHOLD = 0.0000000001;
79 : constexpr float SRC_DENSITY_THRESHOLD = 0.000000001f;
80 :
81 : // #define INSTANTIATE_FLOAT64_SSE2_IMPL
82 :
// Filter radius of each resampling algorithm, indexed by the numeric value of
// GDALResampleAlg (see GWKGetFilterRadius()). The table must keep exactly
// GRA_LAST_VALUE + 1 entries: a static_assert in the accessor enforces it.
static const int anGWKFilterRadius[] = {
    /* Nearest neighbour                  */ 0,
    /* Bilinear                           */ 1,
    /* Cubic Convolution (Catmull-Rom)    */ 2,
    /* Cubic B-Spline                     */ 2,
    /* Lanczos windowed sinc              */ 3,
    /* Average                            */ 0,
    /* Mode                               */ 0,
    /* Reserved GRA_Gauss=7               */ 0,
    /* Max                                */ 0,
    /* Min                                */ 0,
    /* Med                                */ 0,
    /* Q1                                 */ 0,
    /* Q3                                 */ 0,
    /* Sum                                */ 0,
    /* RMS                                */ 0,
};
100 :
101 : static double GWKBilinear(double dfX);
102 : static double GWKCubic(double dfX);
103 : static double GWKBSpline(double dfX);
104 : static double GWKLanczosSinc(double dfX);
105 :
106 : static const FilterFuncType apfGWKFilter[] = {
107 : nullptr, // Nearest neighbour
108 : GWKBilinear, // Bilinear
109 : GWKCubic, // Cubic Convolution (Catmull-Rom)
110 : GWKBSpline, // Cubic B-Spline
111 : GWKLanczosSinc, // Lanczos windowed sinc
112 : nullptr, // Average
113 : nullptr, // Mode
114 : nullptr, // Reserved GRA_Gauss=7
115 : nullptr, // Max
116 : nullptr, // Min
117 : nullptr, // Med
118 : nullptr, // Q1
119 : nullptr, // Q3
120 : nullptr, // Sum
121 : nullptr, // RMS
122 : };
123 :
124 : // TODO(schwehr): Can we make these functions have a const * const arg?
125 : static double GWKBilinear4Values(double *padfVals);
126 : static double GWKCubic4Values(double *padfVals);
127 : static double GWKBSpline4Values(double *padfVals);
128 : static double GWKLanczosSinc4Values(double *padfVals);
129 :
130 : static const FilterFunc4ValuesType apfGWKFilter4Values[] = {
131 : nullptr, // Nearest neighbour
132 : GWKBilinear4Values, // Bilinear
133 : GWKCubic4Values, // Cubic Convolution (Catmull-Rom)
134 : GWKBSpline4Values, // Cubic B-Spline
135 : GWKLanczosSinc4Values, // Lanczos windowed sinc
136 : nullptr, // Average
137 : nullptr, // Mode
138 : nullptr, // Reserved GRA_Gauss=7
139 : nullptr, // Max
140 : nullptr, // Min
141 : nullptr, // Med
142 : nullptr, // Q1
143 : nullptr, // Q3
144 : nullptr, // Sum
145 : nullptr, // RMS
146 : };
147 :
148 9907 : int GWKGetFilterRadius(GDALResampleAlg eResampleAlg)
149 : {
150 : static_assert(CPL_ARRAYSIZE(anGWKFilterRadius) == GRA_LAST_VALUE + 1,
151 : "Bad size of anGWKFilterRadius");
152 9907 : return anGWKFilterRadius[eResampleAlg];
153 : }
154 :
155 3731 : FilterFuncType GWKGetFilterFunc(GDALResampleAlg eResampleAlg)
156 : {
157 : static_assert(CPL_ARRAYSIZE(apfGWKFilter) == GRA_LAST_VALUE + 1,
158 : "Bad size of apfGWKFilter");
159 3731 : return apfGWKFilter[eResampleAlg];
160 : }
161 :
162 3732 : FilterFunc4ValuesType GWKGetFilterFunc4Values(GDALResampleAlg eResampleAlg)
163 : {
164 : static_assert(CPL_ARRAYSIZE(apfGWKFilter4Values) == GRA_LAST_VALUE + 1,
165 : "Bad size of apfGWKFilter4Values");
166 3732 : return apfGWKFilter4Values[eResampleAlg];
167 : }
168 :
169 : static CPLErr GWKGeneralCase(GDALWarpKernel *);
170 : static CPLErr GWKRealCase(GDALWarpKernel *poWK);
171 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
172 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
173 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
174 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
175 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
176 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
177 : #endif
178 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
179 : static CPLErr GWKNearestByte(GDALWarpKernel *poWK);
180 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
181 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
182 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
183 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
184 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
185 : #endif
186 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
187 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
188 : static CPLErr GWKNearestShort(GDALWarpKernel *poWK);
189 : static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK);
190 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
191 : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK);
192 : static CPLErr GWKAverageOrMode(GDALWarpKernel *);
193 : static CPLErr GWKSumPreserving(GDALWarpKernel *);
194 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
195 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
196 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
197 :
198 : /************************************************************************/
199 : /* GWKJobStruct */
200 : /************************************************************************/
201 :
202 : struct GWKJobStruct
203 : {
204 : std::mutex &mutex;
205 : std::condition_variable &cv;
206 : int &counter;
207 : bool &stopFlag;
208 : GDALWarpKernel *poWK;
209 : int iYMin;
210 : int iYMax;
211 : int (*pfnProgress)(GWKJobStruct *psJob);
212 : void *pTransformerArg;
213 : void (*pfnFunc)(
214 : void *); // used by GWKRun() to assign the proper pTransformerArg
215 :
216 2105 : GWKJobStruct(std::mutex &mutex_, std::condition_variable &cv_,
217 : int &counter_, bool &stopFlag_)
218 2105 : : mutex(mutex_), cv(cv_), counter(counter_), stopFlag(stopFlag_),
219 : poWK(nullptr), iYMin(0), iYMax(0), pfnProgress(nullptr),
220 2105 : pTransformerArg(nullptr), pfnFunc(nullptr)
221 : {
222 2105 : }
223 : };
224 :
225 : struct GWKThreadData
226 : {
227 : std::unique_ptr<CPLJobQueue> poJobQueue{};
228 : std::unique_ptr<std::vector<GWKJobStruct>> threadJobs{};
229 : int nMaxThreads{0};
230 : int counter{0};
231 : bool stopFlag{false};
232 : std::mutex mutex{};
233 : std::condition_variable cv{};
234 : bool bTransformerArgInputAssignedToThread{false};
235 : void *pTransformerArgInput{
236 : nullptr}; // owned by calling layer. Not to be destroyed
237 : std::map<GIntBig, void *> mapThreadToTransformerArg{};
238 : int nTotalThreadCountForThisRun = 0;
239 : int nCurThreadCountForThisRun = 0;
240 : };
241 :
242 : /************************************************************************/
243 : /* GWKProgressThread() */
244 : /************************************************************************/
245 :
246 : // Return TRUE if the computation must be interrupted.
247 5 : static int GWKProgressThread(GWKJobStruct *psJob)
248 : {
249 5 : bool stop = false;
250 : {
251 5 : std::lock_guard<std::mutex> lock(psJob->mutex);
252 5 : psJob->counter++;
253 5 : stop = psJob->stopFlag;
254 : }
255 5 : psJob->cv.notify_one();
256 :
257 5 : return stop;
258 : }
259 :
260 : /************************************************************************/
261 : /* GWKProgressMonoThread() */
262 : /************************************************************************/
263 :
264 : // Return TRUE if the computation must be interrupted.
265 204563 : static int GWKProgressMonoThread(GWKJobStruct *psJob)
266 : {
267 204563 : GDALWarpKernel *poWK = psJob->poWK;
268 : // coverity[missing_lock]
269 204563 : if (!poWK->pfnProgress(
270 204563 : poWK->dfProgressBase +
271 204563 : poWK->dfProgressScale *
272 204563 : (++psJob->counter / static_cast<double>(psJob->iYMax)),
273 : "", poWK->pProgress))
274 : {
275 1 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
276 1 : psJob->stopFlag = true;
277 1 : return TRUE;
278 : }
279 204562 : return FALSE;
280 : }
281 :
282 : /************************************************************************/
283 : /* GWKGenericMonoThread() */
284 : /************************************************************************/
285 :
286 2100 : static CPLErr GWKGenericMonoThread(GDALWarpKernel *poWK,
287 : void (*pfnFunc)(void *pUserData))
288 : {
289 2100 : GWKThreadData td;
290 :
291 : // NOTE: the mutex is not used.
292 2100 : GWKJobStruct job(td.mutex, td.cv, td.counter, td.stopFlag);
293 2100 : job.poWK = poWK;
294 2100 : job.iYMin = 0;
295 2100 : job.iYMax = poWK->nDstYSize;
296 2100 : job.pfnProgress = GWKProgressMonoThread;
297 2100 : job.pTransformerArg = poWK->pTransformerArg;
298 2100 : pfnFunc(&job);
299 :
300 4200 : return td.stopFlag ? CE_Failure : CE_None;
301 : }
302 :
303 : /************************************************************************/
304 : /* GWKThreadsCreate() */
305 : /************************************************************************/
306 :
307 1409 : void *GWKThreadsCreate(char **papszWarpOptions,
308 : GDALTransformerFunc /* pfnTransformer */,
309 : void *pTransformerArg)
310 : {
311 : const char *pszWarpThreads =
312 1409 : CSLFetchNameValue(papszWarpOptions, "NUM_THREADS");
313 1409 : if (pszWarpThreads == nullptr)
314 1409 : pszWarpThreads = CPLGetConfigOption("GDAL_NUM_THREADS", "1");
315 :
316 1409 : int nThreads = 0;
317 1409 : if (EQUAL(pszWarpThreads, "ALL_CPUS"))
318 3 : nThreads = CPLGetNumCPUs();
319 : else
320 1406 : nThreads = atoi(pszWarpThreads);
321 1409 : if (nThreads <= 1)
322 1404 : nThreads = 0;
323 1409 : if (nThreads > 128)
324 0 : nThreads = 128;
325 :
326 1409 : GWKThreadData *psThreadData = new GWKThreadData();
327 : auto poThreadPool =
328 1409 : nThreads > 0 ? GDALGetGlobalThreadPool(nThreads) : nullptr;
329 1409 : if (nThreads && poThreadPool)
330 : {
331 5 : psThreadData->nMaxThreads = nThreads;
332 5 : psThreadData->threadJobs.reset(new std::vector<GWKJobStruct>(
333 : nThreads,
334 5 : GWKJobStruct(psThreadData->mutex, psThreadData->cv,
335 10 : psThreadData->counter, psThreadData->stopFlag)));
336 :
337 5 : psThreadData->poJobQueue = poThreadPool->CreateJobQueue();
338 5 : psThreadData->pTransformerArgInput = pTransformerArg;
339 : }
340 :
341 1409 : return psThreadData;
342 : }
343 :
344 : /************************************************************************/
345 : /* GWKThreadsEnd() */
346 : /************************************************************************/
347 :
348 1409 : void GWKThreadsEnd(void *psThreadDataIn)
349 : {
350 1409 : if (psThreadDataIn == nullptr)
351 0 : return;
352 :
353 1409 : GWKThreadData *psThreadData = static_cast<GWKThreadData *>(psThreadDataIn);
354 1409 : if (psThreadData->poJobQueue)
355 : {
356 : // cppcheck-suppress constVariableReference
357 15 : for (auto &pair : psThreadData->mapThreadToTransformerArg)
358 : {
359 10 : CPLAssert(pair.second != psThreadData->pTransformerArgInput);
360 10 : GDALDestroyTransformer(pair.second);
361 : }
362 5 : psThreadData->poJobQueue.reset();
363 : }
364 1409 : delete psThreadData;
365 : }
366 :
367 : /************************************************************************/
368 : /* ThreadFuncAdapter() */
369 : /************************************************************************/
370 :
371 15 : static void ThreadFuncAdapter(void *pData)
372 : {
373 15 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
374 15 : GWKThreadData *psThreadData =
375 15 : static_cast<GWKThreadData *>(psJob->poWK->psThreadData);
376 :
377 : // Look if we have already a per-thread transformer
378 15 : void *pTransformerArg = nullptr;
379 15 : const GIntBig nThreadId = CPLGetPID();
380 :
381 : {
382 30 : std::lock_guard<std::mutex> lock(psThreadData->mutex);
383 15 : ++psThreadData->nCurThreadCountForThisRun;
384 :
385 15 : auto oIter = psThreadData->mapThreadToTransformerArg.find(nThreadId);
386 15 : if (oIter != psThreadData->mapThreadToTransformerArg.end())
387 : {
388 0 : pTransformerArg = oIter->second;
389 : }
390 15 : else if (!psThreadData->bTransformerArgInputAssignedToThread &&
391 15 : psThreadData->nCurThreadCountForThisRun ==
392 15 : psThreadData->nTotalThreadCountForThisRun)
393 : {
394 : // If we are the last thread to be started, temporarily borrow the
395 : // original transformer
396 5 : psThreadData->bTransformerArgInputAssignedToThread = true;
397 5 : pTransformerArg = psThreadData->pTransformerArgInput;
398 5 : psThreadData->mapThreadToTransformerArg[nThreadId] =
399 : pTransformerArg;
400 : }
401 :
402 15 : if (pTransformerArg == nullptr)
403 : {
404 10 : CPLAssert(psThreadData->pTransformerArgInput != nullptr);
405 10 : CPLAssert(!psThreadData->bTransformerArgInputAssignedToThread);
406 : }
407 : }
408 :
409 : // If no transformer assigned to current thread, instantiate one
410 15 : if (pTransformerArg == nullptr)
411 : {
412 : // This somehow assumes that GDALCloneTransformer() is thread-safe
413 : // which should normally be the case.
414 : pTransformerArg =
415 10 : GDALCloneTransformer(psThreadData->pTransformerArgInput);
416 :
417 : // Lock for the stop flag and the transformer map.
418 10 : std::lock_guard<std::mutex> lock(psThreadData->mutex);
419 10 : if (!pTransformerArg)
420 : {
421 0 : psJob->stopFlag = true;
422 0 : return;
423 : }
424 10 : psThreadData->mapThreadToTransformerArg[nThreadId] = pTransformerArg;
425 : }
426 :
427 15 : psJob->pTransformerArg = pTransformerArg;
428 15 : psJob->pfnFunc(pData);
429 :
430 : // Give back original transformer, if borrowed.
431 : {
432 30 : std::lock_guard<std::mutex> lock(psThreadData->mutex);
433 15 : if (psThreadData->bTransformerArgInputAssignedToThread &&
434 7 : pTransformerArg == psThreadData->pTransformerArgInput)
435 : {
436 : psThreadData->mapThreadToTransformerArg.erase(
437 5 : psThreadData->mapThreadToTransformerArg.find(nThreadId));
438 5 : psThreadData->bTransformerArgInputAssignedToThread = false;
439 : }
440 : }
441 : }
442 :
443 : /************************************************************************/
444 : /* GWKRun() */
445 : /************************************************************************/
446 :
447 2105 : static CPLErr GWKRun(GDALWarpKernel *poWK, const char *pszFuncName,
448 : void (*pfnFunc)(void *pUserData))
449 :
450 : {
451 2105 : const int nDstYSize = poWK->nDstYSize;
452 :
453 2105 : CPLDebug("GDAL",
454 : "GDALWarpKernel()::%s() "
455 : "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d",
456 : pszFuncName, poWK->nSrcXOff, poWK->nSrcYOff, poWK->nSrcXSize,
457 : poWK->nSrcYSize, poWK->nDstXOff, poWK->nDstYOff, poWK->nDstXSize,
458 : poWK->nDstYSize);
459 :
460 2105 : if (!poWK->pfnProgress(poWK->dfProgressBase, "", poWK->pProgress))
461 : {
462 0 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
463 0 : return CE_Failure;
464 : }
465 :
466 2105 : GWKThreadData *psThreadData =
467 : static_cast<GWKThreadData *>(poWK->psThreadData);
468 2105 : if (psThreadData == nullptr || psThreadData->poJobQueue == nullptr)
469 : {
470 2100 : return GWKGenericMonoThread(poWK, pfnFunc);
471 : }
472 :
473 5 : int nThreads = std::min(psThreadData->nMaxThreads, nDstYSize / 2);
474 : // Config option mostly useful for tests to be able to test multithreading
475 : // with small rasters
476 : const int nWarpChunkSize =
477 5 : atoi(CPLGetConfigOption("WARP_THREAD_CHUNK_SIZE", "65536"));
478 5 : if (nWarpChunkSize > 0)
479 : {
480 3 : GIntBig nChunks =
481 3 : static_cast<GIntBig>(nDstYSize) * poWK->nDstXSize / nWarpChunkSize;
482 3 : if (nThreads > nChunks)
483 1 : nThreads = static_cast<int>(nChunks);
484 : }
485 5 : if (nThreads <= 0)
486 1 : nThreads = 1;
487 :
488 5 : CPLDebug("WARP", "Using %d threads", nThreads);
489 :
490 5 : auto &jobs = *psThreadData->threadJobs;
491 5 : CPLAssert(static_cast<int>(jobs.size()) >= nThreads);
492 : // Fill-in job structures.
493 20 : for (int i = 0; i < nThreads; ++i)
494 : {
495 15 : auto &job = jobs[i];
496 15 : job.poWK = poWK;
497 15 : job.iYMin =
498 15 : static_cast<int>(static_cast<int64_t>(i) * nDstYSize / nThreads);
499 15 : job.iYMax = static_cast<int>(static_cast<int64_t>(i + 1) * nDstYSize /
500 15 : nThreads);
501 15 : if (poWK->pfnProgress != GDALDummyProgress)
502 1 : job.pfnProgress = GWKProgressThread;
503 15 : job.pfnFunc = pfnFunc;
504 : }
505 :
506 : bool bStopFlag;
507 : {
508 5 : std::unique_lock<std::mutex> lock(psThreadData->mutex);
509 :
510 5 : psThreadData->nTotalThreadCountForThisRun = nThreads;
511 : // coverity[missing_lock]
512 5 : psThreadData->nCurThreadCountForThisRun = 0;
513 :
514 : // Start jobs.
515 20 : for (int i = 0; i < nThreads; ++i)
516 : {
517 15 : auto &job = jobs[i];
518 15 : psThreadData->poJobQueue->SubmitJob(ThreadFuncAdapter,
519 : static_cast<void *>(&job));
520 : }
521 :
522 : /* --------------------------------------------------------------------
523 : */
524 : /* Report progress. */
525 : /* --------------------------------------------------------------------
526 : */
527 5 : if (poWK->pfnProgress != GDALDummyProgress)
528 : {
529 4 : while (psThreadData->counter < nDstYSize)
530 : {
531 4 : psThreadData->cv.wait(lock);
532 4 : if (!poWK->pfnProgress(poWK->dfProgressBase +
533 4 : poWK->dfProgressScale *
534 4 : (psThreadData->counter /
535 4 : static_cast<double>(nDstYSize)),
536 : "", poWK->pProgress))
537 : {
538 1 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
539 1 : psThreadData->stopFlag = true;
540 1 : break;
541 : }
542 : }
543 : }
544 :
545 5 : bStopFlag = psThreadData->stopFlag;
546 : }
547 :
548 : /* -------------------------------------------------------------------- */
549 : /* Wait for all jobs to complete. */
550 : /* -------------------------------------------------------------------- */
551 5 : psThreadData->poJobQueue->WaitCompletion();
552 :
553 5 : return bStopFlag ? CE_Failure : CE_None;
554 : }
555 :
556 : /************************************************************************/
557 : /* ==================================================================== */
558 : /* GDALWarpKernel */
559 : /* ==================================================================== */
560 : /************************************************************************/
561 :
562 : /**
563 : * \class GDALWarpKernel "gdalwarper.h"
564 : *
565 : * Low level image warping class.
566 : *
567 : * This class is responsible for low level image warping for one
568 : * "chunk" of imagery. The class is essentially a structure with all
569 : * data members public - primarily so that new special-case functions
570 : * can be added without changing the class declaration.
571 : *
 * Applications are normally intended to interact with warping facilities
573 : * through the GDALWarpOperation class, though the GDALWarpKernel can in
574 : * theory be used directly if great care is taken in setting up the
575 : * control data.
576 : *
577 : * <h3>Design Issues</h3>
578 : *
579 : * The intention is that PerformWarp() would analyze the setup in terms
580 : * of the datatype, resampling type, and validity/density mask usage and
581 : * pick one of many specific implementations of the warping algorithm over
582 : * a continuum of optimization vs. generality. At one end there will be a
583 : * reference general purpose implementation of the algorithm that supports
584 : * any data type (working internally in double precision complex), all three
585 : * resampling types, and any or all of the validity/density masks. At the
586 : * other end would be highly optimized algorithms for common cases like
587 : * nearest neighbour resampling on GDT_Byte data with no masks.
588 : *
589 : * The full set of optimized versions have not been decided but we should
590 : * expect to have at least:
591 : * - One for each resampling algorithm for 8bit data with no masks.
592 : * - One for each resampling algorithm for float data with no masks.
593 : * - One for each resampling algorithm for float data with any/all masks
594 : * (essentially the generic case for just float data).
595 : * - One for each resampling algorithm for 8bit data with support for
596 : * input validity masks (per band or per pixel). This handles the common
597 : * case of nodata masking.
598 : * - One for each resampling algorithm for float data with support for
599 : * input validity masks (per band or per pixel). This handles the common
600 : * case of nodata masking.
601 : *
602 : * Some of the specializations would operate on all bands in one pass
603 : * (especially the ones without masking would do this), while others might
604 : * process each band individually to reduce code complexity.
605 : *
606 : * <h3>Masking Semantics</h3>
607 : *
608 : * A detailed explanation of the semantics of the validity and density masks,
609 : * and their effects on resampling kernels is needed here.
610 : */
611 :
612 : /************************************************************************/
613 : /* GDALWarpKernel Data Members */
614 : /************************************************************************/
615 :
616 : /**
617 : * \var GDALResampleAlg GDALWarpKernel::eResample;
618 : *
619 : * Resampling algorithm.
620 : *
621 : * The resampling algorithm to use. One of GRA_NearestNeighbour, GRA_Bilinear,
622 : * GRA_Cubic, GRA_CubicSpline, GRA_Lanczos, GRA_Average, GRA_RMS,
623 : * GRA_Mode or GRA_Sum.
624 : *
625 : * This field is required. GDT_NearestNeighbour may be used as a default
626 : * value.
627 : */
628 :
629 : /**
630 : * \var GDALDataType GDALWarpKernel::eWorkingDataType;
631 : *
632 : * Working pixel data type.
633 : *
 * The datatype of pixels in the source image (papabySrcImage) and
635 : * destination image (papabyDstImage) buffers. Note that operations on
636 : * some data types (such as GDT_Byte) may be much better optimized than other
637 : * less common cases.
638 : *
639 : * This field is required. It may not be GDT_Unknown.
640 : */
641 :
642 : /**
643 : * \var int GDALWarpKernel::nBands;
644 : *
645 : * Number of bands.
646 : *
647 : * The number of bands (layers) of imagery being warped. Determines the
648 : * number of entries in the papabySrcImage, papanBandSrcValid,
649 : * and papabyDstImage arrays.
650 : *
651 : * This field is required.
652 : */
653 :
654 : /**
655 : * \var int GDALWarpKernel::nSrcXSize;
656 : *
657 : * Source image width in pixels.
658 : *
659 : * This field is required.
660 : */
661 :
662 : /**
663 : * \var int GDALWarpKernel::nSrcYSize;
664 : *
665 : * Source image height in pixels.
666 : *
667 : * This field is required.
668 : */
669 :
670 : /**
671 : * \var double GDALWarpKernel::dfSrcXExtraSize;
672 : *
673 : * Number of pixels included in nSrcXSize that are present on the edges of
674 : * the area of interest to take into account the width of the kernel.
675 : *
676 : * This field is required.
677 : */
678 :
679 : /**
680 : * \var double GDALWarpKernel::dfSrcYExtraSize;
681 : *
682 : * Number of pixels included in nSrcYExtraSize that are present on the edges of
683 : * the area of interest to take into account the height of the kernel.
684 : *
685 : * This field is required.
686 : */
687 :
688 : /**
689 : * \var int GDALWarpKernel::papabySrcImage;
690 : *
691 : * Array of source image band data.
692 : *
693 : * This is an array of pointers (of size GDALWarpKernel::nBands) pointers
694 : * to image data. Each individual band of image data is organized as a single
695 : * block of image data in left to right, then bottom to top order. The actual
696 : * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
697 : *
698 : * To access the pixel value for the (x=3, y=4) pixel (zero based) of
699 : * the second band with eWorkingDataType set to GDT_Float32 use code like
700 : * this:
701 : *
702 : * \code
703 : * float dfPixelValue;
704 : * int nBand = 2-1; // Band indexes are zero based.
705 : * int nPixel = 3; // Zero based.
706 : * int nLine = 4; // Zero based.
707 : *
708 : * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
709 : * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
710 : * assert( nBand >= 0 && nBand < poKern->nBands );
711 : * dfPixelValue = ((float *) poKern->papabySrcImage[nBand])
712 : * [nPixel + nLine * poKern->nSrcXSize];
713 : * \endcode
714 : *
715 : * This field is required.
716 : */
717 :
718 : /**
719 : * \var GUInt32 **GDALWarpKernel::papanBandSrcValid;
720 : *
721 : * Per band validity mask for source pixels.
722 : *
723 : * Array of pixel validity mask layers for each source band. Each of
724 : * the mask layers is the same size (in pixels) as the source image with
725 : * one bit per pixel. Note that it is legal (and common) for this to be
726 : * NULL indicating that none of the pixels are invalidated, or for some
727 : * band validity masks to be NULL in which case all pixels of the band are
728 : * valid. The following code can be used to test the validity of a particular
729 : * pixel.
730 : *
731 : * \code
732 : * int bIsValid = TRUE;
733 : * int nBand = 2-1; // Band indexes are zero based.
734 : * int nPixel = 3; // Zero based.
735 : * int nLine = 4; // Zero based.
736 : *
737 : * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
738 : * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
739 : * assert( nBand >= 0 && nBand < poKern->nBands );
740 : *
741 : * if( poKern->papanBandSrcValid != NULL
742 : * && poKern->papanBandSrcValid[nBand] != NULL )
743 : * {
744 : * GUInt32 *panBandMask = poKern->papanBandSrcValid[nBand];
745 : * int iPixelOffset = nPixel + nLine * poKern->nSrcXSize;
746 : *
747 : * bIsValid = CPLMaskGet(panBandMask, iPixelOffset)
748 : * }
749 : * \endcode
750 : */
751 :
752 : /**
753 : * \var GUInt32 *GDALWarpKernel::panUnifiedSrcValid;
754 : *
755 : * Per pixel validity mask for source pixels.
756 : *
757 : * A single validity mask layer that applies to the pixels of all source
758 : * bands. It is accessed similarly to papanBandSrcValid, but without the
759 : * extra level of band indirection.
760 : *
761 : * This pointer may be NULL indicating that all pixels are valid.
762 : *
763 : * Note that if both panUnifiedSrcValid, and papanBandSrcValid are available,
764 : * the pixel isn't considered to be valid unless both arrays indicate it is
765 : * valid.
766 : */
767 :
768 : /**
769 : * \var float *GDALWarpKernel::pafUnifiedSrcDensity;
770 : *
771 : * Per pixel density mask for source pixels.
772 : *
773 : * A single density mask layer that applies to the pixels of all source
774 : * bands. It contains values between 0.0 and 1.0 indicating the degree to
775 : * which this pixel should be allowed to contribute to the output result.
776 : *
777 : * This pointer may be NULL indicating that all pixels have a density of 1.0.
778 : *
779 : * The density for a pixel may be accessed like this:
780 : *
781 : * \code
782 : * float fDensity = 1.0;
783 : * int nPixel = 3; // Zero based.
784 : * int nLine = 4; // Zero based.
785 : *
786 : * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
787 : * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
788 : * if( poKern->pafUnifiedSrcDensity != NULL )
789 : * fDensity = poKern->pafUnifiedSrcDensity
790 : * [nPixel + nLine * poKern->nSrcXSize];
791 : * \endcode
792 : */
793 :
794 : /**
795 : * \var int GDALWarpKernel::nDstXSize;
796 : *
797 : * Width of destination image in pixels.
798 : *
799 : * This field is required.
800 : */
801 :
802 : /**
803 : * \var int GDALWarpKernel::nDstYSize;
804 : *
805 : * Height of destination image in pixels.
806 : *
807 : * This field is required.
808 : */
809 :
810 : /**
811 : * \var GByte **GDALWarpKernel::papabyDstImage;
812 : *
813 : * Array of destination image band data.
814 : *
815 : * This is an array of pointers (of size GDALWarpKernel::nBands) pointers
816 : * to image data. Each individual band of image data is organized as a single
817 : * block of image data in left to right, then bottom to top order. The actual
818 : * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
819 : *
820 : * To access the pixel value for the (x=3, y=4) pixel (zero based) of
821 : * the second band with eWorkingDataType set to GDT_Float32 use code like
822 : * this:
823 : *
824 : * \code
825 : * float dfPixelValue;
826 : * int nBand = 2-1; // Band indexes are zero based.
827 : * int nPixel = 3; // Zero based.
828 : * int nLine = 4; // Zero based.
829 : *
830 : * assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
831 : * assert( nLine >= 0 && nLine < poKern->nDstYSize );
832 : * assert( nBand >= 0 && nBand < poKern->nBands );
833 : * dfPixelValue = ((float *) poKern->papabyDstImage[nBand])
 *                                  [nPixel + nLine * poKern->nDstXSize];
835 : * \endcode
836 : *
837 : * This field is required.
838 : */
839 :
840 : /**
841 : * \var GUInt32 *GDALWarpKernel::panDstValid;
842 : *
843 : * Per pixel validity mask for destination pixels.
844 : *
845 : * A single validity mask layer that applies to the pixels of all destination
846 : * bands. It is accessed similarly to papanUnitifiedSrcValid, but based
847 : * on the size of the destination image.
848 : *
849 : * This pointer may be NULL indicating that all pixels are valid.
850 : */
851 :
852 : /**
853 : * \var float *GDALWarpKernel::pafDstDensity;
854 : *
855 : * Per pixel density mask for destination pixels.
856 : *
857 : * A single density mask layer that applies to the pixels of all destination
858 : * bands. It contains values between 0.0 and 1.0.
859 : *
860 : * This pointer may be NULL indicating that all pixels have a density of 1.0.
861 : *
862 : * The density for a pixel may be accessed like this:
863 : *
864 : * \code
865 : * float fDensity = 1.0;
866 : * int nPixel = 3; // Zero based.
867 : * int nLine = 4; // Zero based.
868 : *
869 : * assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
870 : * assert( nLine >= 0 && nLine < poKern->nDstYSize );
871 : * if( poKern->pafDstDensity != NULL )
872 : * fDensity = poKern->pafDstDensity[nPixel + nLine * poKern->nDstXSize];
873 : * \endcode
874 : */
875 :
876 : /**
877 : * \var int GDALWarpKernel::nSrcXOff;
878 : *
879 : * X offset to source pixel coordinates for transformation.
880 : *
881 : * See pfnTransformer.
882 : *
883 : * This field is required.
884 : */
885 :
886 : /**
887 : * \var int GDALWarpKernel::nSrcYOff;
888 : *
889 : * Y offset to source pixel coordinates for transformation.
890 : *
891 : * See pfnTransformer.
892 : *
893 : * This field is required.
894 : */
895 :
896 : /**
897 : * \var int GDALWarpKernel::nDstXOff;
898 : *
899 : * X offset to destination pixel coordinates for transformation.
900 : *
901 : * See pfnTransformer.
902 : *
903 : * This field is required.
904 : */
905 :
906 : /**
907 : * \var int GDALWarpKernel::nDstYOff;
908 : *
909 : * Y offset to destination pixel coordinates for transformation.
910 : *
911 : * See pfnTransformer.
912 : *
913 : * This field is required.
914 : */
915 :
916 : /**
917 : * \var GDALTransformerFunc GDALWarpKernel::pfnTransformer;
918 : *
919 : * Source/destination location transformer.
920 : *
921 : * The function to call to transform coordinates between source image
922 : * pixel/line coordinates and destination image pixel/line coordinates.
923 : * See GDALTransformerFunc() for details of the semantics of this function.
924 : *
925 : * The GDALWarpKernel algorithm will only ever use this transformer in
926 : * "destination to source" mode (bDstToSrc=TRUE), and will always pass
927 : * partial or complete scanlines of points in the destination image as
928 : * input. This means, among other things, that it is safe to use the
929 : * approximating transform GDALApproxTransform() as the transformation
930 : * function.
931 : *
932 : * Source and destination images may be subsets of a larger overall image.
933 : * The transformation algorithms will expect and return pixel/line coordinates
934 : * in terms of this larger image, so coordinates need to be offset by
935 : * the offsets specified in nSrcXOff, nSrcYOff, nDstXOff, and nDstYOff before
936 : * passing to pfnTransformer, and after return from it.
937 : *
938 : * The GDALWarpKernel::pTransformerArg value will be passed as the callback
939 : * data to this function when it is called.
940 : *
941 : * This field is required.
942 : */
943 :
944 : /**
945 : * \var void *GDALWarpKernel::pTransformerArg;
946 : *
947 : * Callback data for pfnTransformer.
948 : *
949 : * This field may be NULL if not required for the pfnTransformer being used.
950 : */
951 :
952 : /**
953 : * \var GDALProgressFunc GDALWarpKernel::pfnProgress;
954 : *
955 : * The function to call to report progress of the algorithm, and to check
956 : * for a requested termination of the operation. It operates according to
957 : * GDALProgressFunc() semantics.
958 : *
959 : * Generally speaking the progress function will be invoked for each
960 : * scanline of the destination buffer that has been processed.
961 : *
962 : * This field may be NULL (internally set to GDALDummyProgress()).
963 : */
964 :
965 : /**
966 : * \var void *GDALWarpKernel::pProgress;
967 : *
968 : * Callback data for pfnProgress.
969 : *
970 : * This field may be NULL if not required for the pfnProgress being used.
971 : */
972 :
973 : /************************************************************************/
974 : /* GDALWarpKernel() */
975 : /************************************************************************/
976 :
977 2115 : GDALWarpKernel::GDALWarpKernel()
978 : : papszWarpOptions(nullptr), eResample(GRA_NearestNeighbour),
979 : eWorkingDataType(GDT_Unknown), nBands(0), nSrcXSize(0), nSrcYSize(0),
980 : dfSrcXExtraSize(0.0), dfSrcYExtraSize(0.0), papabySrcImage(nullptr),
981 : papanBandSrcValid(nullptr), panUnifiedSrcValid(nullptr),
982 : pafUnifiedSrcDensity(nullptr), nDstXSize(0), nDstYSize(0),
983 : papabyDstImage(nullptr), panDstValid(nullptr), pafDstDensity(nullptr),
984 : dfXScale(1.0), dfYScale(1.0), dfXFilter(0.0), dfYFilter(0.0), nXRadius(0),
985 : nYRadius(0), nFiltInitX(0), nFiltInitY(0), nSrcXOff(0), nSrcYOff(0),
986 : nDstXOff(0), nDstYOff(0), pfnTransformer(nullptr),
987 : pTransformerArg(nullptr), pfnProgress(GDALDummyProgress),
988 : pProgress(nullptr), dfProgressBase(0.0), dfProgressScale(1.0),
989 : padfDstNoDataReal(nullptr), psThreadData(nullptr),
990 2115 : eTieStrategy(GWKTS_First)
991 : {
992 2115 : }
993 :
994 : /************************************************************************/
995 : /* ~GDALWarpKernel() */
996 : /************************************************************************/
997 :
998 2115 : GDALWarpKernel::~GDALWarpKernel()
999 : {
1000 2115 : }
1001 :
1002 : /************************************************************************/
1003 : /* PerformWarp() */
1004 : /************************************************************************/
1005 :
1006 : /**
1007 : * \fn CPLErr GDALWarpKernel::PerformWarp();
1008 : *
1009 : * This method performs the warp described in the GDALWarpKernel.
1010 : *
1011 : * @return CE_None on success or CE_Failure if an error occurs.
1012 : */
1013 :
1014 2113 : CPLErr GDALWarpKernel::PerformWarp()
1015 :
1016 : {
1017 2113 : const CPLErr eErr = Validate();
1018 :
1019 2113 : if (eErr != CE_None)
1020 1 : return eErr;
1021 :
1022 : // See #2445 and #3079.
1023 2112 : if (nSrcXSize <= 0 || nSrcYSize <= 0)
1024 : {
1025 7 : if (!pfnProgress(dfProgressBase + dfProgressScale, "", pProgress))
1026 : {
1027 0 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
1028 0 : return CE_Failure;
1029 : }
1030 7 : return CE_None;
1031 : }
1032 :
1033 : /* -------------------------------------------------------------------- */
1034 : /* Pre-calculate resampling scales and window sizes for filtering. */
1035 : /* -------------------------------------------------------------------- */
1036 :
1037 2105 : dfXScale = static_cast<double>(nDstXSize) / (nSrcXSize - dfSrcXExtraSize);
1038 2105 : dfYScale = static_cast<double>(nDstYSize) / (nSrcYSize - dfSrcYExtraSize);
1039 2105 : if (nSrcXSize >= nDstXSize && nSrcXSize <= nDstXSize + dfSrcXExtraSize)
1040 1334 : dfXScale = 1.0;
1041 2105 : if (nSrcYSize >= nDstYSize && nSrcYSize <= nDstYSize + dfSrcYExtraSize)
1042 1039 : dfYScale = 1.0;
1043 2105 : if (dfXScale < 1.0)
1044 : {
1045 550 : double dfXReciprocalScale = 1.0 / dfXScale;
1046 550 : const int nXReciprocalScale =
1047 550 : static_cast<int>(dfXReciprocalScale + 0.5);
1048 550 : if (fabs(dfXReciprocalScale - nXReciprocalScale) < 0.05)
1049 432 : dfXScale = 1.0 / nXReciprocalScale;
1050 : }
1051 2105 : if (dfYScale < 1.0)
1052 : {
1053 518 : double dfYReciprocalScale = 1.0 / dfYScale;
1054 518 : const int nYReciprocalScale =
1055 518 : static_cast<int>(dfYReciprocalScale + 0.5);
1056 518 : if (fabs(dfYReciprocalScale - nYReciprocalScale) < 0.05)
1057 369 : dfYScale = 1.0 / nYReciprocalScale;
1058 : }
1059 :
1060 : // XSCALE and YSCALE undocumented for now. Can help in some cases.
1061 : // Best would probably be a per-pixel scale computation.
1062 2105 : const char *pszXScale = CSLFetchNameValue(papszWarpOptions, "XSCALE");
1063 2105 : if (pszXScale != nullptr && !EQUAL(pszXScale, "FROM_GRID_SAMPLING"))
1064 1 : dfXScale = CPLAtof(pszXScale);
1065 2105 : const char *pszYScale = CSLFetchNameValue(papszWarpOptions, "YSCALE");
1066 2105 : if (pszYScale != nullptr)
1067 1 : dfYScale = CPLAtof(pszYScale);
1068 :
1069 : // If the xscale is significantly lower than the yscale, this is highly
1070 : // suspicious of a situation of wrapping a very large virtual file in
1071 : // geographic coordinates with left and right parts being close to the
1072 : // antimeridian. In that situation, the xscale computed by the above method
1073 : // is completely wrong. Prefer doing an average of a few sample points
1074 : // instead
1075 2105 : if ((dfYScale / dfXScale > 100 ||
1076 1 : (pszXScale != nullptr && EQUAL(pszXScale, "FROM_GRID_SAMPLING"))))
1077 : {
1078 : // Sample points along a grid
1079 4 : const int nPointsX = std::min(10, nDstXSize);
1080 4 : const int nPointsY = std::min(10, nDstYSize);
1081 4 : const int nPoints = 3 * nPointsX * nPointsY;
1082 8 : std::vector<double> padfX;
1083 8 : std::vector<double> padfY;
1084 8 : std::vector<double> padfZ(nPoints);
1085 8 : std::vector<int> pabSuccess(nPoints);
1086 44 : for (int iY = 0; iY < nPointsY; iY++)
1087 : {
1088 440 : for (int iX = 0; iX < nPointsX; iX++)
1089 : {
1090 400 : const double dfX =
1091 : nPointsX == 1
1092 400 : ? 0.0
1093 400 : : static_cast<double>(iX) * nDstXSize / (nPointsX - 1);
1094 400 : const double dfY =
1095 : nPointsY == 1
1096 400 : ? 0.0
1097 400 : : static_cast<double>(iY) * nDstYSize / (nPointsY - 1);
1098 :
1099 : // Reproject each destination sample point and its neighbours
1100 : // at (x+1,y) and (x,y+1), so as to get the local scale.
1101 400 : padfX.push_back(dfX);
1102 400 : padfY.push_back(dfY);
1103 :
1104 400 : padfX.push_back((iX == nPointsX - 1) ? dfX - 1 : dfX + 1);
1105 400 : padfY.push_back(dfY);
1106 :
1107 400 : padfX.push_back(dfX);
1108 400 : padfY.push_back((iY == nPointsY - 1) ? dfY - 1 : dfY + 1);
1109 : }
1110 : }
1111 4 : pfnTransformer(pTransformerArg, TRUE, nPoints, &padfX[0], &padfY[0],
1112 4 : &padfZ[0], &pabSuccess[0]);
1113 :
1114 : // Compute the xscale at each sampling point
1115 8 : std::vector<double> adfXScales;
1116 404 : for (int i = 0; i < nPoints; i += 3)
1117 : {
1118 400 : if (pabSuccess[i] && pabSuccess[i + 1] && pabSuccess[i + 2])
1119 : {
1120 : const double dfPointXScale =
1121 400 : 1.0 / std::max(std::abs(padfX[i + 1] - padfX[i]),
1122 800 : std::abs(padfX[i + 2] - padfX[i]));
1123 400 : adfXScales.push_back(dfPointXScale);
1124 : }
1125 : }
1126 :
1127 : // Sort by increasing xcale
1128 4 : std::sort(adfXScales.begin(), adfXScales.end());
1129 :
1130 4 : if (!adfXScales.empty())
1131 : {
1132 : // Compute the average of scales, but eliminate outliers small
1133 : // scales, if some samples are just along the discontinuity.
1134 4 : const double dfMaxPointXScale = adfXScales.back();
1135 4 : double dfSumPointXScale = 0;
1136 4 : int nCountPointScale = 0;
1137 404 : for (double dfPointXScale : adfXScales)
1138 : {
1139 400 : if (dfPointXScale > dfMaxPointXScale / 10)
1140 : {
1141 398 : dfSumPointXScale += dfPointXScale;
1142 398 : nCountPointScale++;
1143 : }
1144 : }
1145 4 : if (nCountPointScale > 0) // should always be true
1146 : {
1147 4 : const double dfXScaleFromSampling =
1148 4 : dfSumPointXScale / nCountPointScale;
1149 : #if DEBUG_VERBOSE
1150 : CPLDebug("WARP", "Correcting dfXScale from %f to %f", dfXScale,
1151 : dfXScaleFromSampling);
1152 : #endif
1153 4 : dfXScale = dfXScaleFromSampling;
1154 : }
1155 : }
1156 : }
1157 :
1158 : #if DEBUG_VERBOSE
1159 : CPLDebug("WARP", "dfXScale = %f, dfYScale = %f", dfXScale, dfYScale);
1160 : #endif
1161 :
1162 2105 : const int bUse4SamplesFormula = dfXScale >= 0.95 && dfYScale >= 0.95;
1163 :
1164 : // Safety check for callers that would use GDALWarpKernel without using
1165 : // GDALWarpOperation.
1166 2042 : if ((eResample == GRA_CubicSpline || eResample == GRA_Lanczos ||
1167 1979 : ((eResample == GRA_Cubic || eResample == GRA_Bilinear) &&
1168 4210 : !bUse4SamplesFormula)) &&
1169 388 : atoi(CSLFetchNameValueDef(papszWarpOptions, "EXTRA_ELTS", "0")) !=
1170 : WARP_EXTRA_ELTS)
1171 : {
1172 0 : CPLError(CE_Failure, CPLE_AppDefined,
1173 : "Source arrays must have WARP_EXTRA_ELTS extra elements at "
1174 : "their end. "
1175 : "See GDALWarpKernel class definition. If this condition is "
1176 : "fulfilled, define a EXTRA_ELTS=%d warp options",
1177 : WARP_EXTRA_ELTS);
1178 0 : return CE_Failure;
1179 : }
1180 :
1181 2105 : dfXFilter = anGWKFilterRadius[eResample];
1182 2105 : dfYFilter = anGWKFilterRadius[eResample];
1183 :
1184 2105 : nXRadius = dfXScale < 1.0 ? static_cast<int>(ceil(dfXFilter / dfXScale))
1185 1637 : : static_cast<int>(dfXFilter);
1186 2105 : nYRadius = dfYScale < 1.0 ? static_cast<int>(ceil(dfYFilter / dfYScale))
1187 1615 : : static_cast<int>(dfYFilter);
1188 :
1189 : // Filter window offset depends on the parity of the kernel radius.
1190 2105 : nFiltInitX = ((anGWKFilterRadius[eResample] + 1) % 2) - nXRadius;
1191 2105 : nFiltInitY = ((anGWKFilterRadius[eResample] + 1) % 2) - nYRadius;
1192 :
1193 2105 : bApplyVerticalShift =
1194 2105 : CPLFetchBool(papszWarpOptions, "APPLY_VERTICAL_SHIFT", false);
1195 2105 : dfMultFactorVerticalShift = CPLAtof(CSLFetchNameValueDef(
1196 2105 : papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT", "1.0"));
1197 :
1198 : /* -------------------------------------------------------------------- */
1199 : /* Set up resampling functions. */
1200 : /* -------------------------------------------------------------------- */
1201 2105 : if (CPLFetchBool(papszWarpOptions, "USE_GENERAL_CASE", false))
1202 12 : return GWKGeneralCase(this);
1203 :
1204 2093 : const bool bNoMasksOrDstDensityOnly =
1205 2089 : papanBandSrcValid == nullptr && panUnifiedSrcValid == nullptr &&
1206 4182 : pafUnifiedSrcDensity == nullptr && panDstValid == nullptr;
1207 :
1208 2093 : if (eWorkingDataType == GDT_Byte && eResample == GRA_NearestNeighbour &&
1209 : bNoMasksOrDstDensityOnly)
1210 896 : return GWKNearestNoMasksOrDstDensityOnlyByte(this);
1211 :
1212 1197 : if (eWorkingDataType == GDT_Byte && eResample == GRA_Bilinear &&
1213 : bNoMasksOrDstDensityOnly)
1214 126 : return GWKBilinearNoMasksOrDstDensityOnlyByte(this);
1215 :
1216 1071 : if (eWorkingDataType == GDT_Byte && eResample == GRA_Cubic &&
1217 : bNoMasksOrDstDensityOnly)
1218 72 : return GWKCubicNoMasksOrDstDensityOnlyByte(this);
1219 :
1220 999 : if (eWorkingDataType == GDT_Byte && eResample == GRA_CubicSpline &&
1221 : bNoMasksOrDstDensityOnly)
1222 12 : return GWKCubicSplineNoMasksOrDstDensityOnlyByte(this);
1223 :
1224 987 : if (eWorkingDataType == GDT_Byte && eResample == GRA_NearestNeighbour)
1225 324 : return GWKNearestByte(this);
1226 :
1227 663 : if ((eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16) &&
1228 122 : eResample == GRA_NearestNeighbour && bNoMasksOrDstDensityOnly)
1229 14 : return GWKNearestNoMasksOrDstDensityOnlyShort(this);
1230 :
1231 649 : if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Cubic &&
1232 : bNoMasksOrDstDensityOnly)
1233 5 : return GWKCubicNoMasksOrDstDensityOnlyShort(this);
1234 :
1235 644 : if ((eWorkingDataType == GDT_Int16) && eResample == GRA_CubicSpline &&
1236 : bNoMasksOrDstDensityOnly)
1237 6 : return GWKCubicSplineNoMasksOrDstDensityOnlyShort(this);
1238 :
1239 638 : if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Bilinear &&
1240 : bNoMasksOrDstDensityOnly)
1241 5 : return GWKBilinearNoMasksOrDstDensityOnlyShort(this);
1242 :
1243 633 : if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Cubic &&
1244 : bNoMasksOrDstDensityOnly)
1245 12 : return GWKCubicNoMasksOrDstDensityOnlyUShort(this);
1246 :
1247 621 : if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_CubicSpline &&
1248 : bNoMasksOrDstDensityOnly)
1249 5 : return GWKCubicSplineNoMasksOrDstDensityOnlyUShort(this);
1250 :
1251 616 : if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Bilinear &&
1252 : bNoMasksOrDstDensityOnly)
1253 6 : return GWKBilinearNoMasksOrDstDensityOnlyUShort(this);
1254 :
1255 610 : if (eWorkingDataType == GDT_Int16 && eResample == GRA_NearestNeighbour)
1256 23 : return GWKNearestShort(this);
1257 :
1258 587 : if (eWorkingDataType == GDT_Int16 && eResample == GRA_NearestNeighbour)
1259 0 : return GWKNearestUnsignedShort(this);
1260 :
1261 587 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour &&
1262 : bNoMasksOrDstDensityOnly)
1263 11 : return GWKNearestNoMasksOrDstDensityOnlyFloat(this);
1264 :
1265 576 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour)
1266 37 : return GWKNearestFloat(this);
1267 :
1268 539 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_Bilinear &&
1269 : bNoMasksOrDstDensityOnly)
1270 4 : return GWKBilinearNoMasksOrDstDensityOnlyFloat(this);
1271 :
1272 535 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_Cubic &&
1273 : bNoMasksOrDstDensityOnly)
1274 9 : return GWKCubicNoMasksOrDstDensityOnlyFloat(this);
1275 :
1276 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
1277 : if (eWorkingDataType == GDT_Float64 && eResample == GRA_Bilinear &&
1278 : bNoMasksOrDstDensityOnly)
1279 : return GWKBilinearNoMasksOrDstDensityOnlyDouble(this);
1280 :
1281 : if (eWorkingDataType == GDT_Float64 && eResample == GRA_Cubic &&
1282 : bNoMasksOrDstDensityOnly)
1283 : return GWKCubicNoMasksOrDstDensityOnlyDouble(this);
1284 : #endif
1285 :
1286 526 : if (eResample == GRA_Average)
1287 71 : return GWKAverageOrMode(this);
1288 :
1289 455 : if (eResample == GRA_RMS)
1290 9 : return GWKAverageOrMode(this);
1291 :
1292 446 : if (eResample == GRA_Mode)
1293 23 : return GWKAverageOrMode(this);
1294 :
1295 423 : if (eResample == GRA_Max)
1296 6 : return GWKAverageOrMode(this);
1297 :
1298 417 : if (eResample == GRA_Min)
1299 5 : return GWKAverageOrMode(this);
1300 :
1301 412 : if (eResample == GRA_Med)
1302 6 : return GWKAverageOrMode(this);
1303 :
1304 406 : if (eResample == GRA_Q1)
1305 5 : return GWKAverageOrMode(this);
1306 :
1307 401 : if (eResample == GRA_Q3)
1308 5 : return GWKAverageOrMode(this);
1309 :
1310 396 : if (eResample == GRA_Sum)
1311 18 : return GWKSumPreserving(this);
1312 :
1313 378 : if (!GDALDataTypeIsComplex(eWorkingDataType))
1314 : {
1315 151 : return GWKRealCase(this);
1316 : }
1317 :
1318 227 : return GWKGeneralCase(this);
1319 : }
1320 :
1321 : /************************************************************************/
1322 : /* Validate() */
1323 : /************************************************************************/
1324 :
1325 : /**
1326 : * \fn CPLErr GDALWarpKernel::Validate()
1327 : *
1328 : * Check the settings in the GDALWarpKernel, and issue a CPLError()
1329 : * (and return CE_Failure) if the configuration is considered to be
1330 : * invalid for some reason.
1331 : *
1332 : * This method will also do some standard defaulting such as setting
1333 : * pfnProgress to GDALDummyProgress() if it is NULL.
1334 : *
1335 : * @return CE_None on success or CE_Failure if an error is detected.
1336 : */
1337 :
1338 2113 : CPLErr GDALWarpKernel::Validate()
1339 :
1340 : {
1341 2113 : if (static_cast<size_t>(eResample) >=
1342 : (sizeof(anGWKFilterRadius) / sizeof(anGWKFilterRadius[0])))
1343 : {
1344 0 : CPLError(CE_Failure, CPLE_AppDefined,
1345 : "Unsupported resampling method %d.",
1346 0 : static_cast<int>(eResample));
1347 0 : return CE_Failure;
1348 : }
1349 :
1350 : // Tuples of values (e.g. "<R>,<G>,<B>" or "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>)") that must
1351 : // be ignored as contributing source pixels during resampling. Only taken into account by
1352 : // Average currently
1353 : const char *pszExcludedValues =
1354 2113 : CSLFetchNameValue(papszWarpOptions, "EXCLUDED_VALUES");
1355 2113 : if (pszExcludedValues)
1356 : {
1357 : const CPLStringList aosTokens(
1358 8 : CSLTokenizeString2(pszExcludedValues, "(,)", 0));
1359 8 : if ((aosTokens.size() % nBands) != 0)
1360 : {
1361 1 : CPLError(CE_Failure, CPLE_AppDefined,
1362 : "EXCLUDED_VALUES should contain one or several tuples of "
1363 : "%d values formatted like <R>,<G>,<B> or "
1364 : "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>) if there are multiple "
1365 : "tuples",
1366 : nBands);
1367 1 : return CE_Failure;
1368 : }
1369 14 : std::vector<double> adfTuple;
1370 28 : for (int i = 0; i < aosTokens.size(); ++i)
1371 : {
1372 21 : adfTuple.push_back(CPLAtof(aosTokens[i]));
1373 21 : if (((i + 1) % nBands) == 0)
1374 : {
1375 7 : m_aadfExcludedValues.push_back(adfTuple);
1376 7 : adfTuple.clear();
1377 : }
1378 : }
1379 : }
1380 :
1381 2112 : return CE_None;
1382 : }
1383 :
1384 : /************************************************************************/
1385 : /* GWKOverlayDensity() */
1386 : /* */
1387 : /* Compute the final density for the destination pixel. This */
1388 : /* is a function of the overlay density (passed in) and the */
1389 : /* original density. */
1390 : /************************************************************************/
1391 :
1392 8933090 : static void GWKOverlayDensity(const GDALWarpKernel *poWK, GPtrDiff_t iDstOffset,
1393 : double dfDensity)
1394 : {
1395 8933090 : if (dfDensity < 0.0001 || poWK->pafDstDensity == nullptr)
1396 7742210 : return;
1397 :
1398 1190880 : poWK->pafDstDensity[iDstOffset] = static_cast<float>(
1399 1190880 : 1.0 - (1.0 - dfDensity) * (1.0 - poWK->pafDstDensity[iDstOffset]));
1400 : }
1401 :
1402 : /************************************************************************/
1403 : /* GWKRoundValueT() */
1404 : /************************************************************************/
1405 :
1406 : template <class T, bool is_signed> struct sGWKRoundValueT
1407 : {
1408 : static T eval(double);
1409 : };
1410 :
1411 : template <class T> struct sGWKRoundValueT<T, true> /* signed */
1412 : {
1413 791525 : static T eval(double dfValue)
1414 : {
1415 791525 : return static_cast<T>(floor(dfValue + 0.5));
1416 : }
1417 : };
1418 :
1419 : template <class T> struct sGWKRoundValueT<T, false> /* unsigned */
1420 : {
1421 12954881 : static T eval(double dfValue)
1422 : {
1423 12954881 : return static_cast<T>(dfValue + 0.5);
1424 : }
1425 : };
1426 :
1427 13740806 : template <class T> static T GWKRoundValueT(double dfValue)
1428 : {
1429 13740806 : return sGWKRoundValueT<T, cpl::NumericLimits<T>::is_signed>::eval(dfValue);
1430 : }
1431 :
1432 268974 : template <> float GWKRoundValueT<float>(double dfValue)
1433 : {
1434 268974 : return static_cast<float>(dfValue);
1435 : }
1436 :
1437 : #ifdef notused
1438 : template <> double GWKRoundValueT<double>(double dfValue)
1439 : {
1440 : return dfValue;
1441 : }
1442 : #endif
1443 :
1444 : /************************************************************************/
1445 : /* GWKClampValueT() */
1446 : /************************************************************************/
1447 :
1448 10313444 : template <class T> static CPL_INLINE T GWKClampValueT(double dfValue)
1449 : {
1450 10313444 : if (dfValue < cpl::NumericLimits<T>::min())
1451 3969 : return cpl::NumericLimits<T>::min();
1452 10364686 : else if (dfValue > cpl::NumericLimits<T>::max())
1453 18463 : return cpl::NumericLimits<T>::max();
1454 : else
1455 10321116 : return GWKRoundValueT<T>(dfValue);
1456 : }
1457 :
1458 718914 : template <> float GWKClampValueT<float>(double dfValue)
1459 : {
1460 718914 : return static_cast<float>(dfValue);
1461 : }
1462 :
1463 : #ifdef notused
1464 : template <> double GWKClampValueT<double>(double dfValue)
1465 : {
1466 : return dfValue;
1467 : }
1468 : #endif
1469 :
1470 : /************************************************************************/
1471 : /* AvoidNoData() */
1472 : /************************************************************************/
1473 :
1474 : template <class T>
1475 12865062 : inline void AvoidNoData(const GDALWarpKernel *poWK, int iBand,
1476 : GPtrDiff_t iDstOffset)
1477 : {
1478 12865062 : GByte *pabyDst = poWK->papabyDstImage[iBand];
1479 12865062 : T *pDst = reinterpret_cast<T *>(pabyDst);
1480 :
1481 12865062 : if (poWK->padfDstNoDataReal != nullptr &&
1482 6729947 : poWK->padfDstNoDataReal[iBand] == static_cast<double>(pDst[iDstOffset]))
1483 : {
1484 : if constexpr (cpl::NumericLimits<T>::is_integer)
1485 : {
1486 2637 : if (pDst[iDstOffset] ==
1487 2637 : static_cast<T>(cpl::NumericLimits<T>::lowest()))
1488 : {
1489 2509 : pDst[iDstOffset] =
1490 2509 : static_cast<T>(cpl::NumericLimits<T>::lowest() + 1);
1491 : }
1492 : else
1493 128 : pDst[iDstOffset]--;
1494 : }
1495 : else
1496 : {
1497 64 : if (pDst[iDstOffset] == cpl::NumericLimits<T>::max())
1498 : {
1499 : using std::nextafter;
1500 0 : pDst[iDstOffset] =
1501 0 : nextafter(pDst[iDstOffset], static_cast<T>(0));
1502 : }
1503 : else
1504 : {
1505 : using std::nextafter;
1506 64 : pDst[iDstOffset] =
1507 64 : nextafter(pDst[iDstOffset], cpl::NumericLimits<T>::max());
1508 : }
1509 : }
1510 :
1511 2701 : if (!poWK->bWarnedAboutDstNoDataReplacement)
1512 : {
1513 25 : const_cast<GDALWarpKernel *>(poWK)
1514 : ->bWarnedAboutDstNoDataReplacement = true;
1515 25 : CPLError(CE_Warning, CPLE_AppDefined,
1516 : "Value %g in the source dataset has been changed to %g "
1517 : "in the destination dataset to avoid being treated as "
1518 : "NoData. To avoid this, select a different NoData value "
1519 : "for the destination dataset.",
1520 25 : poWK->padfDstNoDataReal[iBand],
1521 25 : static_cast<double>(pDst[iDstOffset]));
1522 : }
1523 : }
1524 12865062 : }
1525 :
1526 : /************************************************************************/
1527 : /* GWKSetPixelValueRealT() */
1528 : /************************************************************************/
1529 :
1530 : template <class T>
1531 8159107 : static bool GWKSetPixelValueRealT(const GDALWarpKernel *poWK, int iBand,
1532 : GPtrDiff_t iDstOffset, double dfDensity,
1533 : T value)
1534 : {
1535 8159107 : T *pDst = reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
1536 :
1537 : /* -------------------------------------------------------------------- */
1538 : /* If the source density is less than 100% we need to fetch the */
1539 : /* existing destination value, and mix it with the source to */
1540 : /* get the new "to apply" value. Also compute composite */
1541 : /* density. */
1542 : /* */
1543 : /* We avoid mixing if density is very near one or risk mixing */
1544 : /* in very extreme nodata values and causing odd results (#1610) */
1545 : /* -------------------------------------------------------------------- */
1546 8159107 : if (dfDensity < 0.9999)
1547 : {
1548 81504 : if (dfDensity < 0.0001)
1549 0 : return true;
1550 :
1551 81504 : double dfDstDensity = 1.0;
1552 :
1553 81504 : if (poWK->pafDstDensity != nullptr)
1554 80032 : dfDstDensity = poWK->pafDstDensity[iDstOffset];
1555 1472 : else if (poWK->panDstValid != nullptr &&
1556 0 : !CPLMaskGet(poWK->panDstValid, iDstOffset))
1557 0 : dfDstDensity = 0.0;
1558 :
1559 : // It seems like we also ought to be testing panDstValid[] here!
1560 :
1561 81504 : const double dfDstReal = pDst[iDstOffset];
1562 :
1563 : // The destination density is really only relative to the portion
1564 : // not occluded by the overlay.
1565 81504 : const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
1566 :
1567 81504 : const double dfReal = (value * dfDensity + dfDstReal * dfDstInfluence) /
1568 81504 : (dfDensity + dfDstInfluence);
1569 :
1570 : /* --------------------------------------------------------------------
1571 : */
1572 : /* Actually apply the destination value. */
1573 : /* */
1574 : /* Avoid using the destination nodata value for integer datatypes
1575 : */
1576 : /* if by chance it is equal to the computed pixel value. */
1577 : /* --------------------------------------------------------------------
1578 : */
1579 81504 : pDst[iDstOffset] = GWKClampValueT<T>(dfReal);
1580 : }
1581 : else
1582 : {
1583 8077598 : pDst[iDstOffset] = value;
1584 : }
1585 :
1586 8159107 : AvoidNoData<T>(poWK, iBand, iDstOffset);
1587 :
1588 8159107 : return true;
1589 : }
1590 :
1591 : /************************************************************************/
1592 : /* ClampRoundAndAvoidNoData() */
1593 : /************************************************************************/
1594 :
1595 : template <class T>
1596 4705975 : inline void ClampRoundAndAvoidNoData(const GDALWarpKernel *poWK, int iBand,
1597 : GPtrDiff_t iDstOffset, double dfReal)
1598 : {
1599 4705975 : GByte *pabyDst = poWK->papabyDstImage[iBand];
1600 4705975 : T *pDst = reinterpret_cast<T *>(pabyDst);
1601 :
1602 : if constexpr (cpl::NumericLimits<T>::is_integer)
1603 : {
1604 : using std::floor;
1605 4223079 : if (dfReal < static_cast<double>(cpl::NumericLimits<T>::lowest()))
1606 1638 : pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::lowest());
1607 4221439 : else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
1608 13640 : pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::max());
1609 : else if constexpr (cpl::NumericLimits<T>::is_signed)
1610 13239 : pDst[iDstOffset] = static_cast<T>(floor(dfReal + 0.5));
1611 : else
1612 4194560 : pDst[iDstOffset] = static_cast<T>(dfReal + 0.5);
1613 : }
1614 : else
1615 : {
1616 482896 : pDst[iDstOffset] = static_cast<T>(dfReal);
1617 : }
1618 :
1619 4705975 : AvoidNoData<T>(poWK, iBand, iDstOffset);
1620 4705975 : }
1621 :
1622 : /************************************************************************/
1623 : /* GWKSetPixelValue() */
1624 : /************************************************************************/
1625 :
1626 3867240 : static bool GWKSetPixelValue(const GDALWarpKernel *poWK, int iBand,
1627 : GPtrDiff_t iDstOffset, double dfDensity,
1628 : double dfReal, double dfImag)
1629 :
1630 : {
1631 3867240 : GByte *pabyDst = poWK->papabyDstImage[iBand];
1632 :
1633 : /* -------------------------------------------------------------------- */
1634 : /* If the source density is less than 100% we need to fetch the */
1635 : /* existing destination value, and mix it with the source to */
1636 : /* get the new "to apply" value. Also compute composite */
1637 : /* density. */
1638 : /* */
1639 : /* We avoid mixing if density is very near one or risk mixing */
1640 : /* in very extreme nodata values and causing odd results (#1610) */
1641 : /* -------------------------------------------------------------------- */
1642 3867240 : if (dfDensity < 0.9999)
1643 : {
1644 800 : if (dfDensity < 0.0001)
1645 0 : return true;
1646 :
1647 800 : double dfDstDensity = 1.0;
1648 800 : if (poWK->pafDstDensity != nullptr)
1649 800 : dfDstDensity = poWK->pafDstDensity[iDstOffset];
1650 0 : else if (poWK->panDstValid != nullptr &&
1651 0 : !CPLMaskGet(poWK->panDstValid, iDstOffset))
1652 0 : dfDstDensity = 0.0;
1653 :
1654 800 : double dfDstReal = 0.0;
1655 800 : double dfDstImag = 0.0;
1656 : // It seems like we also ought to be testing panDstValid[] here!
1657 :
1658 : // TODO(schwehr): Factor out this repreated type of set.
1659 800 : switch (poWK->eWorkingDataType)
1660 : {
1661 0 : case GDT_Byte:
1662 0 : dfDstReal = pabyDst[iDstOffset];
1663 0 : dfDstImag = 0.0;
1664 0 : break;
1665 :
1666 0 : case GDT_Int8:
1667 0 : dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
1668 0 : dfDstImag = 0.0;
1669 0 : break;
1670 :
1671 400 : case GDT_Int16:
1672 400 : dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
1673 400 : dfDstImag = 0.0;
1674 400 : break;
1675 :
1676 400 : case GDT_UInt16:
1677 400 : dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
1678 400 : dfDstImag = 0.0;
1679 400 : break;
1680 :
1681 0 : case GDT_Int32:
1682 0 : dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
1683 0 : dfDstImag = 0.0;
1684 0 : break;
1685 :
1686 0 : case GDT_UInt32:
1687 0 : dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
1688 0 : dfDstImag = 0.0;
1689 0 : break;
1690 :
1691 0 : case GDT_Int64:
1692 0 : dfDstReal = static_cast<double>(
1693 0 : reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
1694 0 : dfDstImag = 0.0;
1695 0 : break;
1696 :
1697 0 : case GDT_UInt64:
1698 0 : dfDstReal = static_cast<double>(
1699 0 : reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
1700 0 : dfDstImag = 0.0;
1701 0 : break;
1702 :
1703 0 : case GDT_Float16:
1704 0 : dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
1705 0 : dfDstImag = 0.0;
1706 0 : break;
1707 :
1708 0 : case GDT_Float32:
1709 0 : dfDstReal = reinterpret_cast<float *>(pabyDst)[iDstOffset];
1710 0 : dfDstImag = 0.0;
1711 0 : break;
1712 :
1713 0 : case GDT_Float64:
1714 0 : dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
1715 0 : dfDstImag = 0.0;
1716 0 : break;
1717 :
1718 0 : case GDT_CInt16:
1719 0 : dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2];
1720 0 : dfDstImag =
1721 0 : reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2 + 1];
1722 0 : break;
1723 :
1724 0 : case GDT_CInt32:
1725 0 : dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2];
1726 0 : dfDstImag =
1727 0 : reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2 + 1];
1728 0 : break;
1729 :
1730 0 : case GDT_CFloat16:
1731 : dfDstReal =
1732 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2];
1733 : dfDstImag =
1734 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1];
1735 0 : break;
1736 :
1737 0 : case GDT_CFloat32:
1738 0 : dfDstReal = reinterpret_cast<float *>(pabyDst)[iDstOffset * 2];
1739 0 : dfDstImag =
1740 0 : reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1];
1741 0 : break;
1742 :
1743 0 : case GDT_CFloat64:
1744 0 : dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset * 2];
1745 0 : dfDstImag =
1746 0 : reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1];
1747 0 : break;
1748 :
1749 0 : case GDT_Unknown:
1750 : case GDT_TypeCount:
1751 0 : CPLAssert(false);
1752 : return false;
1753 : }
1754 :
1755 : // The destination density is really only relative to the portion
1756 : // not occluded by the overlay.
1757 800 : const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
1758 :
1759 800 : dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
1760 800 : (dfDensity + dfDstInfluence);
1761 :
1762 800 : dfImag = (dfImag * dfDensity + dfDstImag * dfDstInfluence) /
1763 800 : (dfDensity + dfDstInfluence);
1764 : }
1765 :
1766 : /* -------------------------------------------------------------------- */
1767 : /* Actually apply the destination value. */
1768 : /* */
1769 : /* Avoid using the destination nodata value for integer datatypes */
1770 : /* if by chance it is equal to the computed pixel value. */
1771 : /* -------------------------------------------------------------------- */
1772 :
1773 3867240 : switch (poWK->eWorkingDataType)
1774 : {
1775 3141450 : case GDT_Byte:
1776 3141450 : ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal);
1777 3141450 : break;
1778 :
1779 0 : case GDT_Int8:
1780 0 : ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal);
1781 0 : break;
1782 :
1783 7470 : case GDT_Int16:
1784 7470 : ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal);
1785 7470 : break;
1786 :
1787 463 : case GDT_UInt16:
1788 463 : ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal);
1789 463 : break;
1790 :
1791 63 : case GDT_UInt32:
1792 63 : ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal);
1793 63 : break;
1794 :
1795 3470 : case GDT_Int32:
1796 3470 : ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal);
1797 3470 : break;
1798 :
1799 0 : case GDT_UInt64:
1800 0 : ClampRoundAndAvoidNoData<std::uint64_t>(poWK, iBand, iDstOffset,
1801 : dfReal);
1802 0 : break;
1803 :
1804 0 : case GDT_Int64:
1805 0 : ClampRoundAndAvoidNoData<std::int64_t>(poWK, iBand, iDstOffset,
1806 : dfReal);
1807 0 : break;
1808 :
1809 0 : case GDT_Float16:
1810 0 : ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal);
1811 0 : break;
1812 :
1813 478957 : case GDT_Float32:
1814 478957 : ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal);
1815 478957 : break;
1816 :
1817 147 : case GDT_Float64:
1818 147 : ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal);
1819 147 : break;
1820 :
1821 234078 : case GDT_CInt16:
1822 : {
1823 : typedef GInt16 T;
1824 234078 : if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
1825 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1826 0 : cpl::NumericLimits<T>::min();
1827 234078 : else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
1828 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1829 0 : cpl::NumericLimits<T>::max();
1830 : else
1831 234078 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1832 234078 : static_cast<T>(floor(dfReal + 0.5));
1833 234078 : if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
1834 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1835 0 : cpl::NumericLimits<T>::min();
1836 234078 : else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
1837 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1838 0 : cpl::NumericLimits<T>::max();
1839 : else
1840 234078 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1841 234078 : static_cast<T>(floor(dfImag + 0.5));
1842 234078 : break;
1843 : }
1844 :
1845 378 : case GDT_CInt32:
1846 : {
1847 : typedef GInt32 T;
1848 378 : if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
1849 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1850 0 : cpl::NumericLimits<T>::min();
1851 378 : else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
1852 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1853 0 : cpl::NumericLimits<T>::max();
1854 : else
1855 378 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1856 378 : static_cast<T>(floor(dfReal + 0.5));
1857 378 : if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
1858 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1859 0 : cpl::NumericLimits<T>::min();
1860 378 : else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
1861 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1862 0 : cpl::NumericLimits<T>::max();
1863 : else
1864 378 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1865 378 : static_cast<T>(floor(dfImag + 0.5));
1866 378 : break;
1867 : }
1868 :
1869 0 : case GDT_CFloat16:
1870 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2] =
1871 0 : static_cast<GFloat16>(dfReal);
1872 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1] =
1873 0 : static_cast<GFloat16>(dfImag);
1874 0 : break;
1875 :
1876 390 : case GDT_CFloat32:
1877 390 : reinterpret_cast<float *>(pabyDst)[iDstOffset * 2] =
1878 390 : static_cast<float>(dfReal);
1879 390 : reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1] =
1880 390 : static_cast<float>(dfImag);
1881 390 : break;
1882 :
1883 378 : case GDT_CFloat64:
1884 378 : reinterpret_cast<double *>(pabyDst)[iDstOffset * 2] = dfReal;
1885 378 : reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1] = dfImag;
1886 378 : break;
1887 :
1888 0 : case GDT_Unknown:
1889 : case GDT_TypeCount:
1890 0 : return false;
1891 : }
1892 :
1893 3867240 : return true;
1894 : }
1895 :
1896 : /************************************************************************/
1897 : /* GWKSetPixelValueReal() */
1898 : /************************************************************************/
1899 :
1900 1073960 : static bool GWKSetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
1901 : GPtrDiff_t iDstOffset, double dfDensity,
1902 : double dfReal)
1903 :
1904 : {
1905 1073960 : GByte *pabyDst = poWK->papabyDstImage[iBand];
1906 :
1907 : /* -------------------------------------------------------------------- */
1908 : /* If the source density is less than 100% we need to fetch the */
1909 : /* existing destination value, and mix it with the source to */
1910 : /* get the new "to apply" value. Also compute composite */
1911 : /* density. */
1912 : /* */
1913 : /* We avoid mixing if density is very near one or risk mixing */
1914 : /* in very extreme nodata values and causing odd results (#1610) */
1915 : /* -------------------------------------------------------------------- */
1916 1073960 : if (dfDensity < 0.9999)
1917 : {
1918 78172 : if (dfDensity < 0.0001)
1919 0 : return true;
1920 :
1921 78172 : double dfDstReal = 0.0;
1922 78172 : double dfDstDensity = 1.0;
1923 :
1924 78172 : if (poWK->pafDstDensity != nullptr)
1925 78172 : dfDstDensity = poWK->pafDstDensity[iDstOffset];
1926 0 : else if (poWK->panDstValid != nullptr &&
1927 0 : !CPLMaskGet(poWK->panDstValid, iDstOffset))
1928 0 : dfDstDensity = 0.0;
1929 :
1930 : // It seems like we also ought to be testing panDstValid[] here!
1931 :
1932 78172 : switch (poWK->eWorkingDataType)
1933 : {
1934 0 : case GDT_Byte:
1935 0 : dfDstReal = pabyDst[iDstOffset];
1936 0 : break;
1937 :
1938 0 : case GDT_Int8:
1939 0 : dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
1940 0 : break;
1941 :
1942 300 : case GDT_Int16:
1943 300 : dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
1944 300 : break;
1945 :
1946 77872 : case GDT_UInt16:
1947 77872 : dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
1948 77872 : break;
1949 :
1950 0 : case GDT_Int32:
1951 0 : dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
1952 0 : break;
1953 :
1954 0 : case GDT_UInt32:
1955 0 : dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
1956 0 : break;
1957 :
1958 0 : case GDT_Int64:
1959 0 : dfDstReal = static_cast<double>(
1960 0 : reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
1961 0 : break;
1962 :
1963 0 : case GDT_UInt64:
1964 0 : dfDstReal = static_cast<double>(
1965 0 : reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
1966 0 : break;
1967 :
1968 0 : case GDT_Float16:
1969 0 : dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
1970 0 : break;
1971 :
1972 0 : case GDT_Float32:
1973 0 : dfDstReal = reinterpret_cast<float *>(pabyDst)[iDstOffset];
1974 0 : break;
1975 :
1976 0 : case GDT_Float64:
1977 0 : dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
1978 0 : break;
1979 :
1980 0 : case GDT_CInt16:
1981 : case GDT_CInt32:
1982 : case GDT_CFloat16:
1983 : case GDT_CFloat32:
1984 : case GDT_CFloat64:
1985 : case GDT_Unknown:
1986 : case GDT_TypeCount:
1987 0 : CPLAssert(false);
1988 : return false;
1989 : }
1990 :
1991 : // The destination density is really only relative to the portion
1992 : // not occluded by the overlay.
1993 78172 : const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
1994 :
1995 78172 : dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
1996 78172 : (dfDensity + dfDstInfluence);
1997 : }
1998 :
1999 : /* -------------------------------------------------------------------- */
2000 : /* Actually apply the destination value. */
2001 : /* */
2002 : /* Avoid using the destination nodata value for integer datatypes */
2003 : /* if by chance it is equal to the computed pixel value. */
2004 : /* -------------------------------------------------------------------- */
2005 :
2006 1073960 : switch (poWK->eWorkingDataType)
2007 : {
2008 916752 : case GDT_Byte:
2009 916752 : ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal);
2010 916752 : break;
2011 :
2012 0 : case GDT_Int8:
2013 0 : ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal);
2014 0 : break;
2015 :
2016 1117 : case GDT_Int16:
2017 1117 : ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal);
2018 1117 : break;
2019 :
2020 150735 : case GDT_UInt16:
2021 150735 : ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal);
2022 150735 : break;
2023 :
2024 347 : case GDT_UInt32:
2025 347 : ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal);
2026 347 : break;
2027 :
2028 1150 : case GDT_Int32:
2029 1150 : ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal);
2030 1150 : break;
2031 :
2032 32 : case GDT_UInt64:
2033 32 : ClampRoundAndAvoidNoData<std::uint64_t>(poWK, iBand, iDstOffset,
2034 : dfReal);
2035 32 : break;
2036 :
2037 32 : case GDT_Int64:
2038 32 : ClampRoundAndAvoidNoData<std::int64_t>(poWK, iBand, iDstOffset,
2039 : dfReal);
2040 32 : break;
2041 :
2042 0 : case GDT_Float16:
2043 0 : ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal);
2044 0 : break;
2045 :
2046 3442 : case GDT_Float32:
2047 3442 : ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal);
2048 3442 : break;
2049 :
2050 350 : case GDT_Float64:
2051 350 : ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal);
2052 350 : break;
2053 :
2054 0 : case GDT_CInt16:
2055 : case GDT_CInt32:
2056 : case GDT_CFloat16:
2057 : case GDT_CFloat32:
2058 : case GDT_CFloat64:
2059 0 : return false;
2060 :
2061 0 : case GDT_Unknown:
2062 : case GDT_TypeCount:
2063 0 : CPLAssert(false);
2064 : return false;
2065 : }
2066 :
2067 1073960 : return true;
2068 : }
2069 :
2070 : /************************************************************************/
2071 : /* GWKGetPixelValue() */
2072 : /************************************************************************/
2073 :
2074 : /* It is assumed that panUnifiedSrcValid has been checked before */
2075 :
2076 29336100 : static bool GWKGetPixelValue(const GDALWarpKernel *poWK, int iBand,
2077 : GPtrDiff_t iSrcOffset, double *pdfDensity,
2078 : double *pdfReal, double *pdfImag)
2079 :
2080 : {
2081 29336100 : GByte *pabySrc = poWK->papabySrcImage[iBand];
2082 :
2083 58672300 : if (poWK->papanBandSrcValid != nullptr &&
2084 29336100 : poWK->papanBandSrcValid[iBand] != nullptr &&
2085 0 : !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
2086 : {
2087 0 : *pdfDensity = 0.0;
2088 0 : return false;
2089 : }
2090 :
2091 29336100 : *pdfReal = 0.0;
2092 29336100 : *pdfImag = 0.0;
2093 :
2094 : // TODO(schwehr): Fix casting.
2095 29336100 : switch (poWK->eWorkingDataType)
2096 : {
2097 28245600 : case GDT_Byte:
2098 28245600 : *pdfReal = pabySrc[iSrcOffset];
2099 28245600 : *pdfImag = 0.0;
2100 28245600 : break;
2101 :
2102 0 : case GDT_Int8:
2103 0 : *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
2104 0 : *pdfImag = 0.0;
2105 0 : break;
2106 :
2107 28226 : case GDT_Int16:
2108 28226 : *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
2109 28226 : *pdfImag = 0.0;
2110 28226 : break;
2111 :
2112 163 : case GDT_UInt16:
2113 163 : *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
2114 163 : *pdfImag = 0.0;
2115 163 : break;
2116 :
2117 13726 : case GDT_Int32:
2118 13726 : *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
2119 13726 : *pdfImag = 0.0;
2120 13726 : break;
2121 :
2122 63 : case GDT_UInt32:
2123 63 : *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
2124 63 : *pdfImag = 0.0;
2125 63 : break;
2126 :
2127 0 : case GDT_Int64:
2128 0 : *pdfReal = static_cast<double>(
2129 0 : reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
2130 0 : *pdfImag = 0.0;
2131 0 : break;
2132 :
2133 0 : case GDT_UInt64:
2134 0 : *pdfReal = static_cast<double>(
2135 0 : reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
2136 0 : *pdfImag = 0.0;
2137 0 : break;
2138 :
2139 0 : case GDT_Float16:
2140 0 : *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
2141 0 : *pdfImag = 0.0;
2142 0 : break;
2143 :
2144 1047220 : case GDT_Float32:
2145 1047220 : *pdfReal = reinterpret_cast<float *>(pabySrc)[iSrcOffset];
2146 1047220 : *pdfImag = 0.0;
2147 1047220 : break;
2148 :
2149 582 : case GDT_Float64:
2150 582 : *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
2151 582 : *pdfImag = 0.0;
2152 582 : break;
2153 :
2154 130 : case GDT_CInt16:
2155 130 : *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2];
2156 130 : *pdfImag = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2 + 1];
2157 130 : break;
2158 :
2159 130 : case GDT_CInt32:
2160 130 : *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2];
2161 130 : *pdfImag = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2 + 1];
2162 130 : break;
2163 :
2164 0 : case GDT_CFloat16:
2165 0 : *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2];
2166 0 : *pdfImag =
2167 0 : reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2 + 1];
2168 0 : break;
2169 :
2170 178 : case GDT_CFloat32:
2171 178 : *pdfReal = reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2];
2172 178 : *pdfImag = reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2 + 1];
2173 178 : break;
2174 :
2175 130 : case GDT_CFloat64:
2176 130 : *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2];
2177 130 : *pdfImag = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2 + 1];
2178 130 : break;
2179 :
2180 0 : case GDT_Unknown:
2181 : case GDT_TypeCount:
2182 0 : CPLAssert(false);
2183 : *pdfDensity = 0.0;
2184 : return false;
2185 : }
2186 :
2187 29336100 : if (poWK->pafUnifiedSrcDensity != nullptr)
2188 3015160 : *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
2189 : else
2190 26321000 : *pdfDensity = 1.0;
2191 :
2192 29336100 : return *pdfDensity != 0.0;
2193 : }
2194 :
2195 : /************************************************************************/
2196 : /* GWKGetPixelValueReal() */
2197 : /************************************************************************/
2198 :
2199 151448 : static bool GWKGetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
2200 : GPtrDiff_t iSrcOffset, double *pdfDensity,
2201 : double *pdfReal)
2202 :
2203 : {
2204 151448 : GByte *pabySrc = poWK->papabySrcImage[iBand];
2205 :
2206 302898 : if (poWK->papanBandSrcValid != nullptr &&
2207 151450 : poWK->papanBandSrcValid[iBand] != nullptr &&
2208 2 : !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
2209 : {
2210 0 : *pdfDensity = 0.0;
2211 0 : return false;
2212 : }
2213 :
2214 151448 : switch (poWK->eWorkingDataType)
2215 : {
2216 1 : case GDT_Byte:
2217 1 : *pdfReal = pabySrc[iSrcOffset];
2218 1 : break;
2219 :
2220 0 : case GDT_Int8:
2221 0 : *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
2222 0 : break;
2223 :
2224 1 : case GDT_Int16:
2225 1 : *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
2226 1 : break;
2227 :
2228 150357 : case GDT_UInt16:
2229 150357 : *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
2230 150357 : break;
2231 :
2232 886 : case GDT_Int32:
2233 886 : *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
2234 886 : break;
2235 :
2236 83 : case GDT_UInt32:
2237 83 : *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
2238 83 : break;
2239 :
2240 16 : case GDT_Int64:
2241 16 : *pdfReal = static_cast<double>(
2242 16 : reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
2243 16 : break;
2244 :
2245 16 : case GDT_UInt64:
2246 16 : *pdfReal = static_cast<double>(
2247 16 : reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
2248 16 : break;
2249 :
2250 0 : case GDT_Float16:
2251 0 : *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
2252 0 : break;
2253 :
2254 2 : case GDT_Float32:
2255 2 : *pdfReal = reinterpret_cast<float *>(pabySrc)[iSrcOffset];
2256 2 : break;
2257 :
2258 86 : case GDT_Float64:
2259 86 : *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
2260 86 : break;
2261 :
2262 0 : case GDT_CInt16:
2263 : case GDT_CInt32:
2264 : case GDT_CFloat16:
2265 : case GDT_CFloat32:
2266 : case GDT_CFloat64:
2267 : case GDT_Unknown:
2268 : case GDT_TypeCount:
2269 0 : CPLAssert(false);
2270 : return false;
2271 : }
2272 :
2273 151448 : if (poWK->pafUnifiedSrcDensity != nullptr)
2274 150340 : *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
2275 : else
2276 1108 : *pdfDensity = 1.0;
2277 :
2278 151448 : return *pdfDensity != 0.0;
2279 : }
2280 :
2281 : /************************************************************************/
2282 : /* GWKGetPixelRow() */
2283 : /************************************************************************/
2284 :
2285 : /* It is assumed that adfImag[] is set to 0 by caller code for non-complex */
2286 : /* data-types. */
2287 :
2288 2352610 : static bool GWKGetPixelRow(const GDALWarpKernel *poWK, int iBand,
2289 : GPtrDiff_t iSrcOffset, int nHalfSrcLen,
2290 : double *padfDensity, double adfReal[],
2291 : double *padfImag)
2292 : {
2293 : // We know that nSrcLen is even, so we can *always* unroll loops 2x.
2294 2352610 : const int nSrcLen = nHalfSrcLen * 2;
2295 2352610 : bool bHasValid = false;
2296 :
2297 2352610 : if (padfDensity != nullptr)
2298 : {
2299 : // Init the density.
2300 3343290 : for (int i = 0; i < nSrcLen; i += 2)
2301 : {
2302 2188270 : padfDensity[i] = 1.0;
2303 2188270 : padfDensity[i + 1] = 1.0;
2304 : }
2305 :
2306 1155020 : if (poWK->panUnifiedSrcValid != nullptr)
2307 : {
2308 3281460 : for (int i = 0; i < nSrcLen; i += 2)
2309 : {
2310 2142070 : if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i))
2311 2067740 : bHasValid = true;
2312 : else
2313 74323 : padfDensity[i] = 0.0;
2314 :
2315 2142070 : if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i + 1))
2316 2068400 : bHasValid = true;
2317 : else
2318 73668 : padfDensity[i + 1] = 0.0;
2319 : }
2320 :
2321 : // Reset or fail as needed.
2322 1139400 : if (bHasValid)
2323 1116590 : bHasValid = false;
2324 : else
2325 22806 : return false;
2326 : }
2327 :
2328 1132210 : if (poWK->papanBandSrcValid != nullptr &&
2329 0 : poWK->papanBandSrcValid[iBand] != nullptr)
2330 : {
2331 0 : for (int i = 0; i < nSrcLen; i += 2)
2332 : {
2333 0 : if (CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset + i))
2334 0 : bHasValid = true;
2335 : else
2336 0 : padfDensity[i] = 0.0;
2337 :
2338 0 : if (CPLMaskGet(poWK->papanBandSrcValid[iBand],
2339 0 : iSrcOffset + i + 1))
2340 0 : bHasValid = true;
2341 : else
2342 0 : padfDensity[i + 1] = 0.0;
2343 : }
2344 :
2345 : // Reset or fail as needed.
2346 0 : if (bHasValid)
2347 0 : bHasValid = false;
2348 : else
2349 0 : return false;
2350 : }
2351 : }
2352 :
2353 : // TODO(schwehr): Fix casting.
2354 : // Fetch data.
2355 2329800 : switch (poWK->eWorkingDataType)
2356 : {
2357 1121080 : case GDT_Byte:
2358 : {
2359 1121080 : GByte *pSrc =
2360 1121080 : reinterpret_cast<GByte *>(poWK->papabySrcImage[iBand]);
2361 1121080 : pSrc += iSrcOffset;
2362 3243850 : for (int i = 0; i < nSrcLen; i += 2)
2363 : {
2364 2122770 : adfReal[i] = pSrc[i];
2365 2122770 : adfReal[i + 1] = pSrc[i + 1];
2366 : }
2367 1121080 : break;
2368 : }
2369 :
2370 0 : case GDT_Int8:
2371 : {
2372 0 : GInt8 *pSrc =
2373 0 : reinterpret_cast<GInt8 *>(poWK->papabySrcImage[iBand]);
2374 0 : pSrc += iSrcOffset;
2375 0 : for (int i = 0; i < nSrcLen; i += 2)
2376 : {
2377 0 : adfReal[i] = pSrc[i];
2378 0 : adfReal[i + 1] = pSrc[i + 1];
2379 : }
2380 0 : break;
2381 : }
2382 :
2383 5614 : case GDT_Int16:
2384 : {
2385 5614 : GInt16 *pSrc =
2386 5614 : reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
2387 5614 : pSrc += iSrcOffset;
2388 21492 : for (int i = 0; i < nSrcLen; i += 2)
2389 : {
2390 15878 : adfReal[i] = pSrc[i];
2391 15878 : adfReal[i + 1] = pSrc[i + 1];
2392 : }
2393 5614 : break;
2394 : }
2395 :
2396 4142 : case GDT_UInt16:
2397 : {
2398 4142 : GUInt16 *pSrc =
2399 4142 : reinterpret_cast<GUInt16 *>(poWK->papabySrcImage[iBand]);
2400 4142 : pSrc += iSrcOffset;
2401 18548 : for (int i = 0; i < nSrcLen; i += 2)
2402 : {
2403 14406 : adfReal[i] = pSrc[i];
2404 14406 : adfReal[i + 1] = pSrc[i + 1];
2405 : }
2406 4142 : break;
2407 : }
2408 :
2409 778 : case GDT_Int32:
2410 : {
2411 778 : GInt32 *pSrc =
2412 778 : reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
2413 778 : pSrc += iSrcOffset;
2414 2288 : for (int i = 0; i < nSrcLen; i += 2)
2415 : {
2416 1510 : adfReal[i] = pSrc[i];
2417 1510 : adfReal[i + 1] = pSrc[i + 1];
2418 : }
2419 778 : break;
2420 : }
2421 :
2422 778 : case GDT_UInt32:
2423 : {
2424 778 : GUInt32 *pSrc =
2425 778 : reinterpret_cast<GUInt32 *>(poWK->papabySrcImage[iBand]);
2426 778 : pSrc += iSrcOffset;
2427 2288 : for (int i = 0; i < nSrcLen; i += 2)
2428 : {
2429 1510 : adfReal[i] = pSrc[i];
2430 1510 : adfReal[i + 1] = pSrc[i + 1];
2431 : }
2432 778 : break;
2433 : }
2434 :
2435 28 : case GDT_Int64:
2436 : {
2437 28 : auto pSrc =
2438 28 : reinterpret_cast<std::int64_t *>(poWK->papabySrcImage[iBand]);
2439 28 : pSrc += iSrcOffset;
2440 56 : for (int i = 0; i < nSrcLen; i += 2)
2441 : {
2442 28 : adfReal[i] = static_cast<double>(pSrc[i]);
2443 28 : adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
2444 : }
2445 28 : break;
2446 : }
2447 :
2448 28 : case GDT_UInt64:
2449 : {
2450 28 : auto pSrc =
2451 28 : reinterpret_cast<std::uint64_t *>(poWK->papabySrcImage[iBand]);
2452 28 : pSrc += iSrcOffset;
2453 56 : for (int i = 0; i < nSrcLen; i += 2)
2454 : {
2455 28 : adfReal[i] = static_cast<double>(pSrc[i]);
2456 28 : adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
2457 : }
2458 28 : break;
2459 : }
2460 :
2461 0 : case GDT_Float16:
2462 : {
2463 0 : GFloat16 *pSrc =
2464 0 : reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
2465 0 : pSrc += iSrcOffset;
2466 0 : for (int i = 0; i < nSrcLen; i += 2)
2467 : {
2468 0 : adfReal[i] = pSrc[i];
2469 0 : adfReal[i + 1] = pSrc[i + 1];
2470 : }
2471 0 : break;
2472 : }
2473 :
2474 25102 : case GDT_Float32:
2475 : {
2476 25102 : float *pSrc =
2477 25102 : reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
2478 25102 : pSrc += iSrcOffset;
2479 121403 : for (int i = 0; i < nSrcLen; i += 2)
2480 : {
2481 96301 : adfReal[i] = pSrc[i];
2482 96301 : adfReal[i + 1] = pSrc[i + 1];
2483 : }
2484 25102 : break;
2485 : }
2486 :
2487 778 : case GDT_Float64:
2488 : {
2489 778 : double *pSrc =
2490 778 : reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
2491 778 : pSrc += iSrcOffset;
2492 2288 : for (int i = 0; i < nSrcLen; i += 2)
2493 : {
2494 1510 : adfReal[i] = pSrc[i];
2495 1510 : adfReal[i + 1] = pSrc[i + 1];
2496 : }
2497 778 : break;
2498 : }
2499 :
2500 1169220 : case GDT_CInt16:
2501 : {
2502 1169220 : GInt16 *pSrc =
2503 1169220 : reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
2504 1169220 : pSrc += 2 * iSrcOffset;
2505 4676020 : for (int i = 0; i < nSrcLen; i += 2)
2506 : {
2507 3506800 : adfReal[i] = pSrc[2 * i];
2508 3506800 : padfImag[i] = pSrc[2 * i + 1];
2509 :
2510 3506800 : adfReal[i + 1] = pSrc[2 * i + 2];
2511 3506800 : padfImag[i + 1] = pSrc[2 * i + 3];
2512 : }
2513 1169220 : break;
2514 : }
2515 :
2516 750 : case GDT_CInt32:
2517 : {
2518 750 : GInt32 *pSrc =
2519 750 : reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
2520 750 : pSrc += 2 * iSrcOffset;
2521 2232 : for (int i = 0; i < nSrcLen; i += 2)
2522 : {
2523 1482 : adfReal[i] = pSrc[2 * i];
2524 1482 : padfImag[i] = pSrc[2 * i + 1];
2525 :
2526 1482 : adfReal[i + 1] = pSrc[2 * i + 2];
2527 1482 : padfImag[i + 1] = pSrc[2 * i + 3];
2528 : }
2529 750 : break;
2530 : }
2531 :
2532 0 : case GDT_CFloat16:
2533 : {
2534 0 : GFloat16 *pSrc =
2535 0 : reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
2536 0 : pSrc += 2 * iSrcOffset;
2537 0 : for (int i = 0; i < nSrcLen; i += 2)
2538 : {
2539 0 : adfReal[i] = pSrc[2 * i];
2540 0 : padfImag[i] = pSrc[2 * i + 1];
2541 :
2542 0 : adfReal[i + 1] = pSrc[2 * i + 2];
2543 0 : padfImag[i + 1] = pSrc[2 * i + 3];
2544 : }
2545 0 : break;
2546 : }
2547 :
2548 750 : case GDT_CFloat32:
2549 : {
2550 750 : float *pSrc =
2551 750 : reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
2552 750 : pSrc += 2 * iSrcOffset;
2553 2232 : for (int i = 0; i < nSrcLen; i += 2)
2554 : {
2555 1482 : adfReal[i] = pSrc[2 * i];
2556 1482 : padfImag[i] = pSrc[2 * i + 1];
2557 :
2558 1482 : adfReal[i + 1] = pSrc[2 * i + 2];
2559 1482 : padfImag[i + 1] = pSrc[2 * i + 3];
2560 : }
2561 750 : break;
2562 : }
2563 :
2564 750 : case GDT_CFloat64:
2565 : {
2566 750 : double *pSrc =
2567 750 : reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
2568 750 : pSrc += 2 * iSrcOffset;
2569 2232 : for (int i = 0; i < nSrcLen; i += 2)
2570 : {
2571 1482 : adfReal[i] = pSrc[2 * i];
2572 1482 : padfImag[i] = pSrc[2 * i + 1];
2573 :
2574 1482 : adfReal[i + 1] = pSrc[2 * i + 2];
2575 1482 : padfImag[i + 1] = pSrc[2 * i + 3];
2576 : }
2577 750 : break;
2578 : }
2579 :
2580 0 : case GDT_Unknown:
2581 : case GDT_TypeCount:
2582 0 : CPLAssert(false);
2583 : if (padfDensity)
2584 : memset(padfDensity, 0, nSrcLen * sizeof(double));
2585 : return false;
2586 : }
2587 :
2588 2329800 : if (padfDensity == nullptr)
2589 1197590 : return true;
2590 :
2591 1132210 : if (poWK->pafUnifiedSrcDensity == nullptr)
2592 : {
2593 3231720 : for (int i = 0; i < nSrcLen; i += 2)
2594 : {
2595 : // Take into account earlier calcs.
2596 2111610 : if (padfDensity[i] > SRC_DENSITY_THRESHOLD)
2597 : {
2598 2071710 : padfDensity[i] = 1.0;
2599 2071710 : bHasValid = true;
2600 : }
2601 :
2602 2111610 : if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD)
2603 : {
2604 2072360 : padfDensity[i + 1] = 1.0;
2605 2072360 : bHasValid = true;
2606 : }
2607 : }
2608 : }
2609 : else
2610 : {
2611 54348 : for (int i = 0; i < nSrcLen; i += 2)
2612 : {
2613 42243 : if (padfDensity[i] > SRC_DENSITY_THRESHOLD)
2614 42243 : padfDensity[i] = poWK->pafUnifiedSrcDensity[iSrcOffset + i];
2615 42243 : if (padfDensity[i] > SRC_DENSITY_THRESHOLD)
2616 41704 : bHasValid = true;
2617 :
2618 42243 : if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD)
2619 42243 : padfDensity[i + 1] =
2620 42243 : poWK->pafUnifiedSrcDensity[iSrcOffset + i + 1];
2621 42243 : if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD)
2622 41594 : bHasValid = true;
2623 : }
2624 : }
2625 :
2626 1132210 : return bHasValid;
2627 : }
2628 :
2629 : /************************************************************************/
2630 : /* GWKGetPixelT() */
2631 : /************************************************************************/
2632 :
2633 : template <class T>
2634 8159117 : static bool GWKGetPixelT(const GDALWarpKernel *poWK, int iBand,
2635 : GPtrDiff_t iSrcOffset, double *pdfDensity, T *pValue)
2636 :
2637 : {
2638 8159117 : T *pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
2639 :
2640 18455895 : if ((poWK->panUnifiedSrcValid != nullptr &&
2641 16318274 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset)) ||
2642 8159117 : (poWK->papanBandSrcValid != nullptr &&
2643 21 : poWK->papanBandSrcValid[iBand] != nullptr &&
2644 21 : !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset)))
2645 : {
2646 9 : *pdfDensity = 0.0;
2647 9 : return false;
2648 : }
2649 :
2650 8159107 : *pValue = pSrc[iSrcOffset];
2651 :
2652 8159107 : if (poWK->pafUnifiedSrcDensity == nullptr)
2653 7974694 : *pdfDensity = 1.0;
2654 : else
2655 184414 : *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
2656 :
2657 8159107 : return *pdfDensity != 0.0;
2658 : }
2659 :
2660 : /************************************************************************/
2661 : /* GWKBilinearResample() */
2662 : /* Set of bilinear interpolators */
2663 : /************************************************************************/
2664 :
2665 72024 : static bool GWKBilinearResample4Sample(const GDALWarpKernel *poWK, int iBand,
2666 : double dfSrcX, double dfSrcY,
2667 : double *pdfDensity, double *pdfReal,
2668 : double *pdfImag)
2669 :
2670 : {
2671 : // Save as local variables to avoid following pointers.
2672 72024 : const int nSrcXSize = poWK->nSrcXSize;
2673 72024 : const int nSrcYSize = poWK->nSrcYSize;
2674 :
2675 72024 : int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
2676 72024 : int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
2677 72024 : double dfRatioX = 1.5 - (dfSrcX - iSrcX);
2678 72024 : double dfRatioY = 1.5 - (dfSrcY - iSrcY);
2679 72024 : bool bShifted = false;
2680 :
2681 72024 : if (iSrcX == -1)
2682 : {
2683 292 : iSrcX = 0;
2684 292 : dfRatioX = 1;
2685 : }
2686 72024 : if (iSrcY == -1)
2687 : {
2688 7686 : iSrcY = 0;
2689 7686 : dfRatioY = 1;
2690 : }
2691 72024 : GPtrDiff_t iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
2692 :
2693 : // Shift so we don't overrun the array.
2694 72024 : if (static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize == iSrcOffset + 1 ||
2695 71972 : static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize ==
2696 71972 : iSrcOffset + nSrcXSize + 1)
2697 : {
2698 104 : bShifted = true;
2699 104 : --iSrcOffset;
2700 : }
2701 :
2702 72024 : double adfDensity[2] = {0.0, 0.0};
2703 72024 : double adfReal[2] = {0.0, 0.0};
2704 72024 : double adfImag[2] = {0.0, 0.0};
2705 72024 : double dfAccumulatorReal = 0.0;
2706 72024 : double dfAccumulatorImag = 0.0;
2707 72024 : double dfAccumulatorDensity = 0.0;
2708 72024 : double dfAccumulatorDivisor = 0.0;
2709 :
2710 72024 : const GPtrDiff_t nSrcPixels =
2711 72024 : static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize;
2712 : // Get pixel row.
2713 72024 : if (iSrcY >= 0 && iSrcY < nSrcYSize && iSrcOffset >= 0 &&
2714 144048 : iSrcOffset < nSrcPixels &&
2715 72024 : GWKGetPixelRow(poWK, iBand, iSrcOffset, 1, adfDensity, adfReal,
2716 : adfImag))
2717 : {
2718 66368 : double dfMult1 = dfRatioX * dfRatioY;
2719 66368 : double dfMult2 = (1.0 - dfRatioX) * dfRatioY;
2720 :
2721 : // Shifting corrected.
2722 66368 : if (bShifted)
2723 : {
2724 104 : adfReal[0] = adfReal[1];
2725 104 : adfImag[0] = adfImag[1];
2726 104 : adfDensity[0] = adfDensity[1];
2727 : }
2728 :
2729 : // Upper Left Pixel.
2730 66368 : if (iSrcX >= 0 && iSrcX < nSrcXSize &&
2731 66368 : adfDensity[0] > SRC_DENSITY_THRESHOLD)
2732 : {
2733 60938 : dfAccumulatorDivisor += dfMult1;
2734 :
2735 60938 : dfAccumulatorReal += adfReal[0] * dfMult1;
2736 60938 : dfAccumulatorImag += adfImag[0] * dfMult1;
2737 60938 : dfAccumulatorDensity += adfDensity[0] * dfMult1;
2738 : }
2739 :
2740 : // Upper Right Pixel.
2741 66368 : if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
2742 65827 : adfDensity[1] > SRC_DENSITY_THRESHOLD)
2743 : {
2744 60553 : dfAccumulatorDivisor += dfMult2;
2745 :
2746 60553 : dfAccumulatorReal += adfReal[1] * dfMult2;
2747 60553 : dfAccumulatorImag += adfImag[1] * dfMult2;
2748 60553 : dfAccumulatorDensity += adfDensity[1] * dfMult2;
2749 : }
2750 : }
2751 :
2752 : // Get pixel row.
2753 72024 : if (iSrcY + 1 >= 0 && iSrcY + 1 < nSrcYSize &&
2754 212030 : iSrcOffset + nSrcXSize >= 0 && iSrcOffset + nSrcXSize < nSrcPixels &&
2755 67982 : GWKGetPixelRow(poWK, iBand, iSrcOffset + nSrcXSize, 1, adfDensity,
2756 : adfReal, adfImag))
2757 : {
2758 62423 : double dfMult1 = dfRatioX * (1.0 - dfRatioY);
2759 62423 : double dfMult2 = (1.0 - dfRatioX) * (1.0 - dfRatioY);
2760 :
2761 : // Shifting corrected
2762 62423 : if (bShifted)
2763 : {
2764 52 : adfReal[0] = adfReal[1];
2765 52 : adfImag[0] = adfImag[1];
2766 52 : adfDensity[0] = adfDensity[1];
2767 : }
2768 :
2769 : // Lower Left Pixel
2770 62423 : if (iSrcX >= 0 && iSrcX < nSrcXSize &&
2771 62423 : adfDensity[0] > SRC_DENSITY_THRESHOLD)
2772 : {
2773 57144 : dfAccumulatorDivisor += dfMult1;
2774 :
2775 57144 : dfAccumulatorReal += adfReal[0] * dfMult1;
2776 57144 : dfAccumulatorImag += adfImag[0] * dfMult1;
2777 57144 : dfAccumulatorDensity += adfDensity[0] * dfMult1;
2778 : }
2779 :
2780 : // Lower Right Pixel.
2781 62423 : if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
2782 61934 : adfDensity[1] > SRC_DENSITY_THRESHOLD)
2783 : {
2784 56957 : dfAccumulatorDivisor += dfMult2;
2785 :
2786 56957 : dfAccumulatorReal += adfReal[1] * dfMult2;
2787 56957 : dfAccumulatorImag += adfImag[1] * dfMult2;
2788 56957 : dfAccumulatorDensity += adfDensity[1] * dfMult2;
2789 : }
2790 : }
2791 :
2792 : /* -------------------------------------------------------------------- */
2793 : /* Return result. */
2794 : /* -------------------------------------------------------------------- */
2795 72024 : if (dfAccumulatorDivisor == 1.0)
2796 : {
2797 40967 : *pdfReal = dfAccumulatorReal;
2798 40967 : *pdfImag = dfAccumulatorImag;
2799 40967 : *pdfDensity = dfAccumulatorDensity;
2800 40967 : return false;
2801 : }
2802 31057 : else if (dfAccumulatorDivisor < 0.00001)
2803 : {
2804 0 : *pdfReal = 0.0;
2805 0 : *pdfImag = 0.0;
2806 0 : *pdfDensity = 0.0;
2807 0 : return false;
2808 : }
2809 : else
2810 : {
2811 31057 : *pdfReal = dfAccumulatorReal / dfAccumulatorDivisor;
2812 31057 : *pdfImag = dfAccumulatorImag / dfAccumulatorDivisor;
2813 31057 : *pdfDensity = dfAccumulatorDensity / dfAccumulatorDivisor;
2814 31057 : return true;
2815 : }
2816 : }
2817 :
2818 : template <class T>
2819 3672314 : static bool GWKBilinearResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
2820 : int iBand, double dfSrcX,
2821 : double dfSrcY, T *pValue)
2822 :
2823 : {
2824 :
2825 3672314 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
2826 3672314 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
2827 3672314 : GPtrDiff_t iSrcOffset =
2828 3672314 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
2829 3672314 : const double dfRatioX = 1.5 - (dfSrcX - iSrcX);
2830 3672314 : const double dfRatioY = 1.5 - (dfSrcY - iSrcY);
2831 :
2832 3672314 : const T *const pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
2833 :
2834 3672314 : if (iSrcX >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
2835 3570478 : iSrcY + 1 < poWK->nSrcYSize)
2836 : {
2837 3547620 : const double dfAccumulator =
2838 3547620 : (pSrc[iSrcOffset] * dfRatioX +
2839 3547620 : pSrc[iSrcOffset + 1] * (1.0 - dfRatioX)) *
2840 : dfRatioY +
2841 3547620 : (pSrc[iSrcOffset + poWK->nSrcXSize] * dfRatioX +
2842 3547620 : pSrc[iSrcOffset + 1 + poWK->nSrcXSize] * (1.0 - dfRatioX)) *
2843 3547620 : (1.0 - dfRatioY);
2844 :
2845 3547620 : *pValue = GWKRoundValueT<T>(dfAccumulator);
2846 :
2847 3547620 : return true;
2848 : }
2849 :
2850 124701 : double dfAccumulatorDivisor = 0.0;
2851 124701 : double dfAccumulator = 0.0;
2852 :
2853 : // Upper Left Pixel.
2854 124701 : if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY >= 0 &&
2855 51993 : iSrcY < poWK->nSrcYSize)
2856 : {
2857 51993 : const double dfMult = dfRatioX * dfRatioY;
2858 :
2859 51993 : dfAccumulatorDivisor += dfMult;
2860 :
2861 51993 : dfAccumulator += pSrc[iSrcOffset] * dfMult;
2862 : }
2863 :
2864 : // Upper Right Pixel.
2865 124701 : if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
2866 58836 : iSrcY < poWK->nSrcYSize)
2867 : {
2868 58836 : const double dfMult = (1.0 - dfRatioX) * dfRatioY;
2869 :
2870 58836 : dfAccumulatorDivisor += dfMult;
2871 :
2872 58836 : dfAccumulator += pSrc[iSrcOffset + 1] * dfMult;
2873 : }
2874 :
2875 : // Lower Right Pixel.
2876 124701 : if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
2877 94953 : iSrcY + 1 < poWK->nSrcYSize)
2878 : {
2879 71702 : const double dfMult = (1.0 - dfRatioX) * (1.0 - dfRatioY);
2880 :
2881 71702 : dfAccumulatorDivisor += dfMult;
2882 :
2883 71702 : dfAccumulator += pSrc[iSrcOffset + 1 + poWK->nSrcXSize] * dfMult;
2884 : }
2885 :
2886 : // Lower Left Pixel.
2887 124701 : if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
2888 88088 : iSrcY + 1 < poWK->nSrcYSize)
2889 : {
2890 64641 : const double dfMult = dfRatioX * (1.0 - dfRatioY);
2891 :
2892 64641 : dfAccumulatorDivisor += dfMult;
2893 :
2894 64641 : dfAccumulator += pSrc[iSrcOffset + poWK->nSrcXSize] * dfMult;
2895 : }
2896 :
2897 : /* -------------------------------------------------------------------- */
2898 : /* Return result. */
2899 : /* -------------------------------------------------------------------- */
2900 124701 : double dfValue = 0.0;
2901 :
2902 124701 : if (dfAccumulatorDivisor < 0.00001)
2903 : {
2904 0 : *pValue = 0;
2905 0 : return false;
2906 : }
2907 124701 : else if (dfAccumulatorDivisor == 1.0)
2908 : {
2909 7320 : dfValue = dfAccumulator;
2910 : }
2911 : else
2912 : {
2913 117381 : dfValue = dfAccumulator / dfAccumulatorDivisor;
2914 : }
2915 :
2916 124701 : *pValue = GWKRoundValueT<T>(dfValue);
2917 :
2918 124701 : return true;
2919 : }
2920 :
2921 : /************************************************************************/
2922 : /* GWKCubicResample() */
2923 : /* Set of bicubic interpolators using cubic convolution. */
2924 : /************************************************************************/
2925 :
2926 : // http://verona.fi-p.unam.mx/boris/practicas/CubConvInterp.pdf Formula 18
2927 : // or http://en.wikipedia.org/wiki/Cubic_Hermite_spline : CINTx(p_1,p0,p1,p2)
2928 : // http://en.wikipedia.org/wiki/Bicubic_interpolation: matrix notation
2929 :
/**
 * Evaluate a 1D cubic convolution through four consecutive samples.
 *
 * Computes
 *   f1 + 0.5 * (distance1 * (f2 - f0) +
 *               distance2 * (2 f0 - 5 f1 + 4 f2 - f3) +
 *               distance3 * (3 (f1 - f2) + f3 - f0)).
 * With all distances 0 the result is f1; with all distances 1 it is f2.
 *
 * @param distance1  first-order factor.
 * @param distance2  second-order factor.
 * @param distance3  third-order factor.
 * @param f0,f1,f2,f3  the four samples, in order.
 * @return the interpolated value.
 */
template <typename T>
static inline T CubicConvolution(T distance1, T distance2, T distance3, T f0,
                                 T f1, T f2, T f3)
{
    const T linearTerm = distance1 * (f2 - f0);
    const T quadraticTerm = distance2 * (2 * f0 - 5 * f1 + 4 * f2 - f3);
    const T cubicTerm = distance3 * (3 * (f1 - f2) + f3 - f0);

    return (f1 + T(0.5) * (linearTerm + quadraticTerm + cubicTerm));
}
2938 :
2939 : /************************************************************************/
2940 : /* GWKCubicComputeWeights() */
2941 : /************************************************************************/
2942 :
2943 : // adfCoeffs[2] = 1.0 - (adfCoeffs[0] + adfCoeffs[1] - adfCoeffs[3]);
2944 :
/**
 * Compute the four cubic convolution weights for a fractional offset x.
 *
 * The weights are, in order:
 *   coeffs[0] = x/2   * (-1 + x * (2 - x))
 *   coeffs[1] = 1 + x^2/2 * (3x - 5)
 *   coeffs[2] = x/2   * (1 + x * (4 - 3x))
 *   coeffs[3] = x^2/2 * (x - 1)
 * For x = 0 this yields {0, 1, 0, 0} and for x = 1 {0, 0, 1, 0}.
 *
 * @param x       fractional offset.
 * @param coeffs  output array of 4 weights.
 */
template <typename T>
static inline void GWKCubicComputeWeights(T x, T coeffs[4])
{
    // Shared sub-expressions.
    const T xHalf = T(0.5) * x;
    const T xTriple = T(3.0) * x;
    const T xSquaredHalf = xHalf * x;

    // Outer taps.
    coeffs[0] = xHalf * (-1 + x * (2 - x));
    coeffs[3] = xSquaredHalf * (-1 + x);

    // Inner taps.
    coeffs[1] = 1 + xSquaredHalf * (-5 + xTriple);
    coeffs[2] = xHalf * (1 + x * (4 - xTriple));
}
2957 :
/**
 * Dot product of the first four elements of two indexable sequences.
 *
 * Replaces the former function-like macro of the same name: each argument
 * is now evaluated exactly once and is type-checked. The products are
 * summed left to right, and the result type is the type of
 * v1[0] * v2[0], exactly as with the macro expansion.
 *
 * @param v1  array or pointer providing at least 4 elements.
 * @param v2  array or pointer providing at least 4 elements.
 * @return v1[0]*v2[0] + v1[1]*v2[1] + v1[2]*v2[2] + v1[3]*v2[3].
 */
template <typename V1, typename V2>
static inline auto CONVOL4(const V1 &v1, const V2 &v2)
    -> decltype(v1[0] * v2[0])
{
    return v1[0] * v2[0] + v1[1] * v2[1] + v1[2] * v2[2] + v1[3] * v2[3];
}
2962 :
2963 : #if 0
2964 : // Optimal (in theory...) for max 2 convolutions: 14 multiplications
2965 : // instead of 17.
2966 : // TODO(schwehr): Use an inline function.
2967 : #define GWKCubicComputeWeights_Optim2MAX(dfX_, adfCoeffs, dfHalfX) \
2968 : { \
2969 : const double dfX = dfX_; \
2970 : dfHalfX = 0.5 * dfX; \
2971 : const double dfThreeX = 3.0 * dfX; \
2972 : const double dfXMinus1 = dfX - 1; \
2973 : \
2974 : adfCoeffs[0] = -1 + dfX * (2 - dfX); \
2975 : adfCoeffs[1] = dfX * (-5 + dfThreeX); \
2976 : /*adfCoeffs[2] = 1 + dfX * (4 - dfThreeX);*/ \
2977 : adfCoeffs[2] = -dfXMinus1 - adfCoeffs[1]; \
2978 : /*adfCoeffs[3] = dfX * (-1 + dfX); */ \
2979 : adfCoeffs[3] = dfXMinus1 - adfCoeffs[0]; \
2980 : }
2981 :
2982 : // TODO(schwehr): Use an inline function.
2983 : #define CONVOL4_Optim2MAX(adfCoeffs, v, dfHalfX) \
2984 : ((v)[1] + (dfHalfX) * ((adfCoeffs)[0] * (v)[0] + (adfCoeffs)[1] * (v)[1] + \
2985 : (adfCoeffs)[2] * (v)[2] + (adfCoeffs)[3] * (v)[3]))
2986 : #endif
2987 :
2988 299879 : static bool GWKCubicResample4Sample(const GDALWarpKernel *poWK, int iBand,
2989 : double dfSrcX, double dfSrcY,
2990 : double *pdfDensity, double *pdfReal,
2991 : double *pdfImag)
2992 :
2993 : {
2994 299879 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
2995 299879 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
2996 299879 : GPtrDiff_t iSrcOffset =
2997 299879 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
2998 299879 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
2999 299879 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3000 299879 : double adfDensity[4] = {};
3001 299879 : double adfReal[4] = {};
3002 299879 : double adfImag[4] = {};
3003 :
3004 : // Get the bilinear interpolation at the image borders.
3005 299879 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3006 284412 : iSrcY + 2 >= poWK->nSrcYSize)
3007 24136 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3008 24136 : pdfDensity, pdfReal, pdfImag);
3009 :
3010 275743 : double adfValueDens[4] = {};
3011 275743 : double adfValueReal[4] = {};
3012 275743 : double adfValueImag[4] = {};
3013 :
3014 275743 : double adfCoeffsX[4] = {};
3015 275743 : GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3016 :
3017 1232410 : for (GPtrDiff_t i = -1; i < 3; i++)
3018 : {
3019 1003120 : if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
3020 991507 : 2, adfDensity, adfReal, adfImag) ||
3021 991507 : adfDensity[0] < SRC_DENSITY_THRESHOLD ||
3022 973867 : adfDensity[1] < SRC_DENSITY_THRESHOLD ||
3023 2960190 : adfDensity[2] < SRC_DENSITY_THRESHOLD ||
3024 965566 : adfDensity[3] < SRC_DENSITY_THRESHOLD)
3025 : {
3026 46449 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3027 46449 : pdfDensity, pdfReal, pdfImag);
3028 : }
3029 :
3030 956668 : adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
3031 956668 : adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
3032 956668 : adfValueImag[i + 1] = CONVOL4(adfCoeffsX, adfImag);
3033 : }
3034 :
3035 : /* -------------------------------------------------------------------- */
3036 : /* For now, if we have any pixels missing in the kernel area, */
3037 : /* we fallback on using bilinear interpolation. Ideally we */
3038 : /* should do "weight adjustment" of our results similarly to */
3039 : /* what is done for the cubic spline and lanc. interpolators. */
3040 : /* -------------------------------------------------------------------- */
3041 :
3042 229294 : double adfCoeffsY[4] = {};
3043 229294 : GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3044 :
3045 229294 : *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3046 229294 : *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3047 229294 : *pdfImag = CONVOL4(adfCoeffsY, adfValueImag);
3048 :
3049 229294 : return true;
3050 : }
3051 :
3052 : #ifdef USE_SSE2
3053 :
3054 : /************************************************************************/
3055 : /* XMMLoad4Values() */
3056 : /* */
3057 : /* Load 4 packed byte or uint16, cast them to float and put them in a */
3058 : /* m128 register. */
3059 : /************************************************************************/
3060 :
3061 949092 : static CPL_INLINE __m128 XMMLoad4Values(const GByte *ptr)
3062 : {
3063 : unsigned int i;
3064 949092 : memcpy(&i, ptr, 4);
3065 1898180 : __m128i xmm_i = _mm_cvtsi32_si128(i);
3066 : // Zero extend 4 packed unsigned 8-bit integers in a to packed
3067 : // 32-bit integers.
3068 : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
3069 : xmm_i = _mm_cvtepu8_epi32(xmm_i);
3070 : #else
3071 1898180 : xmm_i = _mm_unpacklo_epi8(xmm_i, _mm_setzero_si128());
3072 1898180 : xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
3073 : #endif
3074 1898180 : return _mm_cvtepi32_ps(xmm_i);
3075 : }
3076 :
3077 5292 : static CPL_INLINE __m128 XMMLoad4Values(const GUInt16 *ptr)
3078 : {
3079 : GUInt64 i;
3080 5292 : memcpy(&i, ptr, 8);
3081 10584 : __m128i xmm_i = _mm_cvtsi64_si128(i);
3082 : // Zero extend 4 packed unsigned 16-bit integers in a to packed
3083 : // 32-bit integers.
3084 : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
3085 : xmm_i = _mm_cvtepu16_epi32(xmm_i);
3086 : #else
3087 10584 : xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
3088 : #endif
3089 10584 : return _mm_cvtepi32_ps(xmm_i);
3090 : }
3091 :
3092 : /************************************************************************/
3093 : /* XMMHorizontalAdd() */
3094 : /* */
3095 : /* Return the sum of the 4 floating points of the register. */
3096 : /************************************************************************/
3097 :
3098 : #if defined(__SSE3__) || defined(USE_NEON_OPTIMIZATIONS)
3099 : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
3100 : {
3101 : __m128 shuf = _mm_movehdup_ps(v); // (v3 , v3 , v1 , v1)
3102 : __m128 sums = _mm_add_ps(v, shuf); // (v3+v3, v3+v2, v1+v1, v1+v0)
3103 : shuf = _mm_movehl_ps(shuf, sums); // (v3 , v3 , v3+v3, v3+v2)
3104 : sums = _mm_add_ss(sums, shuf); // (v1+v0)+(v3+v2)
3105 : return _mm_cvtss_f32(sums);
3106 : }
3107 : #else
3108 238596 : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
3109 : {
3110 238596 : __m128 shuf = _mm_movehl_ps(v, v); // (v3 , v2 , v3 , v2)
3111 238596 : __m128 sums = _mm_add_ps(v, shuf); // (v3+v3, v2+v2, v3+v1, v2+v0)
3112 238596 : shuf = _mm_shuffle_ps(sums, sums, 1); // (v2+v0, v2+v0, v2+v0, v3+v1)
3113 238596 : sums = _mm_add_ss(sums, shuf); // (v2+v0)+(v3+v1)
3114 238596 : return _mm_cvtss_f32(sums);
3115 : }
3116 : #endif
3117 :
3118 : #endif // define USE_SSE2
3119 :
3120 : /************************************************************************/
3121 : /* GWKCubicResampleSrcMaskIsDensity4SampleRealT() */
3122 : /************************************************************************/
3123 :
// Note: if USE_SSE_CUBIC_IMPL is defined, only instantiate this template for
// Byte and UInt16, because the SSE code path makes a few assumptions about
// those types.
// We do not define USE_SSE_CUBIC_IMPL since, in practice, it gives zero
// perf benefit.
3128 :
3129 : template <class T>
3130 361 : static CPL_INLINE bool GWKCubicResampleSrcMaskIsDensity4SampleRealT(
3131 : const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
3132 : double *pdfDensity, double *pdfReal)
3133 : {
3134 361 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3135 361 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3136 361 : const GPtrDiff_t iSrcOffset =
3137 361 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3138 :
3139 : // Get the bilinear interpolation at the image borders.
3140 361 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3141 361 : iSrcY + 2 >= poWK->nSrcYSize)
3142 : {
3143 0 : double adfImagIgnored[4] = {};
3144 0 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3145 0 : pdfDensity, pdfReal, adfImagIgnored);
3146 : }
3147 :
3148 : #if defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
3149 : const float fDeltaX = static_cast<float>(dfSrcX) - 0.5f - iSrcX;
3150 : const float fDeltaY = static_cast<float>(dfSrcY) - 0.5f - iSrcY;
3151 :
3152 : // TODO(schwehr): Explain the magic numbers.
3153 : float afTemp[4 + 4 + 4 + 1];
3154 : float *pafAligned =
3155 : reinterpret_cast<float *>(afTemp + ((size_t)afTemp & 0xf));
3156 : float *pafCoeffs = pafAligned;
3157 : float *pafDensity = pafAligned + 4;
3158 : float *pafValue = pafAligned + 8;
3159 :
3160 : const float fHalfDeltaX = 0.5f * fDeltaX;
3161 : const float fThreeDeltaX = 3.0f * fDeltaX;
3162 : const float fHalfDeltaX2 = fHalfDeltaX * fDeltaX;
3163 :
3164 : pafCoeffs[0] = fHalfDeltaX * (-1 + fDeltaX * (2 - fDeltaX));
3165 : pafCoeffs[1] = 1 + fHalfDeltaX2 * (-5 + fThreeDeltaX);
3166 : pafCoeffs[2] = fHalfDeltaX * (1 + fDeltaX * (4 - fThreeDeltaX));
3167 : pafCoeffs[3] = fHalfDeltaX2 * (-1 + fDeltaX);
3168 : __m128 xmmCoeffs = _mm_load_ps(pafCoeffs);
3169 : const __m128 xmmThreshold = _mm_load1_ps(&SRC_DENSITY_THRESHOLD);
3170 :
3171 : __m128 xmmMaskLowDensity = _mm_setzero_ps();
3172 : for (GPtrDiff_t i = -1, iOffset = iSrcOffset - poWK->nSrcXSize - 1; i < 3;
3173 : i++, iOffset += poWK->nSrcXSize)
3174 : {
3175 : const __m128 xmmDensity =
3176 : _mm_loadu_ps(poWK->pafUnifiedSrcDensity + iOffset);
3177 : xmmMaskLowDensity = _mm_or_ps(xmmMaskLowDensity,
3178 : _mm_cmplt_ps(xmmDensity, xmmThreshold));
3179 : pafDensity[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
3180 :
3181 : const __m128 xmmValues =
3182 : XMMLoad4Values(((T *)poWK->papabySrcImage[iBand]) + iOffset);
3183 : pafValue[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValues));
3184 : }
3185 : if (_mm_movemask_ps(xmmMaskLowDensity))
3186 : {
3187 : double adfImagIgnored[4] = {};
3188 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3189 : pdfDensity, pdfReal, adfImagIgnored);
3190 : }
3191 :
3192 : const float fHalfDeltaY = 0.5f * fDeltaY;
3193 : const float fThreeDeltaY = 3.0f * fDeltaY;
3194 : const float fHalfDeltaY2 = fHalfDeltaY * fDeltaY;
3195 :
3196 : pafCoeffs[0] = fHalfDeltaY * (-1 + fDeltaY * (2 - fDeltaY));
3197 : pafCoeffs[1] = 1 + fHalfDeltaY2 * (-5 + fThreeDeltaY);
3198 : pafCoeffs[2] = fHalfDeltaY * (1 + fDeltaY * (4 - fThreeDeltaY));
3199 : pafCoeffs[3] = fHalfDeltaY2 * (-1 + fDeltaY);
3200 :
3201 : xmmCoeffs = _mm_load_ps(pafCoeffs);
3202 :
3203 : const __m128 xmmDensity = _mm_load_ps(pafDensity);
3204 : const __m128 xmmValue = _mm_load_ps(pafValue);
3205 : *pdfDensity = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
3206 : *pdfReal = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValue));
3207 :
3208 : // We did all above computations on float32 whereas the general case is
3209 : // float64. Not sure if one is fundamentally more correct than the other
3210 : // one, but we want our optimization to give the same result as the
3211 : // general case as much as possible, so if the resulting value is
3212 : // close to some_int_value + 0.5, redo the computation with the general
3213 : // case.
3214 : // Note: If other types than Byte or UInt16, will need changes.
3215 : if (fabs(*pdfReal - static_cast<int>(*pdfReal) - 0.5) > .007)
3216 : return true;
3217 :
3218 : #endif // defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
3219 :
3220 361 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3221 361 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3222 :
3223 361 : double adfValueDens[4] = {};
3224 361 : double adfValueReal[4] = {};
3225 :
3226 361 : double adfCoeffsX[4] = {};
3227 361 : GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3228 :
3229 361 : double adfCoeffsY[4] = {};
3230 361 : GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3231 :
3232 1433 : for (GPtrDiff_t i = -1; i < 3; i++)
3233 : {
3234 1177 : const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
3235 : #if !(defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2))
3236 1177 : if (poWK->pafUnifiedSrcDensity[iOffset + 0] < SRC_DENSITY_THRESHOLD ||
3237 1089 : poWK->pafUnifiedSrcDensity[iOffset + 1] < SRC_DENSITY_THRESHOLD ||
3238 1089 : poWK->pafUnifiedSrcDensity[iOffset + 2] < SRC_DENSITY_THRESHOLD ||
3239 1089 : poWK->pafUnifiedSrcDensity[iOffset + 3] < SRC_DENSITY_THRESHOLD)
3240 : {
3241 105 : double adfImagIgnored[4] = {};
3242 105 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3243 : pdfDensity, pdfReal,
3244 105 : adfImagIgnored);
3245 : }
3246 : #endif
3247 :
3248 1072 : adfValueDens[i + 1] =
3249 1072 : CONVOL4(adfCoeffsX, poWK->pafUnifiedSrcDensity + iOffset);
3250 :
3251 1072 : adfValueReal[i + 1] = CONVOL4(
3252 : adfCoeffsX,
3253 : reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
3254 : }
3255 :
3256 256 : *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3257 256 : *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3258 :
3259 256 : return true;
3260 : }
3261 :
3262 : /************************************************************************/
3263 : /* GWKCubicResampleSrcMaskIsDensity4SampleReal() */
3264 : /* Bi-cubic when source has and only has pafUnifiedSrcDensity. */
3265 : /************************************************************************/
3266 :
3267 0 : static bool GWKCubicResampleSrcMaskIsDensity4SampleReal(
3268 : const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
3269 : double *pdfDensity, double *pdfReal)
3270 :
3271 : {
3272 0 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3273 0 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3274 0 : const GPtrDiff_t iSrcOffset =
3275 0 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3276 0 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3277 0 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3278 :
3279 : // Get the bilinear interpolation at the image borders.
3280 0 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3281 0 : iSrcY + 2 >= poWK->nSrcYSize)
3282 : {
3283 0 : double adfImagIgnored[4] = {};
3284 0 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3285 0 : pdfDensity, pdfReal, adfImagIgnored);
3286 : }
3287 :
3288 0 : double adfCoeffsX[4] = {};
3289 0 : GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3290 :
3291 0 : double adfCoeffsY[4] = {};
3292 0 : GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3293 :
3294 0 : double adfValueDens[4] = {};
3295 0 : double adfValueReal[4] = {};
3296 0 : double adfDensity[4] = {};
3297 0 : double adfReal[4] = {};
3298 0 : double adfImagIgnored[4] = {};
3299 :
3300 0 : for (GPtrDiff_t i = -1; i < 3; i++)
3301 : {
3302 0 : if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
3303 0 : 2, adfDensity, adfReal, adfImagIgnored) ||
3304 0 : adfDensity[0] < SRC_DENSITY_THRESHOLD ||
3305 0 : adfDensity[1] < SRC_DENSITY_THRESHOLD ||
3306 0 : adfDensity[2] < SRC_DENSITY_THRESHOLD ||
3307 0 : adfDensity[3] < SRC_DENSITY_THRESHOLD)
3308 : {
3309 0 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3310 : pdfDensity, pdfReal,
3311 0 : adfImagIgnored);
3312 : }
3313 :
3314 0 : adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
3315 0 : adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
3316 : }
3317 :
3318 0 : *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3319 0 : *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3320 :
3321 0 : return true;
3322 : }
3323 :
3324 : template <class T>
3325 1906603 : static bool GWKCubicResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
3326 : int iBand, double dfSrcX,
3327 : double dfSrcY, T *pValue)
3328 :
3329 : {
3330 1906603 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3331 1906603 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3332 1906603 : const GPtrDiff_t iSrcOffset =
3333 1906603 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3334 1906603 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3335 1906603 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3336 1906603 : const double dfDeltaY2 = dfDeltaY * dfDeltaY;
3337 1906603 : const double dfDeltaY3 = dfDeltaY2 * dfDeltaY;
3338 :
3339 : // Get the bilinear interpolation at the image borders.
3340 1906603 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3341 1662527 : iSrcY + 2 >= poWK->nSrcYSize)
3342 303751 : return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
3343 303751 : pValue);
3344 :
3345 1602852 : double adfCoeffs[4] = {};
3346 1602852 : GWKCubicComputeWeights(dfDeltaX, adfCoeffs);
3347 :
3348 1602852 : double adfValue[4] = {};
3349 :
3350 8014250 : for (GPtrDiff_t i = -1; i < 3; i++)
3351 : {
3352 6411406 : const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
3353 :
3354 6411406 : adfValue[i + 1] = CONVOL4(
3355 : adfCoeffs,
3356 : reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
3357 : }
3358 :
3359 : const double dfValue =
3360 1602852 : CubicConvolution(dfDeltaY, dfDeltaY2, dfDeltaY3, adfValue[0],
3361 : adfValue[1], adfValue[2], adfValue[3]);
3362 :
3363 1602852 : *pValue = GWKClampValueT<T>(dfValue);
3364 :
3365 1602852 : return true;
3366 : }
3367 :
3368 : /************************************************************************/
3369 : /* GWKLanczosSinc() */
3370 : /************************************************************************/
3371 :
3372 : /*
3373 : * Lanczos windowed sinc interpolation kernel with radius r.
3374 : * /
3375 : * | sinc(x) * sinc(x/r), if |x| < r
3376 : * L(x) = | 1, if x = 0 ,
3377 : * | 0, otherwise
3378 : * \
3379 : *
3380 : * where sinc(x) = sin(PI * x) / (PI * x).
3381 : */
3382 :
/**
 * Lanczos windowed sinc weight with a hard-coded radius of 3:
 *   sinc(x) * sinc(x / 3), with sinc(t) = sin(PI * t) / (PI * t).
 *
 * Returns 1.0 for dfX == 0. No test against the radius is made here, so
 * the "0 otherwise" branch of the kernel definition is not applied by this
 * function (NOTE(review): presumably callers only pass |dfX| < 3 -
 * confirm).
 */
static double GWKLanczosSinc(double dfX)
{
    if (dfX == 0.0)
        return 1.0;

    const double dfPiX = M_PI * dfX;
    const double dfThirdPiX = dfPiX / 3;
    // Denominator: (PI * x) * (PI * x / 3).
    const double dfDenominator = dfPiX * dfThirdPiX;

    // Only one sin() call is needed: with a = PI * x / 3,
    // sin(3a) = 3 sin(a) - 4 sin^3(a), hence
    // sin(3a) * sin(a) = (3 - 4 sin^2(a)) * sin^2(a).
    const double dfSinThird = sin(dfThirdPiX);
    const double dfSinThirdSq = dfSinThird * dfSinThird;
    const double dfNumerator = (3 - 4 * dfSinThirdSq) * dfSinThirdSq;

    return dfNumerator / dfDenominator;
}
3399 :
/**
 * Replace each of the 4 values of padfValues, in place, by its Lanczos
 * (radius 3) weight sinc(x) * sinc(x / 3), using 1.0 for x == 0.
 *
 * @param padfValues  in: 4 abscissas; out: the 4 corresponding weights.
 * @return the sum of the 4 weights.
 */
static double GWKLanczosSinc4Values(double *padfValues)
{
    for (double *pdfValue = padfValues; pdfValue != padfValues + 4;
         ++pdfValue)
    {
        const double dfX = *pdfValue;
        if (dfX == 0.0)
        {
            *pdfValue = 1.0;
            continue;
        }

        const double dfPiX = M_PI * dfX;
        const double dfThirdPiX = dfPiX / 3;
        // Denominator: (PI * x) * (PI * x / 3).
        const double dfDenominator = dfPiX * dfThirdPiX;

        // With a = PI * x / 3, sin(3a) = 3 sin(a) - 4 sin^3(a), hence
        // sin(3a) * sin(a) = (3 - 4 sin^2(a)) * sin^2(a).
        const double dfSinThird = sin(dfThirdPiX);
        const double dfSinThirdSq = dfSinThird * dfSinThird;
        const double dfNumerator = (3 - 4 * dfSinThirdSq) * dfSinThirdSq;

        *pdfValue = dfNumerator / dfDenominator;
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3424 :
3425 : /************************************************************************/
3426 : /* GWKBilinear() */
3427 : /************************************************************************/
3428 :
/**
 * Bilinear (triangle) kernel weight: 1 - |dfX| when |dfX| <= 1, else 0.
 */
static double GWKBilinear(double dfX)
{
    const double dfDistance = fabs(dfX);
    return (dfDistance <= 1.0) ? 1 - dfDistance : 0.0;
}
3437 :
/**
 * Replace each of the 4 values of padfValues, in place, by its bilinear
 * (triangle) kernel weight: 1 - |x| when |x| <= 1, else 0.
 *
 * @param padfValues  in: 4 abscissas; out: the 4 corresponding weights.
 * @return the sum of the 4 weights.
 */
static double GWKBilinear4Values(double *padfValues)
{
    for (int i = 0; i < 4; ++i)
    {
        const double dfDistance = fabs(padfValues[i]);
        padfValues[i] = (dfDistance <= 1.0) ? 1 - dfDistance : 0.0;
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3462 :
3463 : /************************************************************************/
3464 : /* GWKCubic() */
3465 : /************************************************************************/
3466 :
/**
 * Cubic kernel weight for dfX.
 *
 * Thin forwarder to CubicKernel(), which is not defined in this part of
 * the file (NOTE(review): presumably provided by gdalresamplingkernels.h -
 * confirm).
 */
static double GWKCubic(double dfX)
{
    return CubicKernel(dfX);
}
3471 :
/**
 * Replace each of the 4 values of padfValues, in place, by its cubic
 * convolution kernel weight:
 *   |x| <= 1     :  1.5 |x|^3 - 2.5 x^2 + 1
 *   1 < |x| <= 2 : -0.5 |x|^3 + 2.5 x^2 - 4 |x| + 2
 *   otherwise    :  0
 *
 * @param padfValues  in: 4 abscissas; out: the 4 corresponding weights.
 * @return the sum of the 4 weights.
 */
static double GWKCubic4Values(double *padfValues)
{
    for (int i = 0; i < 4; ++i)
    {
        const double dfX = padfValues[i];
        const double dfAbsX = fabs(dfX);
        const double dfX2 = dfX * dfX;

        double dfWeight = 0.0;
        if (dfAbsX <= 1.0)
        {
            dfWeight = dfX2 * (1.5 * dfAbsX - 2.5) + 1.0;
        }
        else if (dfAbsX <= 2.0)
        {
            dfWeight = dfX2 * (-0.5 * dfAbsX + 2.5) - 4.0 * dfAbsX + 2.0;
        }

        padfValues[i] = dfWeight;
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3513 :
3514 : /************************************************************************/
3515 : /* GWKBSpline() */
3516 : /************************************************************************/
3517 :
3518 : // https://www.cs.utexas.edu/~fussell/courses/cs384g-fall2013/lectures/mitchell/Mitchell.pdf
3519 : // Equation 8 with (B,C)=(1,0)
3520 : // 1/6 * ( 3 * |x|^3 - 6 * |x|^2 + 4) |x| < 1
3521 : // 1/6 * ( -|x|^3 + 6 |x|^2 - 12|x| + 8) |x| >= 1 and |x| < 2
3522 :
/**
 * Unnormalized cubic B-spline kernel, evaluated as a sum of truncated
 * cubes:
 *   (x+2)^3 - 4 (x+1)^3 + 6 x^3 - 4 (x-1)^3
 * where each term is only included when its base is > 0.
 *
 * The result is 6 times the B-spline value (e.g. 4 at x = 0, 1 at
 * x = +/-1): the 1/6 normalization is not applied here.
 *
 * NOTE(review): there is no (x-2)^3 term, so the result is 0 for x <= -2
 * but is not clamped to 0 for x > 2; callers are presumably expected to
 * pass |x| <= 2 - confirm.
 */
static double GWKBSpline(double x)
{
    const double xp2 = x + 2.0;
    if (!(xp2 > 0.0))
    {
        return 0.0;
    }

    const double xp1 = x + 1.0;
    const double xm1 = x - 1.0;
    const double xp2c = xp2 * xp2 * xp2;

    // Contribution of the (x+1), x and (x-1) terms; each nested term is
    // only relevant once the enclosing one is active.
    double dfInnerTerms = 0.0;
    if (xp1 > 0.0)
    {
        double dfCentreTerms = 0.0;
        if (x > 0.0)
        {
            const double dfRightTerm =
                (xm1 > 0.0) ? -4.0 * xm1 * xm1 * xm1 : 0.0;
            dfCentreTerms = dfRightTerm + 6.0 * x * x * x;
        }
        dfInnerTerms = dfCentreTerms + -4.0 * xp1 * xp1 * xp1;
    }

    return dfInnerTerms + xp2c;  // * 0.166666666666666666666
}
3546 :
/**
 * Apply the un-normalized cubic B-spline kernel to four values in place.
 *
 * Each of the four entries of padfValues is read as a distance x and
 * overwritten with the sum of the one-sided cubes (x+2)^3, -4(x+1)^3, 6x^3
 * and -4(x-1)^3 (a term only contributes when its base is strictly
 * positive). The 1/6 normalization factor is not applied.
 *
 * @param padfValues Array of 4 distances on input, 4 kernel values on output.
 * @return The sum of the four resulting kernel values.
 */
static double GWKBSpline4Values(double *padfValues)
{
    double *const pdfEnd = padfValues + 4;
    for (double *pdfCur = padfValues; pdfCur != pdfEnd; ++pdfCur)
    {
        const double dfX = *pdfCur;
        const double dfShiftP2 = dfX + 2.0;

        double dfKernel = 0.0;
        if (dfShiftP2 > 0.0)
        {
            // Accumulate from the innermost term outwards to keep the
            // floating-point summation order fixed.
            const double dfShiftP1 = dfX + 1.0;
            if (dfShiftP1 > 0.0)
            {
                if (dfX > 0.0)
                {
                    const double dfShiftM1 = dfX - 1.0;
                    if (dfShiftM1 > 0.0)
                        dfKernel = -4.0 * dfShiftM1 * dfShiftM1 * dfShiftM1;
                    dfKernel += 6.0 * dfX * dfX * dfX;
                }
                dfKernel += -4.0 * dfShiftP1 * dfShiftP1 * dfShiftP1;
            }
            dfKernel += dfShiftP2 * dfShiftP2 * dfShiftP2;
        }

        *pdfCur = dfKernel;
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3576 : /************************************************************************/
3577 : /* GWKResampleWrkStruct */
3578 : /************************************************************************/
3579 :
3580 : typedef struct _GWKResampleWrkStruct GWKResampleWrkStruct;
3581 :
3582 : typedef bool (*pfnGWKResampleType)(const GDALWarpKernel *poWK, int iBand,
3583 : double dfSrcX, double dfSrcY,
3584 : double *pdfDensity, double *pdfReal,
3585 : double *pdfImag,
3586 : GWKResampleWrkStruct *psWrkStruct);
3587 :
3588 : struct _GWKResampleWrkStruct
3589 : {
3590 : pfnGWKResampleType pfnGWKResample;
3591 :
3592 : // Space for saved X weights.
3593 : double *padfWeightsX;
3594 : bool *pabCalcX;
3595 :
3596 : double *padfWeightsY; // Only used by GWKResampleOptimizedLanczos.
3597 : int iLastSrcX; // Only used by GWKResampleOptimizedLanczos.
3598 : int iLastSrcY; // Only used by GWKResampleOptimizedLanczos.
3599 : double dfLastDeltaX; // Only used by GWKResampleOptimizedLanczos.
3600 : double dfLastDeltaY; // Only used by GWKResampleOptimizedLanczos.
3601 : double dfCosPiXScale; // Only used by GWKResampleOptimizedLanczos.
3602 : double dfSinPiXScale; // Only used by GWKResampleOptimizedLanczos.
3603 : double dfCosPiXScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3604 : double dfSinPiXScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3605 : double dfCosPiYScale; // Only used by GWKResampleOptimizedLanczos.
3606 : double dfSinPiYScale; // Only used by GWKResampleOptimizedLanczos.
3607 : double dfCosPiYScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3608 : double dfSinPiYScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3609 :
3610 : // Space for saving a row of pixels.
3611 : double *padfRowDensity;
3612 : double *padfRowReal;
3613 : double *padfRowImag;
3614 : };
3615 :
3616 : /************************************************************************/
3617 : /* GWKResampleCreateWrkStruct() */
3618 : /************************************************************************/
3619 :
3620 : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
3621 : double dfSrcY, double *pdfDensity, double *pdfReal,
3622 : double *pdfImag, GWKResampleWrkStruct *psWrkStruct);
3623 :
3624 : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
3625 : double dfSrcX, double dfSrcY,
3626 : double *pdfDensity, double *pdfReal,
3627 : double *pdfImag,
3628 : GWKResampleWrkStruct *psWrkStruct);
3629 :
3630 344 : static GWKResampleWrkStruct *GWKResampleCreateWrkStruct(GDALWarpKernel *poWK)
3631 : {
3632 344 : const int nXDist = (poWK->nXRadius + 1) * 2;
3633 344 : const int nYDist = (poWK->nYRadius + 1) * 2;
3634 :
3635 : GWKResampleWrkStruct *psWrkStruct = static_cast<GWKResampleWrkStruct *>(
3636 344 : CPLCalloc(1, sizeof(GWKResampleWrkStruct)));
3637 :
3638 : // Alloc space for saved X weights.
3639 344 : psWrkStruct->padfWeightsX =
3640 344 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3641 344 : psWrkStruct->pabCalcX =
3642 344 : static_cast<bool *>(CPLMalloc(nXDist * sizeof(bool)));
3643 :
3644 344 : psWrkStruct->padfWeightsY =
3645 344 : static_cast<double *>(CPLCalloc(nYDist, sizeof(double)));
3646 344 : psWrkStruct->iLastSrcX = -10;
3647 344 : psWrkStruct->iLastSrcY = -10;
3648 344 : psWrkStruct->dfLastDeltaX = -10;
3649 344 : psWrkStruct->dfLastDeltaY = -10;
3650 :
3651 : // Alloc space for saving a row of pixels.
3652 344 : if (poWK->pafUnifiedSrcDensity == nullptr &&
3653 316 : poWK->panUnifiedSrcValid == nullptr &&
3654 304 : poWK->papanBandSrcValid == nullptr)
3655 : {
3656 304 : psWrkStruct->padfRowDensity = nullptr;
3657 : }
3658 : else
3659 : {
3660 40 : psWrkStruct->padfRowDensity =
3661 40 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3662 : }
3663 344 : psWrkStruct->padfRowReal =
3664 344 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3665 344 : psWrkStruct->padfRowImag =
3666 344 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3667 :
3668 344 : if (poWK->eResample == GRA_Lanczos)
3669 : {
3670 63 : psWrkStruct->pfnGWKResample = GWKResampleOptimizedLanczos;
3671 :
3672 63 : if (poWK->dfXScale < 1)
3673 : {
3674 4 : psWrkStruct->dfCosPiXScaleOver3 = cos(M_PI / 3 * poWK->dfXScale);
3675 4 : psWrkStruct->dfSinPiXScaleOver3 =
3676 4 : sqrt(1 - psWrkStruct->dfCosPiXScaleOver3 *
3677 4 : psWrkStruct->dfCosPiXScaleOver3);
3678 : // "Naive":
3679 : // const double dfCosPiXScale = cos( M_PI * dfXScale );
3680 : // const double dfSinPiXScale = sin( M_PI * dfXScale );
3681 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
3682 4 : psWrkStruct->dfCosPiXScale = (4 * psWrkStruct->dfCosPiXScaleOver3 *
3683 4 : psWrkStruct->dfCosPiXScaleOver3 -
3684 4 : 3) *
3685 4 : psWrkStruct->dfCosPiXScaleOver3;
3686 4 : psWrkStruct->dfSinPiXScale = sqrt(
3687 4 : 1 - psWrkStruct->dfCosPiXScale * psWrkStruct->dfCosPiXScale);
3688 : }
3689 :
3690 63 : if (poWK->dfYScale < 1)
3691 : {
3692 11 : psWrkStruct->dfCosPiYScaleOver3 = cos(M_PI / 3 * poWK->dfYScale);
3693 11 : psWrkStruct->dfSinPiYScaleOver3 =
3694 11 : sqrt(1 - psWrkStruct->dfCosPiYScaleOver3 *
3695 11 : psWrkStruct->dfCosPiYScaleOver3);
3696 : // "Naive":
3697 : // const double dfCosPiYScale = cos( M_PI * dfYScale );
3698 : // const double dfSinPiYScale = sin( M_PI * dfYScale );
3699 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
3700 11 : psWrkStruct->dfCosPiYScale = (4 * psWrkStruct->dfCosPiYScaleOver3 *
3701 11 : psWrkStruct->dfCosPiYScaleOver3 -
3702 11 : 3) *
3703 11 : psWrkStruct->dfCosPiYScaleOver3;
3704 11 : psWrkStruct->dfSinPiYScale = sqrt(
3705 11 : 1 - psWrkStruct->dfCosPiYScale * psWrkStruct->dfCosPiYScale);
3706 : }
3707 : }
3708 : else
3709 281 : psWrkStruct->pfnGWKResample = GWKResample;
3710 :
3711 344 : return psWrkStruct;
3712 : }
3713 :
3714 : /************************************************************************/
3715 : /* GWKResampleDeleteWrkStruct() */
3716 : /************************************************************************/
3717 :
3718 344 : static void GWKResampleDeleteWrkStruct(GWKResampleWrkStruct *psWrkStruct)
3719 : {
3720 344 : CPLFree(psWrkStruct->padfWeightsX);
3721 344 : CPLFree(psWrkStruct->padfWeightsY);
3722 344 : CPLFree(psWrkStruct->pabCalcX);
3723 344 : CPLFree(psWrkStruct->padfRowDensity);
3724 344 : CPLFree(psWrkStruct->padfRowReal);
3725 344 : CPLFree(psWrkStruct->padfRowImag);
3726 344 : CPLFree(psWrkStruct);
3727 344 : }
3728 :
3729 : /************************************************************************/
3730 : /* GWKResample() */
3731 : /************************************************************************/
3732 :
3733 239383 : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
3734 : double dfSrcY, double *pdfDensity, double *pdfReal,
3735 : double *pdfImag, GWKResampleWrkStruct *psWrkStruct)
3736 :
3737 : {
3738 : // Save as local variables to avoid following pointers in loops.
3739 239383 : const int nSrcXSize = poWK->nSrcXSize;
3740 239383 : const int nSrcYSize = poWK->nSrcYSize;
3741 :
3742 239383 : double dfAccumulatorReal = 0.0;
3743 239383 : double dfAccumulatorImag = 0.0;
3744 239383 : double dfAccumulatorDensity = 0.0;
3745 239383 : double dfAccumulatorWeight = 0.0;
3746 239383 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
3747 239383 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
3748 239383 : const GPtrDiff_t iSrcOffset =
3749 239383 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
3750 239383 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3751 239383 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3752 :
3753 239383 : const double dfXScale = poWK->dfXScale;
3754 239383 : const double dfYScale = poWK->dfYScale;
3755 :
3756 239383 : const int nXDist = (poWK->nXRadius + 1) * 2;
3757 :
3758 : // Space for saved X weights.
3759 239383 : double *padfWeightsX = psWrkStruct->padfWeightsX;
3760 239383 : bool *pabCalcX = psWrkStruct->pabCalcX;
3761 :
3762 : // Space for saving a row of pixels.
3763 239383 : double *padfRowDensity = psWrkStruct->padfRowDensity;
3764 239383 : double *padfRowReal = psWrkStruct->padfRowReal;
3765 239383 : double *padfRowImag = psWrkStruct->padfRowImag;
3766 :
3767 : // Mark as needing calculation (don't calculate the weights yet,
3768 : // because a mask may render it unnecessary).
3769 239383 : memset(pabCalcX, false, nXDist * sizeof(bool));
3770 :
3771 239383 : FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample];
3772 239383 : CPLAssert(pfnGetWeight);
3773 :
3774 : // Skip sampling over edge of image.
3775 239383 : int j = poWK->nFiltInitY;
3776 239383 : int jMax = poWK->nYRadius;
3777 239383 : if (iSrcY + j < 0)
3778 566 : j = -iSrcY;
3779 239383 : if (iSrcY + jMax >= nSrcYSize)
3780 662 : jMax = nSrcYSize - iSrcY - 1;
3781 :
3782 239383 : int iMin = poWK->nFiltInitX;
3783 239383 : int iMax = poWK->nXRadius;
3784 239383 : if (iSrcX + iMin < 0)
3785 566 : iMin = -iSrcX;
3786 239383 : if (iSrcX + iMax >= nSrcXSize)
3787 659 : iMax = nSrcXSize - iSrcX - 1;
3788 :
3789 239383 : const int bXScaleBelow1 = (dfXScale < 1.0);
3790 239383 : const int bYScaleBelow1 = (dfYScale < 1.0);
3791 :
3792 239383 : GPtrDiff_t iRowOffset =
3793 239383 : iSrcOffset + static_cast<GPtrDiff_t>(j - 1) * nSrcXSize + iMin;
3794 :
3795 : // Loop over pixel rows in the kernel.
3796 1445930 : for (; j <= jMax; ++j)
3797 : {
3798 1206540 : iRowOffset += nSrcXSize;
3799 :
3800 : // Get pixel values.
3801 : // We can potentially read extra elements after the "normal" end of the
3802 : // source arrays, but the contract of papabySrcImage[iBand],
3803 : // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
3804 : // is to have WARP_EXTRA_ELTS reserved at their end.
3805 1206540 : if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
3806 : padfRowDensity, padfRowReal, padfRowImag))
3807 72 : continue;
3808 :
3809 : // Calculate the Y weight.
3810 : double dfWeight1 = (bYScaleBelow1)
3811 1206470 : ? pfnGetWeight((j - dfDeltaY) * dfYScale)
3812 1600 : : pfnGetWeight(j - dfDeltaY);
3813 :
3814 : // Iterate over pixels in row.
3815 1206470 : double dfAccumulatorRealLocal = 0.0;
3816 1206470 : double dfAccumulatorImagLocal = 0.0;
3817 1206470 : double dfAccumulatorDensityLocal = 0.0;
3818 1206470 : double dfAccumulatorWeightLocal = 0.0;
3819 :
3820 7317420 : for (int i = iMin; i <= iMax; ++i)
3821 : {
3822 : // Skip sampling if pixel has zero density.
3823 6110940 : if (padfRowDensity != nullptr &&
3824 77277 : padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD)
3825 546 : continue;
3826 :
3827 6110400 : double dfWeight2 = 0.0;
3828 :
3829 : // Make or use a cached set of weights for this row.
3830 6110400 : if (pabCalcX[i - iMin])
3831 : {
3832 : // Use saved weight value instead of recomputing it.
3833 4903920 : dfWeight2 = padfWeightsX[i - iMin];
3834 : }
3835 : else
3836 : {
3837 : // Calculate & save the X weight.
3838 1206480 : padfWeightsX[i - iMin] = dfWeight2 =
3839 1206480 : (bXScaleBelow1) ? pfnGetWeight((i - dfDeltaX) * dfXScale)
3840 1600 : : pfnGetWeight(i - dfDeltaX);
3841 :
3842 1206480 : pabCalcX[i - iMin] = true;
3843 : }
3844 :
3845 : // Accumulate!
3846 6110400 : dfAccumulatorRealLocal += padfRowReal[i - iMin] * dfWeight2;
3847 6110400 : dfAccumulatorImagLocal += padfRowImag[i - iMin] * dfWeight2;
3848 6110400 : if (padfRowDensity != nullptr)
3849 76731 : dfAccumulatorDensityLocal +=
3850 76731 : padfRowDensity[i - iMin] * dfWeight2;
3851 6110400 : dfAccumulatorWeightLocal += dfWeight2;
3852 : }
3853 :
3854 1206470 : dfAccumulatorReal += dfAccumulatorRealLocal * dfWeight1;
3855 1206470 : dfAccumulatorImag += dfAccumulatorImagLocal * dfWeight1;
3856 1206470 : dfAccumulatorDensity += dfAccumulatorDensityLocal * dfWeight1;
3857 1206470 : dfAccumulatorWeight += dfAccumulatorWeightLocal * dfWeight1;
3858 : }
3859 :
3860 239383 : if (dfAccumulatorWeight < 0.000001 ||
3861 1887 : (padfRowDensity != nullptr && dfAccumulatorDensity < 0.000001))
3862 : {
3863 0 : *pdfDensity = 0.0;
3864 0 : return false;
3865 : }
3866 :
3867 : // Calculate the output taking into account weighting.
3868 239383 : if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
3869 : {
3870 239380 : *pdfReal = dfAccumulatorReal / dfAccumulatorWeight;
3871 239380 : *pdfImag = dfAccumulatorImag / dfAccumulatorWeight;
3872 239380 : if (padfRowDensity != nullptr)
3873 1884 : *pdfDensity = dfAccumulatorDensity / dfAccumulatorWeight;
3874 : else
3875 237496 : *pdfDensity = 1.0;
3876 : }
3877 : else
3878 : {
3879 3 : *pdfReal = dfAccumulatorReal;
3880 3 : *pdfImag = dfAccumulatorImag;
3881 3 : if (padfRowDensity != nullptr)
3882 3 : *pdfDensity = dfAccumulatorDensity;
3883 : else
3884 0 : *pdfDensity = 1.0;
3885 : }
3886 :
3887 239383 : return true;
3888 : }
3889 :
3890 : /************************************************************************/
3891 : /* GWKResampleOptimizedLanczos() */
3892 : /************************************************************************/
3893 :
3894 617144 : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
3895 : double dfSrcX, double dfSrcY,
3896 : double *pdfDensity, double *pdfReal,
3897 : double *pdfImag,
3898 : GWKResampleWrkStruct *psWrkStruct)
3899 :
3900 : {
3901 : // Save as local variables to avoid following pointers in loops.
3902 617144 : const int nSrcXSize = poWK->nSrcXSize;
3903 617144 : const int nSrcYSize = poWK->nSrcYSize;
3904 :
3905 617144 : double dfAccumulatorReal = 0.0;
3906 617144 : double dfAccumulatorImag = 0.0;
3907 617144 : double dfAccumulatorDensity = 0.0;
3908 617144 : double dfAccumulatorWeight = 0.0;
3909 617144 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
3910 617144 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
3911 617144 : const GPtrDiff_t iSrcOffset =
3912 617144 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
3913 617144 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3914 617144 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3915 :
3916 617144 : const double dfXScale = poWK->dfXScale;
3917 617144 : const double dfYScale = poWK->dfYScale;
3918 :
3919 : // Space for saved X weights.
3920 617144 : double *const padfWeightsXShifted =
3921 617144 : psWrkStruct->padfWeightsX - poWK->nFiltInitX;
3922 617144 : double *const padfWeightsYShifted =
3923 617144 : psWrkStruct->padfWeightsY - poWK->nFiltInitY;
3924 :
3925 : // Space for saving a row of pixels.
3926 617144 : double *const padfRowDensity = psWrkStruct->padfRowDensity;
3927 617144 : double *const padfRowReal = psWrkStruct->padfRowReal;
3928 617144 : double *const padfRowImag = psWrkStruct->padfRowImag;
3929 :
3930 : // Skip sampling over edge of image.
3931 617144 : int jMin = poWK->nFiltInitY;
3932 617144 : int jMax = poWK->nYRadius;
3933 617144 : if (iSrcY + jMin < 0)
3934 16522 : jMin = -iSrcY;
3935 617144 : if (iSrcY + jMax >= nSrcYSize)
3936 5782 : jMax = nSrcYSize - iSrcY - 1;
3937 :
3938 617144 : int iMin = poWK->nFiltInitX;
3939 617144 : int iMax = poWK->nXRadius;
3940 617144 : if (iSrcX + iMin < 0)
3941 15797 : iMin = -iSrcX;
3942 617144 : if (iSrcX + iMax >= nSrcXSize)
3943 4657 : iMax = nSrcXSize - iSrcX - 1;
3944 :
3945 617144 : if (dfXScale < 1.0)
3946 : {
3947 403041 : while ((iMin - dfDeltaX) * dfXScale < -3.0)
3948 200179 : iMin++;
3949 202862 : while ((iMax - dfDeltaX) * dfXScale > 3.0)
3950 0 : iMax--;
3951 :
3952 : // clang-format off
3953 : /*
3954 : Naive version:
3955 : for (int i = iMin; i <= iMax; ++i)
3956 : {
3957 : psWrkStruct->padfWeightsXShifted[i] =
3958 : GWKLanczosSinc((i - dfDeltaX) * dfXScale);
3959 : }
3960 :
3961 : but given that:
3962 :
3963 : GWKLanczosSinc(x):
3964 : if (dfX == 0.0)
3965 : return 1.0;
3966 :
3967 : const double dfPIX = M_PI * dfX;
3968 : const double dfPIXoverR = dfPIX / 3;
3969 : const double dfPIX2overR = dfPIX * dfPIXoverR;
3970 : return sin(dfPIX) * sin(dfPIXoverR) / dfPIX2overR;
3971 :
3972 : and
3973 : sin (a + b) = sin a cos b + cos a sin b.
3974 : cos (a + b) = cos a cos b - sin a sin b.
3975 :
3976 : we can skip any sin() computation within the loop
3977 : */
3978 : // clang-format on
3979 :
3980 202862 : if (iSrcX != psWrkStruct->iLastSrcX ||
3981 131072 : dfDeltaX != psWrkStruct->dfLastDeltaX)
3982 : {
3983 71790 : double dfX = (iMin - dfDeltaX) * dfXScale;
3984 :
3985 71790 : double dfPIXover3 = M_PI / 3 * dfX;
3986 71790 : double dfCosOver3 = cos(dfPIXover3);
3987 71790 : double dfSinOver3 = sin(dfPIXover3);
3988 :
3989 : // "Naive":
3990 : // double dfSin = sin( M_PI * dfX );
3991 : // double dfCos = cos( M_PI * dfX );
3992 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
3993 71790 : double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
3994 71790 : double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
3995 :
3996 71790 : const double dfCosPiXScaleOver3 = psWrkStruct->dfCosPiXScaleOver3;
3997 71790 : const double dfSinPiXScaleOver3 = psWrkStruct->dfSinPiXScaleOver3;
3998 71790 : const double dfCosPiXScale = psWrkStruct->dfCosPiXScale;
3999 71790 : const double dfSinPiXScale = psWrkStruct->dfSinPiXScale;
4000 71790 : constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
4001 71790 : padfWeightsXShifted[iMin] =
4002 71790 : dfX == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfX * dfX);
4003 1636480 : for (int i = iMin + 1; i <= iMax; ++i)
4004 : {
4005 1564690 : dfX += dfXScale;
4006 1564690 : const double dfNewSin =
4007 1564690 : dfSin * dfCosPiXScale + dfCos * dfSinPiXScale;
4008 1564690 : const double dfNewSinOver3 = dfSinOver3 * dfCosPiXScaleOver3 +
4009 1564690 : dfCosOver3 * dfSinPiXScaleOver3;
4010 1564690 : padfWeightsXShifted[i] =
4011 : dfX == 0
4012 1564690 : ? 1.0
4013 1564690 : : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfX * dfX);
4014 1564690 : const double dfNewCos =
4015 1564690 : dfCos * dfCosPiXScale - dfSin * dfSinPiXScale;
4016 1564690 : const double dfNewCosOver3 = dfCosOver3 * dfCosPiXScaleOver3 -
4017 1564690 : dfSinOver3 * dfSinPiXScaleOver3;
4018 1564690 : dfSin = dfNewSin;
4019 1564690 : dfCos = dfNewCos;
4020 1564690 : dfSinOver3 = dfNewSinOver3;
4021 1564690 : dfCosOver3 = dfNewCosOver3;
4022 : }
4023 :
4024 71790 : psWrkStruct->iLastSrcX = iSrcX;
4025 71790 : psWrkStruct->dfLastDeltaX = dfDeltaX;
4026 : }
4027 : }
4028 : else
4029 : {
4030 757542 : while (iMin - dfDeltaX < -3.0)
4031 343260 : iMin++;
4032 414282 : while (iMax - dfDeltaX > 3.0)
4033 0 : iMax--;
4034 :
4035 414282 : if (iSrcX != psWrkStruct->iLastSrcX ||
4036 209580 : dfDeltaX != psWrkStruct->dfLastDeltaX)
4037 : {
4038 : // Optimisation of GWKLanczosSinc(i - dfDeltaX) based on the
4039 : // following trigonometric formulas.
4040 :
4041 : // TODO(schwehr): Move this somewhere where it can be rendered at
4042 : // LaTeX.
4043 : // clang-format off
4044 : // sin(M_PI * (dfBase + k)) = sin(M_PI * dfBase) * cos(M_PI * k) +
4045 : // cos(M_PI * dfBase) * sin(M_PI * k)
4046 : // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k) + dfCosPIBase * sin(M_PI * k)
4047 : // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k)
4048 : // sin(M_PI * (dfBase + k)) = dfSinPIBase * (((k % 2) == 0) ? 1 : -1)
4049 :
4050 : // sin(M_PI / dfR * (dfBase + k)) = sin(M_PI / dfR * dfBase) * cos(M_PI / dfR * k) +
4051 : // cos(M_PI / dfR * dfBase) * sin(M_PI / dfR * k)
4052 : // sin(M_PI / dfR * (dfBase + k)) = dfSinPIBaseOverR * cos(M_PI / dfR * k) + dfCosPIBaseOverR * sin(M_PI / dfR * k)
4053 : // clang-format on
4054 :
4055 414282 : const double dfSinPIDeltaXOver3 = sin((-M_PI / 3.0) * dfDeltaX);
4056 414282 : const double dfSin2PIDeltaXOver3 =
4057 : dfSinPIDeltaXOver3 * dfSinPIDeltaXOver3;
4058 : // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaX < PI/2.
4059 414282 : const double dfCosPIDeltaXOver3 = sqrt(1.0 - dfSin2PIDeltaXOver3);
4060 414282 : const double dfSinPIDeltaX =
4061 414282 : (3.0 - 4 * dfSin2PIDeltaXOver3) * dfSinPIDeltaXOver3;
4062 414282 : const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
4063 414282 : const double dfInvPI2Over3xSinPIDeltaX =
4064 : dfInvPI2Over3 * dfSinPIDeltaX;
4065 414282 : const double dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 =
4066 414282 : -0.5 * dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3;
4067 414282 : const double dfSinPIOver3 = 0.8660254037844386;
4068 414282 : const double dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3 =
4069 414282 : dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaX * dfCosPIDeltaXOver3;
4070 : const double padfCst[] = {
4071 414282 : dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3,
4072 414282 : dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 -
4073 : dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3,
4074 414282 : dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 +
4075 414282 : dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3};
4076 :
4077 2936860 : for (int i = iMin; i <= iMax; ++i)
4078 : {
4079 2522570 : const double dfX = i - dfDeltaX;
4080 2522570 : if (dfX == 0.0)
4081 58282 : padfWeightsXShifted[i] = 1.0;
4082 : else
4083 2464290 : padfWeightsXShifted[i] = padfCst[(i + 3) % 3] / (dfX * dfX);
4084 : #if DEBUG_VERBOSE
4085 : // TODO(schwehr): AlmostEqual.
4086 : // CPLAssert(fabs(padfWeightsX[i-poWK->nFiltInitX] -
4087 : // GWKLanczosSinc(dfX, 3.0)) < 1e-10);
4088 : #endif
4089 : }
4090 :
4091 414282 : psWrkStruct->iLastSrcX = iSrcX;
4092 414282 : psWrkStruct->dfLastDeltaX = dfDeltaX;
4093 : }
4094 : }
4095 :
4096 617144 : if (dfYScale < 1.0)
4097 : {
4098 403116 : while ((jMin - dfDeltaY) * dfYScale < -3.0)
4099 200254 : jMin++;
4100 202862 : while ((jMax - dfDeltaY) * dfYScale > 3.0)
4101 0 : jMax--;
4102 :
4103 : // clang-format off
4104 : /*
4105 : Naive version:
4106 : for (int j = jMin; j <= jMax; ++j)
4107 : {
4108 : padfWeightsYShifted[j] =
4109 : GWKLanczosSinc((j - dfDeltaY) * dfYScale);
4110 : }
4111 : */
4112 : // clang-format on
4113 :
4114 202862 : if (iSrcY != psWrkStruct->iLastSrcY ||
4115 202479 : dfDeltaY != psWrkStruct->dfLastDeltaY)
4116 : {
4117 383 : double dfY = (jMin - dfDeltaY) * dfYScale;
4118 :
4119 383 : double dfPIYover3 = M_PI / 3 * dfY;
4120 383 : double dfCosOver3 = cos(dfPIYover3);
4121 383 : double dfSinOver3 = sin(dfPIYover3);
4122 :
4123 : // "Naive":
4124 : // double dfSin = sin( M_PI * dfY );
4125 : // double dfCos = cos( M_PI * dfY );
4126 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
4127 383 : double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
4128 383 : double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
4129 :
4130 383 : const double dfCosPiYScaleOver3 = psWrkStruct->dfCosPiYScaleOver3;
4131 383 : const double dfSinPiYScaleOver3 = psWrkStruct->dfSinPiYScaleOver3;
4132 383 : const double dfCosPiYScale = psWrkStruct->dfCosPiYScale;
4133 383 : const double dfSinPiYScale = psWrkStruct->dfSinPiYScale;
4134 383 : constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
4135 383 : padfWeightsYShifted[jMin] =
4136 383 : dfY == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfY * dfY);
4137 7318 : for (int j = jMin + 1; j <= jMax; ++j)
4138 : {
4139 6935 : dfY += dfYScale;
4140 6935 : const double dfNewSin =
4141 6935 : dfSin * dfCosPiYScale + dfCos * dfSinPiYScale;
4142 6935 : const double dfNewSinOver3 = dfSinOver3 * dfCosPiYScaleOver3 +
4143 6935 : dfCosOver3 * dfSinPiYScaleOver3;
4144 6935 : padfWeightsYShifted[j] =
4145 : dfY == 0
4146 6935 : ? 1.0
4147 6935 : : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfY * dfY);
4148 6935 : const double dfNewCos =
4149 6935 : dfCos * dfCosPiYScale - dfSin * dfSinPiYScale;
4150 6935 : const double dfNewCosOver3 = dfCosOver3 * dfCosPiYScaleOver3 -
4151 6935 : dfSinOver3 * dfSinPiYScaleOver3;
4152 6935 : dfSin = dfNewSin;
4153 6935 : dfCos = dfNewCos;
4154 6935 : dfSinOver3 = dfNewSinOver3;
4155 6935 : dfCosOver3 = dfNewCosOver3;
4156 : }
4157 :
4158 383 : psWrkStruct->iLastSrcY = iSrcY;
4159 383 : psWrkStruct->dfLastDeltaY = dfDeltaY;
4160 : }
4161 : }
4162 : else
4163 : {
4164 684742 : while (jMin - dfDeltaY < -3.0)
4165 270460 : jMin++;
4166 414282 : while (jMax - dfDeltaY > 3.0)
4167 0 : jMax--;
4168 :
4169 414282 : if (iSrcY != psWrkStruct->iLastSrcY ||
4170 413663 : dfDeltaY != psWrkStruct->dfLastDeltaY)
4171 : {
4172 1132 : const double dfSinPIDeltaYOver3 = sin((-M_PI / 3.0) * dfDeltaY);
4173 1132 : const double dfSin2PIDeltaYOver3 =
4174 : dfSinPIDeltaYOver3 * dfSinPIDeltaYOver3;
4175 : // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaY < PI/2.
4176 1132 : const double dfCosPIDeltaYOver3 = sqrt(1.0 - dfSin2PIDeltaYOver3);
4177 1132 : const double dfSinPIDeltaY =
4178 1132 : (3.0 - 4.0 * dfSin2PIDeltaYOver3) * dfSinPIDeltaYOver3;
4179 1132 : const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
4180 1132 : const double dfInvPI2Over3xSinPIDeltaY =
4181 : dfInvPI2Over3 * dfSinPIDeltaY;
4182 1132 : const double dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 =
4183 1132 : -0.5 * dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3;
4184 1132 : const double dfSinPIOver3 = 0.8660254037844386;
4185 1132 : const double dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3 =
4186 1132 : dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaY * dfCosPIDeltaYOver3;
4187 : const double padfCst[] = {
4188 1132 : dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3,
4189 1132 : dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 -
4190 : dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3,
4191 1132 : dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 +
4192 1132 : dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3};
4193 :
4194 7917 : for (int j = jMin; j <= jMax; ++j)
4195 : {
4196 6785 : const double dfY = j - dfDeltaY;
4197 6785 : if (dfY == 0.0)
4198 460 : padfWeightsYShifted[j] = 1.0;
4199 : else
4200 6325 : padfWeightsYShifted[j] = padfCst[(j + 3) % 3] / (dfY * dfY);
4201 : #if DEBUG_VERBOSE
4202 : // TODO(schwehr): AlmostEqual.
4203 : // CPLAssert(fabs(padfWeightsYShifted[j] -
4204 : // GWKLanczosSinc(dfY, 3.0)) < 1e-10);
4205 : #endif
4206 : }
4207 :
4208 1132 : psWrkStruct->iLastSrcY = iSrcY;
4209 1132 : psWrkStruct->dfLastDeltaY = dfDeltaY;
4210 : }
4211 : }
4212 :
4213 : // If we have no density information, we can simply compute the
4214 : // accumulated weight.
4215 617144 : if (padfRowDensity == nullptr)
4216 : {
4217 617144 : double dfRowAccWeight = 0.0;
4218 7903490 : for (int i = iMin; i <= iMax; ++i)
4219 : {
4220 7286350 : dfRowAccWeight += padfWeightsXShifted[i];
4221 : }
4222 617144 : double dfColAccWeight = 0.0;
4223 7958040 : for (int j = jMin; j <= jMax; ++j)
4224 : {
4225 7340900 : dfColAccWeight += padfWeightsYShifted[j];
4226 : }
4227 617144 : dfAccumulatorWeight = dfRowAccWeight * dfColAccWeight;
4228 : }
4229 :
4230 : // Loop over pixel rows in the kernel.
4231 :
4232 617144 : if (poWK->eWorkingDataType == GDT_Byte && !poWK->panUnifiedSrcValid &&
4233 616524 : !poWK->papanBandSrcValid && !poWK->pafUnifiedSrcDensity &&
4234 : !padfRowDensity)
4235 : {
4236 : // Optimization for Byte case without any masking/alpha
4237 :
4238 616524 : if (dfAccumulatorWeight < 0.000001)
4239 : {
4240 0 : *pdfDensity = 0.0;
4241 0 : return false;
4242 : }
4243 :
4244 616524 : const GByte *pSrc =
4245 616524 : reinterpret_cast<const GByte *>(poWK->papabySrcImage[iBand]);
4246 616524 : pSrc += iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
4247 :
4248 : #if defined(USE_SSE2)
4249 616524 : if (iMax - iMin + 1 == 6)
4250 : {
4251 : // This is just an optimized version of the general case in
4252 : // the else clause.
4253 :
4254 346854 : pSrc += iMin;
4255 346854 : int j = jMin;
4256 : const auto fourXWeights =
4257 346854 : XMMReg4Double::Load4Val(padfWeightsXShifted + iMin);
4258 :
4259 : // Process 2 lines at the same time.
4260 1375860 : for (; j < jMax; j += 2)
4261 : {
4262 : const XMMReg4Double v_acc =
4263 1029000 : XMMReg4Double::Load4Val(pSrc) * fourXWeights;
4264 : const XMMReg4Double v_acc2 =
4265 1029000 : XMMReg4Double::Load4Val(pSrc + nSrcXSize) * fourXWeights;
4266 1029000 : const double dfRowAcc = v_acc.GetHorizSum();
4267 1029000 : const double dfRowAccEnd =
4268 1029000 : pSrc[4] * padfWeightsXShifted[iMin + 4] +
4269 1029000 : pSrc[5] * padfWeightsXShifted[iMin + 5];
4270 1029000 : dfAccumulatorReal +=
4271 1029000 : (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
4272 1029000 : const double dfRowAcc2 = v_acc2.GetHorizSum();
4273 1029000 : const double dfRowAcc2End =
4274 1029000 : pSrc[nSrcXSize + 4] * padfWeightsXShifted[iMin + 4] +
4275 1029000 : pSrc[nSrcXSize + 5] * padfWeightsXShifted[iMin + 5];
4276 1029000 : dfAccumulatorReal +=
4277 1029000 : (dfRowAcc2 + dfRowAcc2End) * padfWeightsYShifted[j + 1];
4278 1029000 : pSrc += 2 * nSrcXSize;
4279 : }
4280 346854 : if (j == jMax)
4281 : {
4282 : // Process last line if there's an odd number of them.
4283 :
4284 : const XMMReg4Double v_acc =
4285 86045 : XMMReg4Double::Load4Val(pSrc) * fourXWeights;
4286 86045 : const double dfRowAcc = v_acc.GetHorizSum();
4287 86045 : const double dfRowAccEnd =
4288 86045 : pSrc[4] * padfWeightsXShifted[iMin + 4] +
4289 86045 : pSrc[5] * padfWeightsXShifted[iMin + 5];
4290 86045 : dfAccumulatorReal +=
4291 86045 : (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
4292 : }
4293 : }
4294 : else
4295 : #endif
4296 : {
4297 5463580 : for (int j = jMin; j <= jMax; ++j)
4298 : {
4299 5193900 : int i = iMin;
4300 5193900 : double dfRowAcc1 = 0.0;
4301 5193900 : double dfRowAcc2 = 0.0;
4302 : // A bit of loop unrolling
4303 62750600 : for (; i < iMax; i += 2)
4304 : {
4305 57556700 : dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
4306 57556700 : dfRowAcc2 += pSrc[i + 1] * padfWeightsXShifted[i + 1];
4307 : }
4308 5193900 : if (i == iMax)
4309 : {
4310 : // Process last column if there's an odd number of them.
4311 426183 : dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
4312 : }
4313 :
4314 5193900 : dfAccumulatorReal +=
4315 5193900 : (dfRowAcc1 + dfRowAcc2) * padfWeightsYShifted[j];
4316 5193900 : pSrc += nSrcXSize;
4317 : }
4318 : }
4319 :
4320 : // Calculate the output taking into account weighting.
4321 616524 : if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
4322 : {
4323 569230 : const double dfInvAcc = 1.0 / dfAccumulatorWeight;
4324 569230 : *pdfReal = dfAccumulatorReal * dfInvAcc;
4325 569230 : *pdfDensity = 1.0;
4326 : }
4327 : else
4328 : {
4329 47294 : *pdfReal = dfAccumulatorReal;
4330 47294 : *pdfDensity = 1.0;
4331 : }
4332 :
4333 616524 : return true;
4334 : }
4335 :
4336 620 : GPtrDiff_t iRowOffset =
4337 620 : iSrcOffset + static_cast<GPtrDiff_t>(jMin - 1) * nSrcXSize + iMin;
4338 :
4339 620 : int nCountValid = 0;
4340 620 : const bool bIsNonComplex = !GDALDataTypeIsComplex(poWK->eWorkingDataType);
4341 :
4342 3560 : for (int j = jMin; j <= jMax; ++j)
4343 : {
4344 2940 : iRowOffset += nSrcXSize;
4345 :
4346 : // Get pixel values.
4347 : // We can potentially read extra elements after the "normal" end of the
4348 : // source arrays, but the contract of papabySrcImage[iBand],
4349 : // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
4350 : // is to have WARP_EXTRA_ELTS reserved at their end.
4351 2940 : if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
4352 : padfRowDensity, padfRowReal, padfRowImag))
4353 0 : continue;
4354 :
4355 2940 : const double dfWeight1 = padfWeightsYShifted[j];
4356 :
4357 : // Iterate over pixels in row.
4358 2940 : if (padfRowDensity != nullptr)
4359 : {
4360 0 : for (int i = iMin; i <= iMax; ++i)
4361 : {
4362 : // Skip sampling if pixel has zero density.
4363 0 : if (padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD)
4364 0 : continue;
4365 :
4366 0 : nCountValid++;
4367 :
4368 : // Use a cached set of weights for this row.
4369 0 : const double dfWeight2 = dfWeight1 * padfWeightsXShifted[i];
4370 :
4371 : // Accumulate!
4372 0 : dfAccumulatorReal += padfRowReal[i - iMin] * dfWeight2;
4373 0 : dfAccumulatorImag += padfRowImag[i - iMin] * dfWeight2;
4374 0 : dfAccumulatorDensity += padfRowDensity[i - iMin] * dfWeight2;
4375 0 : dfAccumulatorWeight += dfWeight2;
4376 : }
4377 : }
4378 2940 : else if (bIsNonComplex)
4379 : {
4380 1764 : double dfRowAccReal = 0.0;
4381 10560 : for (int i = iMin; i <= iMax; ++i)
4382 : {
4383 8796 : const double dfWeight2 = padfWeightsXShifted[i];
4384 :
4385 : // Accumulate!
4386 8796 : dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
4387 : }
4388 :
4389 1764 : dfAccumulatorReal += dfRowAccReal * dfWeight1;
4390 : }
4391 : else
4392 : {
4393 1176 : double dfRowAccReal = 0.0;
4394 1176 : double dfRowAccImag = 0.0;
4395 7040 : for (int i = iMin; i <= iMax; ++i)
4396 : {
4397 5864 : const double dfWeight2 = padfWeightsXShifted[i];
4398 :
4399 : // Accumulate!
4400 5864 : dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
4401 5864 : dfRowAccImag += padfRowImag[i - iMin] * dfWeight2;
4402 : }
4403 :
4404 1176 : dfAccumulatorReal += dfRowAccReal * dfWeight1;
4405 1176 : dfAccumulatorImag += dfRowAccImag * dfWeight1;
4406 : }
4407 : }
4408 :
4409 620 : if (dfAccumulatorWeight < 0.000001 ||
4410 0 : (padfRowDensity != nullptr &&
4411 0 : (dfAccumulatorDensity < 0.000001 ||
4412 0 : nCountValid < (jMax - jMin + 1) * (iMax - iMin + 1) / 2)))
4413 : {
4414 0 : *pdfDensity = 0.0;
4415 0 : return false;
4416 : }
4417 :
4418 : // Calculate the output taking into account weighting.
4419 620 : if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
4420 : {
4421 0 : const double dfInvAcc = 1.0 / dfAccumulatorWeight;
4422 0 : *pdfReal = dfAccumulatorReal * dfInvAcc;
4423 0 : *pdfImag = dfAccumulatorImag * dfInvAcc;
4424 0 : if (padfRowDensity != nullptr)
4425 0 : *pdfDensity = dfAccumulatorDensity * dfInvAcc;
4426 : else
4427 0 : *pdfDensity = 1.0;
4428 : }
4429 : else
4430 : {
4431 620 : *pdfReal = dfAccumulatorReal;
4432 620 : *pdfImag = dfAccumulatorImag;
4433 620 : if (padfRowDensity != nullptr)
4434 0 : *pdfDensity = dfAccumulatorDensity;
4435 : else
4436 620 : *pdfDensity = 1.0;
4437 : }
4438 :
4439 620 : return true;
4440 : }
4441 :
4442 : /************************************************************************/
4443 : /* GWKComputeWeights() */
4444 : /************************************************************************/
4445 :
4446 3744970 : static void GWKComputeWeights(GDALResampleAlg eResample, int iMin, int iMax,
4447 : double dfDeltaX, double dfXScale, int jMin,
4448 : int jMax, double dfDeltaY, double dfYScale,
4449 : double *padfWeightsHorizontal,
4450 : double *padfWeightsVertical, double &dfInvWeights)
4451 : {
4452 :
4453 3744970 : const FilterFuncType pfnGetWeight = apfGWKFilter[eResample];
4454 3744970 : CPLAssert(pfnGetWeight);
4455 3744970 : const FilterFunc4ValuesType pfnGetWeight4Values =
4456 3744970 : apfGWKFilter4Values[eResample];
4457 3744970 : CPLAssert(pfnGetWeight4Values);
4458 :
4459 3744970 : int i = iMin; // Used after for.
4460 3744970 : int iC = 0; // Used after for.
4461 : // Not zero, but as close as possible to it, to avoid potential division by
4462 : // zero at end of function
4463 3744970 : double dfAccumulatorWeightHorizontal = cpl::NumericLimits<double>::min();
4464 8311770 : for (; i + 2 < iMax; i += 4, iC += 4)
4465 : {
4466 4568590 : padfWeightsHorizontal[iC] = (i - dfDeltaX) * dfXScale;
4467 4568590 : padfWeightsHorizontal[iC + 1] = padfWeightsHorizontal[iC] + dfXScale;
4468 4568590 : padfWeightsHorizontal[iC + 2] =
4469 4568590 : padfWeightsHorizontal[iC + 1] + dfXScale;
4470 4568590 : padfWeightsHorizontal[iC + 3] =
4471 4568590 : padfWeightsHorizontal[iC + 2] + dfXScale;
4472 4566800 : dfAccumulatorWeightHorizontal +=
4473 4568590 : pfnGetWeight4Values(padfWeightsHorizontal + iC);
4474 : }
4475 3964100 : for (; i <= iMax; ++i, ++iC)
4476 : {
4477 219566 : const double dfWeight = pfnGetWeight((i - dfDeltaX) * dfXScale);
4478 220922 : padfWeightsHorizontal[iC] = dfWeight;
4479 220922 : dfAccumulatorWeightHorizontal += dfWeight;
4480 : }
4481 :
4482 3744530 : int j = jMin; // Used after for.
4483 3744530 : int jC = 0; // Used after for.
4484 : // Not zero, but as close as possible to it, to avoid potential division by
4485 : // zero at end of function
4486 3744530 : double dfAccumulatorWeightVertical = cpl::NumericLimits<double>::min();
4487 7885720 : for (; j + 2 < jMax; j += 4, jC += 4)
4488 : {
4489 4148740 : padfWeightsVertical[jC] = (j - dfDeltaY) * dfYScale;
4490 4148740 : padfWeightsVertical[jC + 1] = padfWeightsVertical[jC] + dfYScale;
4491 4148740 : padfWeightsVertical[jC + 2] = padfWeightsVertical[jC + 1] + dfYScale;
4492 4148740 : padfWeightsVertical[jC + 3] = padfWeightsVertical[jC + 2] + dfYScale;
4493 4141190 : dfAccumulatorWeightVertical +=
4494 4148740 : pfnGetWeight4Values(padfWeightsVertical + jC);
4495 : }
4496 8263440 : for (; j <= jMax; ++j, ++jC)
4497 : {
4498 4517490 : const double dfWeight = pfnGetWeight((j - dfDeltaY) * dfYScale);
4499 4526450 : padfWeightsVertical[jC] = dfWeight;
4500 4526450 : dfAccumulatorWeightVertical += dfWeight;
4501 : }
4502 :
4503 3745940 : dfInvWeights =
4504 3745940 : 1. / (dfAccumulatorWeightHorizontal * dfAccumulatorWeightVertical);
4505 3745940 : }
4506 :
4507 : /************************************************************************/
4508 : /* GWKResampleNoMasksT() */
4509 : /************************************************************************/
4510 :
4511 : template <class T>
4512 : static bool
4513 : GWKResampleNoMasksT(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
4514 : double dfSrcY, T *pValue, double *padfWeightsHorizontal,
4515 : double *padfWeightsVertical, double &dfInvWeights)
4516 :
4517 : {
4518 : // Commonly used; save locally.
4519 : const int nSrcXSize = poWK->nSrcXSize;
4520 : const int nSrcYSize = poWK->nSrcYSize;
4521 :
4522 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4523 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4524 : const GPtrDiff_t iSrcOffset =
4525 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4526 :
4527 : const int nXRadius = poWK->nXRadius;
4528 : const int nYRadius = poWK->nYRadius;
4529 :
4530 : // Politely refuse to process invalid coordinates or obscenely small image.
4531 : if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
4532 : nYRadius > nSrcYSize)
4533 : return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
4534 : pValue);
4535 :
4536 : T *pSrcBand = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
4537 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4538 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4539 :
4540 : const double dfXScale = std::min(poWK->dfXScale, 1.0);
4541 : const double dfYScale = std::min(poWK->dfYScale, 1.0);
4542 :
4543 : int iMin = 1 - nXRadius;
4544 : if (iSrcX + iMin < 0)
4545 : iMin = -iSrcX;
4546 : int iMax = nXRadius;
4547 : if (iSrcX + iMax >= nSrcXSize - 1)
4548 : iMax = nSrcXSize - 1 - iSrcX;
4549 :
4550 : int jMin = 1 - nYRadius;
4551 : if (iSrcY + jMin < 0)
4552 : jMin = -iSrcY;
4553 : int jMax = nYRadius;
4554 : if (iSrcY + jMax >= nSrcYSize - 1)
4555 : jMax = nSrcYSize - 1 - iSrcY;
4556 :
4557 : if (iBand == 0)
4558 : {
4559 : GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
4560 : jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
4561 : padfWeightsVertical, dfInvWeights);
4562 : }
4563 :
4564 : // Loop over all rows in the kernel.
4565 : double dfAccumulator = 0.0;
4566 : for (int jC = 0, j = jMin; j <= jMax; ++j, ++jC)
4567 : {
4568 : const GPtrDiff_t iSampJ =
4569 : iSrcOffset + static_cast<GPtrDiff_t>(j) * nSrcXSize;
4570 :
4571 : // Loop over all pixels in the row.
4572 : double dfAccumulatorLocal = 0.0;
4573 : double dfAccumulatorLocal2 = 0.0;
4574 : int iC = 0;
4575 : int i = iMin;
4576 : // Process by chunk of 4 cols.
4577 : for (; i + 2 < iMax; i += 4, iC += 4)
4578 : {
4579 : // Retrieve the pixel & accumulate.
4580 : dfAccumulatorLocal +=
4581 : pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
4582 : dfAccumulatorLocal +=
4583 : pSrcBand[i + 1 + iSampJ] * padfWeightsHorizontal[iC + 1];
4584 : dfAccumulatorLocal2 +=
4585 : pSrcBand[i + 2 + iSampJ] * padfWeightsHorizontal[iC + 2];
4586 : dfAccumulatorLocal2 +=
4587 : pSrcBand[i + 3 + iSampJ] * padfWeightsHorizontal[iC + 3];
4588 : }
4589 : dfAccumulatorLocal += dfAccumulatorLocal2;
4590 : if (i < iMax)
4591 : {
4592 : dfAccumulatorLocal +=
4593 : pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
4594 : dfAccumulatorLocal +=
4595 : pSrcBand[i + 1 + iSampJ] * padfWeightsHorizontal[iC + 1];
4596 : i += 2;
4597 : iC += 2;
4598 : }
4599 : if (i == iMax)
4600 : {
4601 : dfAccumulatorLocal +=
4602 : pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
4603 : }
4604 :
4605 : dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
4606 : }
4607 :
4608 : *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
4609 :
4610 : return true;
4611 : }
4612 :
4613 : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
4614 : /* Could possibly be used too on 32bit, but we would need to check at runtime */
4615 : #if defined(USE_SSE2)
4616 :
4617 : /************************************************************************/
4618 : /* GWKResampleNoMasks_SSE2_T() */
4619 : /************************************************************************/
4620 :
4621 : template <class T>
4622 9141413 : static bool GWKResampleNoMasks_SSE2_T(const GDALWarpKernel *poWK, int iBand,
4623 : double dfSrcX, double dfSrcY, T *pValue,
4624 : double *padfWeightsHorizontal,
4625 : double *padfWeightsVertical,
4626 : double &dfInvWeights)
4627 : {
4628 : // Commonly used; save locally.
4629 9141413 : const int nSrcXSize = poWK->nSrcXSize;
4630 9141413 : const int nSrcYSize = poWK->nSrcYSize;
4631 :
4632 9141413 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4633 9141413 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4634 9141413 : const GPtrDiff_t iSrcOffset =
4635 9141413 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4636 9141413 : const int nXRadius = poWK->nXRadius;
4637 9141413 : const int nYRadius = poWK->nYRadius;
4638 :
4639 : // Politely refuse to process invalid coordinates or obscenely small image.
4640 9141413 : if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
4641 : nYRadius > nSrcYSize)
4642 2 : return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
4643 3 : pValue);
4644 :
4645 9161211 : const T *pSrcBand =
4646 9161211 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
4647 :
4648 9161211 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4649 9161211 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4650 9161211 : const double dfXScale = std::min(poWK->dfXScale, 1.0);
4651 9164311 : const double dfYScale = std::min(poWK->dfYScale, 1.0);
4652 :
4653 9170271 : int iMin = 1 - nXRadius;
4654 9170271 : if (iSrcX + iMin < 0)
4655 43130 : iMin = -iSrcX;
4656 9170271 : int iMax = nXRadius;
4657 9170271 : if (iSrcX + iMax >= nSrcXSize - 1)
4658 38080 : iMax = nSrcXSize - 1 - iSrcX;
4659 :
4660 9170271 : int jMin = 1 - nYRadius;
4661 9170271 : if (iSrcY + jMin < 0)
4662 49554 : jMin = -iSrcY;
4663 9170271 : int jMax = nYRadius;
4664 9170271 : if (iSrcY + jMax >= nSrcYSize - 1)
4665 35984 : jMax = nSrcYSize - 1 - iSrcY;
4666 :
4667 9170271 : if (iBand == 0)
4668 : {
4669 3743281 : GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
4670 : jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
4671 : padfWeightsVertical, dfInvWeights);
4672 : }
4673 :
4674 9159941 : GPtrDiff_t iSampJ = iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
4675 : // Process by chunk of 4 rows.
4676 9159941 : int jC = 0;
4677 9159941 : int j = jMin;
4678 9159941 : double dfAccumulator = 0.0;
4679 19409893 : for (; j + 2 < jMax; j += 4, iSampJ += 4 * nSrcXSize, jC += 4)
4680 : {
4681 : // Loop over all pixels in the row.
4682 10269922 : int iC = 0;
4683 10269922 : int i = iMin;
4684 : // Process by chunk of 4 cols.
4685 10269922 : XMMReg4Double v_acc_1 = XMMReg4Double::Zero();
4686 10249512 : XMMReg4Double v_acc_2 = XMMReg4Double::Zero();
4687 10248082 : XMMReg4Double v_acc_3 = XMMReg4Double::Zero();
4688 10255552 : XMMReg4Double v_acc_4 = XMMReg4Double::Zero();
4689 26752280 : for (; i + 2 < iMax; i += 4, iC += 4)
4690 : {
4691 : // Retrieve the pixel & accumulate.
4692 16505288 : XMMReg4Double v_pixels_1 =
4693 16505288 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
4694 16481488 : XMMReg4Double v_pixels_2 =
4695 16481488 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + nSrcXSize);
4696 16513188 : XMMReg4Double v_pixels_3 =
4697 16513188 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
4698 16509988 : XMMReg4Double v_pixels_4 =
4699 16509988 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
4700 :
4701 16518988 : XMMReg4Double v_padfWeight =
4702 16518988 : XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
4703 :
4704 16500588 : v_acc_1 += v_pixels_1 * v_padfWeight;
4705 16509188 : v_acc_2 += v_pixels_2 * v_padfWeight;
4706 16513988 : v_acc_3 += v_pixels_3 * v_padfWeight;
4707 16516788 : v_acc_4 += v_pixels_4 * v_padfWeight;
4708 : }
4709 :
4710 10246972 : if (i < iMax)
4711 : {
4712 142670 : XMMReg2Double v_pixels_1 =
4713 142670 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ);
4714 142670 : XMMReg2Double v_pixels_2 =
4715 142670 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + nSrcXSize);
4716 142670 : XMMReg2Double v_pixels_3 =
4717 142670 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
4718 142669 : XMMReg2Double v_pixels_4 =
4719 142669 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
4720 :
4721 142670 : XMMReg2Double v_padfWeight =
4722 142670 : XMMReg2Double::Load2Val(padfWeightsHorizontal + iC);
4723 :
4724 142670 : v_acc_1.AddToLow(v_pixels_1 * v_padfWeight);
4725 142670 : v_acc_2.AddToLow(v_pixels_2 * v_padfWeight);
4726 142670 : v_acc_3.AddToLow(v_pixels_3 * v_padfWeight);
4727 142670 : v_acc_4.AddToLow(v_pixels_4 * v_padfWeight);
4728 :
4729 142670 : i += 2;
4730 142670 : iC += 2;
4731 : }
4732 :
4733 10246972 : double dfAccumulatorLocal_1 = v_acc_1.GetHorizSum();
4734 10246832 : double dfAccumulatorLocal_2 = v_acc_2.GetHorizSum();
4735 10264132 : double dfAccumulatorLocal_3 = v_acc_3.GetHorizSum();
4736 10255742 : double dfAccumulatorLocal_4 = v_acc_4.GetHorizSum();
4737 :
4738 10249992 : if (i == iMax)
4739 : {
4740 49171 : dfAccumulatorLocal_1 += static_cast<double>(pSrcBand[i + iSampJ]) *
4741 49171 : padfWeightsHorizontal[iC];
4742 49171 : dfAccumulatorLocal_2 +=
4743 49171 : static_cast<double>(pSrcBand[i + iSampJ + nSrcXSize]) *
4744 49171 : padfWeightsHorizontal[iC];
4745 49171 : dfAccumulatorLocal_3 +=
4746 49171 : static_cast<double>(pSrcBand[i + iSampJ + 2 * nSrcXSize]) *
4747 49171 : padfWeightsHorizontal[iC];
4748 49171 : dfAccumulatorLocal_4 +=
4749 49171 : static_cast<double>(pSrcBand[i + iSampJ + 3 * nSrcXSize]) *
4750 49171 : padfWeightsHorizontal[iC];
4751 : }
4752 :
4753 10249992 : dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal_1;
4754 10249992 : dfAccumulator += padfWeightsVertical[jC + 1] * dfAccumulatorLocal_2;
4755 10249992 : dfAccumulator += padfWeightsVertical[jC + 2] * dfAccumulatorLocal_3;
4756 10249992 : dfAccumulator += padfWeightsVertical[jC + 3] * dfAccumulatorLocal_4;
4757 : }
4758 22245041 : for (; j <= jMax; ++j, iSampJ += nSrcXSize, ++jC)
4759 : {
4760 : // Loop over all pixels in the row.
4761 13076340 : int iC = 0;
4762 13076340 : int i = iMin;
4763 : // Process by chunk of 4 cols.
4764 13076340 : XMMReg4Double v_acc = XMMReg4Double::Zero();
4765 26177263 : for (; i + 2 < iMax; i += 4, iC += 4)
4766 : {
4767 : // Retrieve the pixel & accumulate.
4768 13104523 : XMMReg4Double v_pixels =
4769 13104523 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
4770 13118923 : XMMReg4Double v_padfWeight =
4771 13118923 : XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
4772 :
4773 13120823 : v_acc += v_pixels * v_padfWeight;
4774 : }
4775 :
4776 13072740 : double dfAccumulatorLocal = v_acc.GetHorizSum();
4777 :
4778 13105040 : if (i < iMax)
4779 : {
4780 173904 : dfAccumulatorLocal +=
4781 173904 : pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
4782 173904 : dfAccumulatorLocal +=
4783 173904 : pSrcBand[i + 1 + iSampJ] * padfWeightsHorizontal[iC + 1];
4784 173904 : i += 2;
4785 173904 : iC += 2;
4786 : }
4787 13105040 : if (i == iMax)
4788 : {
4789 33014 : dfAccumulatorLocal += static_cast<double>(pSrcBand[i + iSampJ]) *
4790 33014 : padfWeightsHorizontal[iC];
4791 : }
4792 :
4793 13105040 : dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
4794 : }
4795 :
4796 9168671 : *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
4797 :
4798 9173721 : return true;
4799 : }
4800 :
4801 : /************************************************************************/
4802 : /* GWKResampleNoMasksT<GByte>() */
4803 : /************************************************************************/
4804 :
4805 : template <>
4806 8581550 : bool GWKResampleNoMasksT<GByte>(const GDALWarpKernel *poWK, int iBand,
4807 : double dfSrcX, double dfSrcY, GByte *pValue,
4808 : double *padfWeightsHorizontal,
4809 : double *padfWeightsVertical,
4810 : double &dfInvWeights)
4811 : {
4812 8581550 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4813 : padfWeightsHorizontal, padfWeightsVertical,
4814 8547000 : dfInvWeights);
4815 : }
4816 :
4817 : /************************************************************************/
4818 : /* GWKResampleNoMasksT<GInt16>() */
4819 : /************************************************************************/
4820 :
4821 : template <>
4822 252563 : bool GWKResampleNoMasksT<GInt16>(const GDALWarpKernel *poWK, int iBand,
4823 : double dfSrcX, double dfSrcY, GInt16 *pValue,
4824 : double *padfWeightsHorizontal,
4825 : double *padfWeightsVertical,
4826 : double &dfInvWeights)
4827 : {
4828 252563 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4829 : padfWeightsHorizontal, padfWeightsVertical,
4830 252563 : dfInvWeights);
4831 : }
4832 :
4833 : /************************************************************************/
4834 : /* GWKResampleNoMasksT<GUInt16>() */
4835 : /************************************************************************/
4836 :
4837 : template <>
4838 343440 : bool GWKResampleNoMasksT<GUInt16>(const GDALWarpKernel *poWK, int iBand,
4839 : double dfSrcX, double dfSrcY, GUInt16 *pValue,
4840 : double *padfWeightsHorizontal,
4841 : double *padfWeightsVertical,
4842 : double &dfInvWeights)
4843 : {
4844 343440 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4845 : padfWeightsHorizontal, padfWeightsVertical,
4846 343440 : dfInvWeights);
4847 : }
4848 :
4849 : /************************************************************************/
4850 : /* GWKResampleNoMasksT<float>() */
4851 : /************************************************************************/
4852 :
4853 : template <>
4854 2500 : bool GWKResampleNoMasksT<float>(const GDALWarpKernel *poWK, int iBand,
4855 : double dfSrcX, double dfSrcY, float *pValue,
4856 : double *padfWeightsHorizontal,
4857 : double *padfWeightsVertical,
4858 : double &dfInvWeights)
4859 : {
4860 2500 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4861 : padfWeightsHorizontal, padfWeightsVertical,
4862 2500 : dfInvWeights);
4863 : }
4864 :
4865 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
4866 :
4867 : /************************************************************************/
4868 : /* GWKResampleNoMasksT<double>() */
4869 : /************************************************************************/
4870 :
4871 : template <>
4872 : bool GWKResampleNoMasksT<double>(const GDALWarpKernel *poWK, int iBand,
4873 : double dfSrcX, double dfSrcY, double *pValue,
4874 : double *padfWeightsHorizontal,
4875 : double *padfWeightsVertical,
4876 : double &dfInvWeights)
4877 : {
4878 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4879 : padfWeightsHorizontal, padfWeightsVertical,
4880 : dfInvWeights);
4881 : }
4882 :
4883 : #endif /* INSTANTIATE_FLOAT64_SSE2_IMPL */
4884 :
4885 : #endif /* defined(USE_SSE2) */
4886 :
4887 : /************************************************************************/
4888 : /* GWKRoundSourceCoordinates() */
4889 : /************************************************************************/
4890 :
4891 1000 : static void GWKRoundSourceCoordinates(
4892 : int nDstXSize, double *padfX, double *padfY, double *padfZ, int *pabSuccess,
4893 : double dfSrcCoordPrecision, double dfErrorThreshold,
4894 : GDALTransformerFunc pfnTransformer, void *pTransformerArg, double dfDstXOff,
4895 : double dfDstY)
4896 : {
4897 1000 : double dfPct = 0.8;
4898 1000 : if (dfErrorThreshold > 0 && dfSrcCoordPrecision / dfErrorThreshold >= 10.0)
4899 : {
4900 1000 : dfPct = 1.0 - 2 * 1.0 / (dfSrcCoordPrecision / dfErrorThreshold);
4901 : }
4902 1000 : const double dfExactTransformThreshold = 0.5 * dfPct * dfSrcCoordPrecision;
4903 :
4904 501000 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
4905 : {
4906 500000 : const double dfXBefore = padfX[iDstX];
4907 500000 : const double dfYBefore = padfY[iDstX];
4908 500000 : padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
4909 : dfSrcCoordPrecision;
4910 500000 : padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
4911 : dfSrcCoordPrecision;
4912 :
4913 : // If we are in an uncertainty zone, go to non-approximated
4914 : // transformation.
4915 : // Due to the 80% of half-precision threshold, dfSrcCoordPrecision must
4916 : // be at least 10 times greater than the approximation error.
4917 500000 : if (fabs(dfXBefore - padfX[iDstX]) > dfExactTransformThreshold ||
4918 399914 : fabs(dfYBefore - padfY[iDstX]) > dfExactTransformThreshold)
4919 : {
4920 180090 : padfX[iDstX] = iDstX + dfDstXOff;
4921 180090 : padfY[iDstX] = dfDstY;
4922 180090 : padfZ[iDstX] = 0.0;
4923 180090 : pfnTransformer(pTransformerArg, TRUE, 1, padfX + iDstX,
4924 180090 : padfY + iDstX, padfZ + iDstX, pabSuccess + iDstX);
4925 180090 : padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
4926 : dfSrcCoordPrecision;
4927 180090 : padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
4928 : dfSrcCoordPrecision;
4929 : }
4930 : }
4931 1000 : }
4932 :
4933 : /************************************************************************/
4934 : /* GWKCheckAndComputeSrcOffsets() */
4935 : /************************************************************************/
4936 : static CPL_INLINE bool
4937 108372000 : GWKCheckAndComputeSrcOffsets(GWKJobStruct *psJob, int *_pabSuccess, int _iDstX,
4938 : int _iDstY, double *_padfX, double *_padfY,
4939 : int _nSrcXSize, int _nSrcYSize,
4940 : GPtrDiff_t &iSrcOffset)
4941 : {
4942 108372000 : const GDALWarpKernel *_poWK = psJob->poWK;
4943 108504000 : for (int iTry = 0; iTry < 2; ++iTry)
4944 : {
4945 108416000 : if (iTry == 1)
4946 : {
4947 : // If the source coordinate is slightly outside of the source raster
4948 : // retry to transform it alone, so that the exact coordinate
4949 : // transformer is used.
4950 :
4951 131944 : _padfX[_iDstX] = _iDstX + 0.5 + _poWK->nDstXOff;
4952 131944 : _padfY[_iDstX] = _iDstY + 0.5 + _poWK->nDstYOff;
4953 131944 : double dfZ = 0;
4954 131944 : _poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1,
4955 131944 : _padfX + _iDstX, _padfY + _iDstX, &dfZ,
4956 131944 : _pabSuccess + _iDstX);
4957 : }
4958 108416000 : if (!_pabSuccess[_iDstX])
4959 3593470 : return false;
4960 :
4961 : // If this happens this is likely the symptom of a bug somewhere.
4962 104822000 : if (std::isnan(_padfX[_iDstX]) || std::isnan(_padfY[_iDstX]))
4963 : {
4964 : static bool bNanCoordFound = false;
4965 0 : if (!bNanCoordFound)
4966 : {
4967 0 : CPLDebug("WARP",
4968 : "GWKCheckAndComputeSrcOffsets(): "
4969 : "NaN coordinate found on point %d.",
4970 : _iDstX);
4971 0 : bNanCoordFound = true;
4972 : }
4973 0 : return false;
4974 : }
4975 :
4976 : /* --------------------------------------------------------------------
4977 : */
4978 : /* Figure out what pixel we want in our source raster, and skip */
4979 : /* further processing if it is well off the source image. */
4980 : /* --------------------------------------------------------------------
4981 : */
4982 : /* We test against the value before casting to avoid the */
4983 : /* problem of asymmetric truncation effects around zero. That is */
4984 : /* -0.5 will be 0 when cast to an int. */
4985 104937000 : if (_padfX[_iDstX] < _poWK->nSrcXOff)
4986 : {
4987 : // If the source coordinate is slightly outside of the source raster
4988 : // retry to transform it alone, so that the exact coordinate
4989 : // transformer is used.
4990 4137660 : if (iTry == 0 && _padfX[_iDstX] > _poWK->nSrcXOff - 1)
4991 21506 : continue;
4992 4116150 : return false;
4993 : }
4994 :
4995 100800000 : if (_padfY[_iDstX] < _poWK->nSrcYOff)
4996 : {
4997 : // If the source coordinate is slightly outside of the source raster
4998 : // retry to transform it alone, so that the exact coordinate
4999 : // transformer is used.
5000 4892120 : if (iTry == 0 && _padfY[_iDstX] > _poWK->nSrcYOff - 1)
5001 41563 : continue;
5002 4850560 : return false;
5003 : }
5004 :
5005 : // Check for potential overflow when casting from float to int, (if
5006 : // operating outside natural projection area, padfX/Y can be a very huge
5007 : // positive number before doing the actual conversion), as such cast is
5008 : // undefined behavior that can trigger exception with some compilers
5009 : // (see #6753)
5010 95907700 : if (_padfX[_iDstX] + 1e-10 > _nSrcXSize + _poWK->nSrcXOff)
5011 : {
5012 : // If the source coordinate is slightly outside of the source raster
5013 : // retry to transform it alone, so that the exact coordinate
5014 : // transformer is used.
5015 3503620 : if (iTry == 0 && _padfX[_iDstX] < _nSrcXSize + _poWK->nSrcXOff + 1)
5016 33316 : continue;
5017 3470300 : return false;
5018 : }
5019 92404100 : if (_padfY[_iDstX] + 1e-10 > _nSrcYSize + _poWK->nSrcYOff)
5020 : {
5021 : // If the source coordinate is slightly outside of the source raster
5022 : // retry to transform it alone, so that the exact coordinate
5023 : // transformer is used.
5024 3827570 : if (iTry == 0 && _padfY[_iDstX] < _nSrcYSize + _poWK->nSrcYOff + 1)
5025 35559 : continue;
5026 3792010 : return false;
5027 : }
5028 :
5029 88576500 : break;
5030 : }
5031 :
5032 88664400 : int iSrcX = static_cast<int>(_padfX[_iDstX] + 1.0e-10) - _poWK->nSrcXOff;
5033 88664400 : int iSrcY = static_cast<int>(_padfY[_iDstX] + 1.0e-10) - _poWK->nSrcYOff;
5034 88664400 : if (iSrcX == _nSrcXSize)
5035 0 : iSrcX--;
5036 88664400 : if (iSrcY == _nSrcYSize)
5037 0 : iSrcY--;
5038 :
5039 : // Those checks should normally be OK given the previous ones.
5040 88664400 : CPLAssert(iSrcX >= 0);
5041 88664400 : CPLAssert(iSrcY >= 0);
5042 88664400 : CPLAssert(iSrcX < _nSrcXSize);
5043 88664400 : CPLAssert(iSrcY < _nSrcYSize);
5044 :
5045 88664400 : iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * _nSrcXSize;
5046 :
5047 88664400 : return true;
5048 : }
5049 :
5050 : /************************************************************************/
5051 : /* GWKOneSourceCornerFailsToReproject() */
5052 : /************************************************************************/
5053 :
5054 778 : static bool GWKOneSourceCornerFailsToReproject(GWKJobStruct *psJob)
5055 : {
5056 778 : GDALWarpKernel *poWK = psJob->poWK;
5057 2324 : for (int iY = 0; iY <= 1; ++iY)
5058 : {
5059 4644 : for (int iX = 0; iX <= 1; ++iX)
5060 : {
5061 3098 : double dfXTmp = poWK->nSrcXOff + iX * poWK->nSrcXSize;
5062 3098 : double dfYTmp = poWK->nSrcYOff + iY * poWK->nSrcYSize;
5063 3098 : double dfZTmp = 0;
5064 3098 : int nSuccess = FALSE;
5065 3098 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp,
5066 : &dfYTmp, &dfZTmp, &nSuccess);
5067 3098 : if (!nSuccess)
5068 6 : return true;
5069 : }
5070 : }
5071 772 : return false;
5072 : }
5073 :
5074 : /************************************************************************/
5075 : /* GWKAdjustSrcOffsetOnEdge() */
5076 : /************************************************************************/
5077 :
5078 9714 : static bool GWKAdjustSrcOffsetOnEdge(GWKJobStruct *psJob,
5079 : GPtrDiff_t &iSrcOffset)
5080 : {
5081 9714 : GDALWarpKernel *poWK = psJob->poWK;
5082 9714 : const int nSrcXSize = poWK->nSrcXSize;
5083 9714 : const int nSrcYSize = poWK->nSrcYSize;
5084 :
5085 : // Check if the computed source position slightly altered
5086 : // fails to reproject. If so, then we are at the edge of
5087 : // the validity area, and it is worth checking neighbour
5088 : // source pixels for validity.
5089 9714 : int nSuccess = FALSE;
5090 : {
5091 9714 : double dfXTmp =
5092 9714 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5093 9714 : double dfYTmp =
5094 9714 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5095 9714 : double dfZTmp = 0;
5096 9714 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5097 : &dfZTmp, &nSuccess);
5098 : }
5099 9714 : if (nSuccess)
5100 : {
5101 6996 : double dfXTmp =
5102 6996 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5103 6996 : double dfYTmp =
5104 6996 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
5105 6996 : double dfZTmp = 0;
5106 6996 : nSuccess = FALSE;
5107 6996 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5108 : &dfZTmp, &nSuccess);
5109 : }
5110 9714 : if (nSuccess)
5111 : {
5112 5624 : double dfXTmp =
5113 5624 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
5114 5624 : double dfYTmp =
5115 5624 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5116 5624 : double dfZTmp = 0;
5117 5624 : nSuccess = FALSE;
5118 5624 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5119 : &dfZTmp, &nSuccess);
5120 : }
5121 :
5122 14166 : if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
5123 4452 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + 1))
5124 : {
5125 1860 : iSrcOffset++;
5126 1860 : return true;
5127 : }
5128 10290 : else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
5129 2436 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + nSrcXSize))
5130 : {
5131 1334 : iSrcOffset += nSrcXSize;
5132 1334 : return true;
5133 : }
5134 7838 : else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
5135 1318 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - 1))
5136 : {
5137 956 : iSrcOffset--;
5138 956 : return true;
5139 : }
5140 5924 : else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
5141 360 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - nSrcXSize))
5142 : {
5143 340 : iSrcOffset -= nSrcXSize;
5144 340 : return true;
5145 : }
5146 :
5147 5224 : return false;
5148 : }
5149 :
5150 : /************************************************************************/
5151 : /* GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity() */
5152 : /************************************************************************/
5153 :
5154 0 : static bool GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(GWKJobStruct *psJob,
5155 : GPtrDiff_t &iSrcOffset)
5156 : {
5157 0 : GDALWarpKernel *poWK = psJob->poWK;
5158 0 : const int nSrcXSize = poWK->nSrcXSize;
5159 0 : const int nSrcYSize = poWK->nSrcYSize;
5160 :
5161 : // Check if the computed source position slightly altered
5162 : // fails to reproject. If so, then we are at the edge of
5163 : // the validity area, and it is worth checking neighbour
5164 : // source pixels for validity.
5165 0 : int nSuccess = FALSE;
5166 : {
5167 0 : double dfXTmp =
5168 0 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5169 0 : double dfYTmp =
5170 0 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5171 0 : double dfZTmp = 0;
5172 0 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5173 : &dfZTmp, &nSuccess);
5174 : }
5175 0 : if (nSuccess)
5176 : {
5177 0 : double dfXTmp =
5178 0 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5179 0 : double dfYTmp =
5180 0 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
5181 0 : double dfZTmp = 0;
5182 0 : nSuccess = FALSE;
5183 0 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5184 : &dfZTmp, &nSuccess);
5185 : }
5186 0 : if (nSuccess)
5187 : {
5188 0 : double dfXTmp =
5189 0 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
5190 0 : double dfYTmp =
5191 0 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5192 0 : double dfZTmp = 0;
5193 0 : nSuccess = FALSE;
5194 0 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5195 : &dfZTmp, &nSuccess);
5196 : }
5197 :
5198 0 : if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
5199 0 : poWK->pafUnifiedSrcDensity[iSrcOffset + 1] >= SRC_DENSITY_THRESHOLD)
5200 : {
5201 0 : iSrcOffset++;
5202 0 : return true;
5203 : }
5204 0 : else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
5205 0 : poWK->pafUnifiedSrcDensity[iSrcOffset + nSrcXSize] >=
5206 : SRC_DENSITY_THRESHOLD)
5207 : {
5208 0 : iSrcOffset += nSrcXSize;
5209 0 : return true;
5210 : }
5211 0 : else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
5212 0 : poWK->pafUnifiedSrcDensity[iSrcOffset - 1] >=
5213 : SRC_DENSITY_THRESHOLD)
5214 : {
5215 0 : iSrcOffset--;
5216 0 : return true;
5217 : }
5218 0 : else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
5219 0 : poWK->pafUnifiedSrcDensity[iSrcOffset - nSrcXSize] >=
5220 : SRC_DENSITY_THRESHOLD)
5221 : {
5222 0 : iSrcOffset -= nSrcXSize;
5223 0 : return true;
5224 : }
5225 :
5226 0 : return false;
5227 : }
5228 :
5229 : /************************************************************************/
5230 : /* GWKGeneralCase() */
5231 : /* */
5232 : /* This is the most general case. It attempts to handle all */
5233 : /* possible features with relatively little concern for */
5234 : /* efficiency. */
5235 : /************************************************************************/
5236 :
5237 239 : static void GWKGeneralCaseThread(void *pData)
5238 : {
5239 239 : GWKJobStruct *psJob = reinterpret_cast<GWKJobStruct *>(pData);
5240 239 : GDALWarpKernel *poWK = psJob->poWK;
5241 239 : const int iYMin = psJob->iYMin;
5242 239 : const int iYMax = psJob->iYMax;
5243 : const double dfMultFactorVerticalShiftPipeline =
5244 239 : poWK->bApplyVerticalShift
5245 239 : ? CPLAtof(CSLFetchNameValueDef(
5246 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
5247 : "1.0"))
5248 239 : : 0.0;
5249 :
5250 239 : int nDstXSize = poWK->nDstXSize;
5251 239 : int nSrcXSize = poWK->nSrcXSize;
5252 239 : int nSrcYSize = poWK->nSrcYSize;
5253 :
5254 : /* -------------------------------------------------------------------- */
5255 : /* Allocate x,y,z coordinate arrays for transformation ... one */
5256 : /* scanlines worth of positions. */
5257 : /* -------------------------------------------------------------------- */
5258 : // For x, 2 *, because we cache the precomputed values at the end.
5259 : double *padfX =
5260 239 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
5261 : double *padfY =
5262 239 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5263 : double *padfZ =
5264 239 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5265 239 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5266 :
5267 239 : const bool bUse4SamplesFormula =
5268 239 : poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
5269 :
5270 239 : GWKResampleWrkStruct *psWrkStruct = nullptr;
5271 239 : if (poWK->eResample != GRA_NearestNeighbour)
5272 : {
5273 220 : psWrkStruct = GWKResampleCreateWrkStruct(poWK);
5274 : }
5275 239 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
5276 239 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
5277 239 : const double dfErrorThreshold = CPLAtof(
5278 239 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
5279 :
5280 : const bool bOneSourceCornerFailsToReproject =
5281 239 : GWKOneSourceCornerFailsToReproject(psJob);
5282 :
5283 : // Precompute values.
5284 6469 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5285 6230 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
5286 :
5287 : /* ==================================================================== */
5288 : /* Loop over output lines. */
5289 : /* ==================================================================== */
5290 6469 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
5291 : {
5292 : /* --------------------------------------------------------------------
5293 : */
5294 : /* Setup points to transform to source image space. */
5295 : /* --------------------------------------------------------------------
5296 : */
5297 6230 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
5298 6230 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
5299 242390 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5300 236160 : padfY[iDstX] = dfY;
5301 6230 : memset(padfZ, 0, sizeof(double) * nDstXSize);
5302 :
5303 : /* --------------------------------------------------------------------
5304 : */
5305 : /* Transform the points from destination pixel/line coordinates */
5306 : /* to source pixel/line coordinates. */
5307 : /* --------------------------------------------------------------------
5308 : */
5309 6230 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
5310 : padfY, padfZ, pabSuccess);
5311 6230 : if (dfSrcCoordPrecision > 0.0)
5312 : {
5313 0 : GWKRoundSourceCoordinates(
5314 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
5315 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
5316 0 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
5317 : }
5318 :
5319 : /* ====================================================================
5320 : */
5321 : /* Loop over pixels in output scanline. */
5322 : /* ====================================================================
5323 : */
5324 242390 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5325 : {
5326 236160 : GPtrDiff_t iSrcOffset = 0;
5327 236160 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
5328 : padfX, padfY, nSrcXSize,
5329 : nSrcYSize, iSrcOffset))
5330 0 : continue;
5331 :
5332 : /* --------------------------------------------------------------------
5333 : */
5334 : /* Do not try to apply transparent/invalid source pixels to the
5335 : */
5336 : /* destination. This currently ignores the multi-pixel input
5337 : */
5338 : /* of bilinear and cubic resamples. */
5339 : /* --------------------------------------------------------------------
5340 : */
5341 236160 : double dfDensity = 1.0;
5342 :
5343 236160 : if (poWK->pafUnifiedSrcDensity != nullptr)
5344 : {
5345 1200 : dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
5346 1200 : if (dfDensity < SRC_DENSITY_THRESHOLD)
5347 : {
5348 0 : if (!bOneSourceCornerFailsToReproject)
5349 : {
5350 0 : continue;
5351 : }
5352 0 : else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
5353 : psJob, iSrcOffset))
5354 : {
5355 0 : dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
5356 : }
5357 : else
5358 : {
5359 0 : continue;
5360 : }
5361 : }
5362 : }
5363 :
5364 236160 : if (poWK->panUnifiedSrcValid != nullptr &&
5365 0 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
5366 : {
5367 0 : if (!bOneSourceCornerFailsToReproject)
5368 : {
5369 0 : continue;
5370 : }
5371 0 : else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
5372 : {
5373 0 : continue;
5374 : }
5375 : }
5376 :
5377 : /* ====================================================================
5378 : */
5379 : /* Loop processing each band. */
5380 : /* ====================================================================
5381 : */
5382 236160 : bool bHasFoundDensity = false;
5383 :
5384 236160 : const GPtrDiff_t iDstOffset =
5385 236160 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
5386 472320 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
5387 : {
5388 236160 : double dfBandDensity = 0.0;
5389 236160 : double dfValueReal = 0.0;
5390 236160 : double dfValueImag = 0.0;
5391 :
5392 : /* --------------------------------------------------------------------
5393 : */
5394 : /* Collect the source value. */
5395 : /* --------------------------------------------------------------------
5396 : */
5397 236160 : if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
5398 : nSrcYSize == 1)
5399 : {
5400 : // FALSE is returned if dfBandDensity == 0, which is
5401 : // checked below.
5402 568 : CPL_IGNORE_RET_VAL(GWKGetPixelValue(
5403 : poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal,
5404 : &dfValueImag));
5405 : }
5406 235592 : else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
5407 : {
5408 248 : GWKBilinearResample4Sample(
5409 248 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5410 248 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5411 : &dfValueReal, &dfValueImag);
5412 : }
5413 235344 : else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
5414 : {
5415 248 : GWKCubicResample4Sample(
5416 248 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5417 248 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5418 : &dfValueReal, &dfValueImag);
5419 : }
5420 : else
5421 : #ifdef DEBUG
5422 : // Only useful for clang static analyzer.
5423 235096 : if (psWrkStruct != nullptr)
5424 : #endif
5425 : {
5426 235096 : psWrkStruct->pfnGWKResample(
5427 235096 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5428 235096 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5429 : &dfValueReal, &dfValueImag, psWrkStruct);
5430 : }
5431 :
5432 : // If we didn't find any valid inputs skip to next band.
5433 236160 : if (dfBandDensity < BAND_DENSITY_THRESHOLD)
5434 0 : continue;
5435 :
5436 236160 : if (poWK->bApplyVerticalShift)
5437 : {
5438 0 : if (!std::isfinite(padfZ[iDstX]))
5439 0 : continue;
5440 : // Subtract padfZ[] since the coordinate transformation is
5441 : // from target to source
5442 0 : dfValueReal =
5443 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
5444 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
5445 : }
5446 :
5447 236160 : bHasFoundDensity = true;
5448 :
5449 : /* --------------------------------------------------------------------
5450 : */
5451 : /* We have a computed value from the source. Now apply it
5452 : * to */
5453 : /* the destination pixel. */
5454 : /* --------------------------------------------------------------------
5455 : */
5456 236160 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
5457 : dfValueReal, dfValueImag);
5458 : }
5459 :
5460 236160 : if (!bHasFoundDensity)
5461 0 : continue;
5462 :
5463 : /* --------------------------------------------------------------------
5464 : */
5465 : /* Update destination density/validity masks. */
5466 : /* --------------------------------------------------------------------
5467 : */
5468 236160 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
5469 :
5470 236160 : if (poWK->panDstValid != nullptr)
5471 : {
5472 0 : CPLMaskSet(poWK->panDstValid, iDstOffset);
5473 : }
5474 : } /* Next iDstX */
5475 :
5476 : /* --------------------------------------------------------------------
5477 : */
5478 : /* Report progress to the user, and optionally cancel out. */
5479 : /* --------------------------------------------------------------------
5480 : */
5481 6230 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
5482 0 : break;
5483 : }
5484 :
5485 : /* -------------------------------------------------------------------- */
5486 : /* Cleanup and return. */
5487 : /* -------------------------------------------------------------------- */
5488 239 : CPLFree(padfX);
5489 239 : CPLFree(padfY);
5490 239 : CPLFree(padfZ);
5491 239 : CPLFree(pabSuccess);
5492 239 : if (psWrkStruct)
5493 220 : GWKResampleDeleteWrkStruct(psWrkStruct);
5494 239 : }
5495 :
5496 239 : static CPLErr GWKGeneralCase(GDALWarpKernel *poWK)
5497 : {
5498 239 : return GWKRun(poWK, "GWKGeneralCase", GWKGeneralCaseThread);
5499 : }
5500 :
5501 : /************************************************************************/
5502 : /* GWKRealCase() */
5503 : /* */
5504 : /* General case for non-complex data types. */
5505 : /************************************************************************/
5506 :
5507 151 : static void GWKRealCaseThread(void *pData)
5508 :
5509 : {
5510 151 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
5511 151 : GDALWarpKernel *poWK = psJob->poWK;
5512 151 : const int iYMin = psJob->iYMin;
5513 151 : const int iYMax = psJob->iYMax;
5514 :
5515 151 : const int nDstXSize = poWK->nDstXSize;
5516 151 : const int nSrcXSize = poWK->nSrcXSize;
5517 151 : const int nSrcYSize = poWK->nSrcYSize;
5518 : const double dfMultFactorVerticalShiftPipeline =
5519 151 : poWK->bApplyVerticalShift
5520 151 : ? CPLAtof(CSLFetchNameValueDef(
5521 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
5522 : "1.0"))
5523 151 : : 0.0;
5524 :
5525 : /* -------------------------------------------------------------------- */
5526 : /* Allocate x,y,z coordinate arrays for transformation ... one */
5527 : /* scanlines worth of positions. */
5528 : /* -------------------------------------------------------------------- */
5529 :
5530 : // For x, 2 *, because we cache the precomputed values at the end.
5531 : double *padfX =
5532 151 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
5533 : double *padfY =
5534 151 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5535 : double *padfZ =
5536 151 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5537 151 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5538 :
5539 151 : const bool bUse4SamplesFormula =
5540 151 : poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
5541 :
5542 151 : GWKResampleWrkStruct *psWrkStruct = nullptr;
5543 151 : if (poWK->eResample != GRA_NearestNeighbour)
5544 : {
5545 124 : psWrkStruct = GWKResampleCreateWrkStruct(poWK);
5546 : }
5547 151 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
5548 151 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
5549 151 : const double dfErrorThreshold = CPLAtof(
5550 151 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
5551 :
5552 438 : const bool bSrcMaskIsDensity = poWK->panUnifiedSrcValid == nullptr &&
5553 287 : poWK->papanBandSrcValid == nullptr &&
5554 136 : poWK->pafUnifiedSrcDensity != nullptr;
5555 :
5556 : const bool bOneSourceCornerFailsToReproject =
5557 151 : GWKOneSourceCornerFailsToReproject(psJob);
5558 :
5559 : // Precompute values.
5560 19528 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5561 19377 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
5562 :
5563 : /* ==================================================================== */
5564 : /* Loop over output lines. */
5565 : /* ==================================================================== */
5566 22231 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
5567 : {
5568 : /* --------------------------------------------------------------------
5569 : */
5570 : /* Setup points to transform to source image space. */
5571 : /* --------------------------------------------------------------------
5572 : */
5573 22080 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
5574 22080 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
5575 43558400 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5576 43536300 : padfY[iDstX] = dfY;
5577 22080 : memset(padfZ, 0, sizeof(double) * nDstXSize);
5578 :
5579 : /* --------------------------------------------------------------------
5580 : */
5581 : /* Transform the points from destination pixel/line coordinates */
5582 : /* to source pixel/line coordinates. */
5583 : /* --------------------------------------------------------------------
5584 : */
5585 22080 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
5586 : padfY, padfZ, pabSuccess);
5587 22080 : if (dfSrcCoordPrecision > 0.0)
5588 : {
5589 0 : GWKRoundSourceCoordinates(
5590 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
5591 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
5592 0 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
5593 : }
5594 :
5595 : /* ====================================================================
5596 : */
5597 : /* Loop over pixels in output scanline. */
5598 : /* ====================================================================
5599 : */
5600 43558400 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5601 : {
5602 43536300 : GPtrDiff_t iSrcOffset = 0;
5603 43536300 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
5604 : padfX, padfY, nSrcXSize,
5605 : nSrcYSize, iSrcOffset))
5606 42894000 : continue;
5607 :
5608 : /* --------------------------------------------------------------------
5609 : */
5610 : /* Do not try to apply transparent/invalid source pixels to the
5611 : */
5612 : /* destination. This currently ignores the multi-pixel input
5613 : */
5614 : /* of bilinear and cubic resamples. */
5615 : /* --------------------------------------------------------------------
5616 : */
5617 31479800 : double dfDensity = 1.0;
5618 :
5619 31479800 : if (poWK->pafUnifiedSrcDensity != nullptr)
5620 : {
5621 1360180 : dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
5622 1360180 : if (dfDensity < SRC_DENSITY_THRESHOLD)
5623 : {
5624 1308710 : if (!bOneSourceCornerFailsToReproject)
5625 : {
5626 1308710 : continue;
5627 : }
5628 0 : else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
5629 : psJob, iSrcOffset))
5630 : {
5631 0 : dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
5632 : }
5633 : else
5634 : {
5635 0 : continue;
5636 : }
5637 : }
5638 : }
5639 :
5640 59799700 : if (poWK->panUnifiedSrcValid != nullptr &&
5641 29628600 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
5642 : {
5643 29531000 : if (!bOneSourceCornerFailsToReproject)
5644 : {
5645 29528700 : continue;
5646 : }
5647 2229 : else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
5648 : {
5649 0 : continue;
5650 : }
5651 : }
5652 :
5653 : /* ====================================================================
5654 : */
5655 : /* Loop processing each band. */
5656 : /* ====================================================================
5657 : */
5658 642336 : bool bHasFoundDensity = false;
5659 :
5660 642336 : const GPtrDiff_t iDstOffset =
5661 642336 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
5662 1716290 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
5663 : {
5664 1073960 : double dfBandDensity = 0.0;
5665 1073960 : double dfValueReal = 0.0;
5666 :
5667 : /* --------------------------------------------------------------------
5668 : */
5669 : /* Collect the source value. */
5670 : /* --------------------------------------------------------------------
5671 : */
5672 1073960 : if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
5673 : nSrcYSize == 1)
5674 : {
5675 : // FALSE is returned if dfBandDensity == 0, which is
5676 : // checked below.
5677 151448 : CPL_IGNORE_RET_VAL(GWKGetPixelValueReal(
5678 : poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal));
5679 : }
5680 922509 : else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
5681 : {
5682 1086 : double dfValueImagIgnored = 0.0;
5683 1086 : GWKBilinearResample4Sample(
5684 1086 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5685 1086 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5686 1086 : &dfValueReal, &dfValueImagIgnored);
5687 : }
5688 921423 : else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
5689 : {
5690 299992 : if (bSrcMaskIsDensity)
5691 : {
5692 361 : if (poWK->eWorkingDataType == GDT_Byte)
5693 : {
5694 361 : GWKCubicResampleSrcMaskIsDensity4SampleRealT<GByte>(
5695 361 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5696 361 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5697 : &dfValueReal);
5698 : }
5699 0 : else if (poWK->eWorkingDataType == GDT_UInt16)
5700 : {
5701 : GWKCubicResampleSrcMaskIsDensity4SampleRealT<
5702 0 : GUInt16>(poWK, iBand,
5703 0 : padfX[iDstX] - poWK->nSrcXOff,
5704 0 : padfY[iDstX] - poWK->nSrcYOff,
5705 : &dfBandDensity, &dfValueReal);
5706 : }
5707 : else
5708 : {
5709 0 : GWKCubicResampleSrcMaskIsDensity4SampleReal(
5710 0 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5711 0 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5712 : &dfValueReal);
5713 : }
5714 : }
5715 : else
5716 : {
5717 299631 : double dfValueImagIgnored = 0.0;
5718 299631 : GWKCubicResample4Sample(
5719 299631 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5720 299631 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5721 : &dfValueReal, &dfValueImagIgnored);
5722 299992 : }
5723 : }
5724 : else
5725 : #ifdef DEBUG
5726 : // Only useful for clang static analyzer.
5727 621431 : if (psWrkStruct != nullptr)
5728 : #endif
5729 : {
5730 621431 : double dfValueImagIgnored = 0.0;
5731 621431 : psWrkStruct->pfnGWKResample(
5732 621431 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5733 621431 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5734 : &dfValueReal, &dfValueImagIgnored, psWrkStruct);
5735 : }
5736 :
5737 : // If we didn't find any valid inputs skip to next band.
5738 1073960 : if (dfBandDensity < BAND_DENSITY_THRESHOLD)
5739 0 : continue;
5740 :
5741 1073960 : if (poWK->bApplyVerticalShift)
5742 : {
5743 0 : if (!std::isfinite(padfZ[iDstX]))
5744 0 : continue;
5745 : // Subtract padfZ[] since the coordinate transformation is
5746 : // from target to source
5747 0 : dfValueReal =
5748 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
5749 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
5750 : }
5751 :
5752 1073960 : bHasFoundDensity = true;
5753 :
5754 : /* --------------------------------------------------------------------
5755 : */
5756 : /* We have a computed value from the source. Now apply it
5757 : * to */
5758 : /* the destination pixel. */
5759 : /* --------------------------------------------------------------------
5760 : */
5761 1073960 : GWKSetPixelValueReal(poWK, iBand, iDstOffset, dfBandDensity,
5762 : dfValueReal);
5763 : }
5764 :
5765 642336 : if (!bHasFoundDensity)
5766 0 : continue;
5767 :
5768 : /* --------------------------------------------------------------------
5769 : */
5770 : /* Update destination density/validity masks. */
5771 : /* --------------------------------------------------------------------
5772 : */
5773 642336 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
5774 :
5775 642336 : if (poWK->panDstValid != nullptr)
5776 : {
5777 101716 : CPLMaskSet(poWK->panDstValid, iDstOffset);
5778 : }
5779 : } // Next iDstX.
5780 :
5781 : /* --------------------------------------------------------------------
5782 : */
5783 : /* Report progress to the user, and optionally cancel out. */
5784 : /* --------------------------------------------------------------------
5785 : */
5786 22080 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
5787 0 : break;
5788 : }
5789 :
5790 : /* -------------------------------------------------------------------- */
5791 : /* Cleanup and return. */
5792 : /* -------------------------------------------------------------------- */
5793 151 : CPLFree(padfX);
5794 151 : CPLFree(padfY);
5795 151 : CPLFree(padfZ);
5796 151 : CPLFree(pabSuccess);
5797 151 : if (psWrkStruct)
5798 124 : GWKResampleDeleteWrkStruct(psWrkStruct);
5799 151 : }
5800 :
5801 151 : static CPLErr GWKRealCase(GDALWarpKernel *poWK)
5802 : {
5803 151 : return GWKRun(poWK, "GWKRealCase", GWKRealCaseThread);
5804 : }
5805 :
5806 : /************************************************************************/
5807 : /* GWKCubicResampleNoMasks4MultiBandT() */
5808 : /************************************************************************/
5809 :
5810 : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
5811 : /* and enough SSE registers */
5812 : #if defined(USE_SSE2)
5813 :
5814 238596 : static inline float Convolute4x4(const __m128 row0, const __m128 row1,
5815 : const __m128 row2, const __m128 row3,
5816 : const __m128 weightsXY0,
5817 : const __m128 weightsXY1,
5818 : const __m128 weightsXY2,
5819 : const __m128 weightsXY3)
5820 : {
5821 1670170 : return XMMHorizontalAdd(_mm_add_ps(
5822 : _mm_add_ps(_mm_mul_ps(row0, weightsXY0), _mm_mul_ps(row1, weightsXY1)),
5823 : _mm_add_ps(_mm_mul_ps(row2, weightsXY2),
5824 238596 : _mm_mul_ps(row3, weightsXY3))));
5825 : }
5826 :
5827 : template <class T>
5828 81323 : static void GWKCubicResampleNoMasks4MultiBandT(const GDALWarpKernel *poWK,
5829 : double dfSrcX, double dfSrcY,
5830 : const GPtrDiff_t iDstOffset)
5831 : {
5832 81323 : const double dfSrcXShifted = dfSrcX - 0.5;
5833 81323 : const int iSrcX = static_cast<int>(dfSrcXShifted);
5834 81323 : const double dfSrcYShifted = dfSrcY - 0.5;
5835 81323 : const int iSrcY = static_cast<int>(dfSrcYShifted);
5836 81323 : const GPtrDiff_t iSrcOffset =
5837 81323 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
5838 :
5839 : // Get the bilinear interpolation at the image borders.
5840 81323 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
5841 80326 : iSrcY + 2 >= poWK->nSrcYSize)
5842 : {
5843 7164 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
5844 : {
5845 : T value;
5846 5373 : GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
5847 : &value);
5848 5373 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
5849 : value;
5850 1791 : }
5851 : }
5852 : else
5853 : {
5854 79532 : const float fDeltaX = static_cast<float>(dfSrcXShifted) - iSrcX;
5855 79532 : const float fDeltaY = static_cast<float>(dfSrcYShifted) - iSrcY;
5856 :
5857 : float afCoeffsX[4];
5858 : float afCoeffsY[4];
5859 79532 : GWKCubicComputeWeights(fDeltaX, afCoeffsX);
5860 79532 : GWKCubicComputeWeights(fDeltaY, afCoeffsY);
5861 79532 : const auto weightsX = _mm_loadu_ps(afCoeffsX);
5862 : const auto weightsXY0 =
5863 159064 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[0]), weightsX);
5864 : const auto weightsXY1 =
5865 159064 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[1]), weightsX);
5866 : const auto weightsXY2 =
5867 159064 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[2]), weightsX);
5868 : const auto weightsXY3 =
5869 79532 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[3]), weightsX);
5870 :
5871 79532 : const GPtrDiff_t iOffset = iSrcOffset - poWK->nSrcXSize - 1;
5872 :
5873 79532 : int iBand = 0;
5874 : // Process 2 bands at a time
5875 159064 : for (; iBand + 1 < poWK->nBands; iBand += 2)
5876 : {
5877 79532 : const T *CPL_RESTRICT pBand0 =
5878 79532 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
5879 79532 : const auto row0_0 = XMMLoad4Values(pBand0 + iOffset);
5880 : const auto row1_0 =
5881 79532 : XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
5882 : const auto row2_0 =
5883 79532 : XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
5884 : const auto row3_0 =
5885 79532 : XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
5886 :
5887 79532 : const T *CPL_RESTRICT pBand1 =
5888 79532 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand + 1]);
5889 79532 : const auto row0_1 = XMMLoad4Values(pBand1 + iOffset);
5890 : const auto row1_1 =
5891 79532 : XMMLoad4Values(pBand1 + iOffset + poWK->nSrcXSize);
5892 : const auto row2_1 =
5893 79532 : XMMLoad4Values(pBand1 + iOffset + 2 * poWK->nSrcXSize);
5894 : const auto row3_1 =
5895 79532 : XMMLoad4Values(pBand1 + iOffset + 3 * poWK->nSrcXSize);
5896 :
5897 : const float fValue_0 =
5898 79532 : Convolute4x4(row0_0, row1_0, row2_0, row3_0, weightsXY0,
5899 : weightsXY1, weightsXY2, weightsXY3);
5900 :
5901 : const float fValue_1 =
5902 79532 : Convolute4x4(row0_1, row1_1, row2_1, row3_1, weightsXY0,
5903 : weightsXY1, weightsXY2, weightsXY3);
5904 :
5905 79532 : T *CPL_RESTRICT pDstBand0 =
5906 79532 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
5907 79532 : pDstBand0[iDstOffset] = GWKClampValueT<T>(fValue_0);
5908 :
5909 79532 : T *CPL_RESTRICT pDstBand1 =
5910 79532 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand + 1]);
5911 79532 : pDstBand1[iDstOffset] = GWKClampValueT<T>(fValue_1);
5912 : }
5913 79532 : if (iBand < poWK->nBands)
5914 : {
5915 79532 : const T *CPL_RESTRICT pBand0 =
5916 79532 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
5917 79532 : const auto row0 = XMMLoad4Values(pBand0 + iOffset);
5918 : const auto row1 =
5919 79532 : XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
5920 : const auto row2 =
5921 79532 : XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
5922 : const auto row3 =
5923 79532 : XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
5924 :
5925 : const float fValue =
5926 79532 : Convolute4x4(row0, row1, row2, row3, weightsXY0, weightsXY1,
5927 : weightsXY2, weightsXY3);
5928 :
5929 79532 : T *CPL_RESTRICT pDstBand =
5930 79532 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
5931 79532 : pDstBand[iDstOffset] = GWKClampValueT<T>(fValue);
5932 : }
5933 : }
5934 :
5935 81323 : if (poWK->pafDstDensity)
5936 441 : poWK->pafDstDensity[iDstOffset] = 1.0f;
5937 81323 : }
5938 :
5939 : #endif // defined(USE_SSE2)
5940 :
5941 : /************************************************************************/
5942 : /* GWKResampleNoMasksOrDstDensityOnlyThreadInternal() */
5943 : /************************************************************************/
5944 :
// Job worker that warps destination rows [psJob->iYMin, psJob->iYMax).
//
// The body below never reads a source validity or density mask itself; the
// only mask-like buffer it touches is poWK->pafDstDensity, which is set to
// 1.0f for every destination pixel that receives a value.
//
// Template parameters:
//   T                    pixel type the source/destination band buffers are
//                        reinterpret_cast to.
//   eResample            resampling algorithm, resolved at compile time via
//                        if constexpr.
//   bUse4SamplesFormula  non-zero selects the dedicated 4-sample bilinear /
//                        cubic helpers; zero (for anything other than
//                        nearest neighbour) goes through
//                        GWKResampleNoMasksT().
// Argument:
//   pData                pointer to the GWKJobStruct describing the job.
5945 : template <class T, GDALResampleAlg eResample, int bUse4SamplesFormula>
5946 1189 : static void GWKResampleNoMasksOrDstDensityOnlyThreadInternal(void *pData)
5947 :
5948 : {
5949 1189 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
5950 1189 : GDALWarpKernel *poWK = psJob->poWK;
5951 1189 : const int iYMin = psJob->iYMin;
5952 1189 : const int iYMax = psJob->iYMax;
// Factor applied to the transformed Z value when a vertical shift is
// requested; read from the MULT_FACTOR_VERTICAL_SHIFT_PIPELINE warp option
// (default "1.0"), and left at 0.0 when bApplyVerticalShift is not set.
5953 1171 : const double dfMultFactorVerticalShiftPipeline =
5954 1189 : poWK->bApplyVerticalShift
5955 18 : ? CPLAtof(CSLFetchNameValueDef(
5956 18 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
5957 : "1.0"))
5958 : : 0.0;
5959 :
5960 1189 : const int nDstXSize = poWK->nDstXSize;
5961 1189 : const int nSrcXSize = poWK->nSrcXSize;
5962 1189 : const int nSrcYSize = poWK->nSrcYSize;
5963 :
5964 : /* -------------------------------------------------------------------- */
5965 : /* Allocate x,y,z coordinate arrays for transformation ... one */
5966 : /* scanlines worth of positions. */
5967 : /* -------------------------------------------------------------------- */
5968 :
5969 : // For x, 2 *, because we cache the precomputed values at the end.
5970 : double *padfX =
5971 1189 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
5972 : double *padfY =
5973 1189 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5974 : double *padfZ =
5975 1189 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5976 1189 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5977 :
// Zero-initialised scratch arrays of 1 + 2 * radius doubles; in this
// function they are only handed to GWKResampleNoMasksT().
5978 1189 : const int nXRadius = poWK->nXRadius;
5979 : double *padfWeightsX =
5980 1189 : static_cast<double *>(CPLCalloc(1 + nXRadius * 2, sizeof(double)));
5981 : double *padfWeightsY = static_cast<double *>(
5982 1189 : CPLCalloc(1 + poWK->nYRadius * 2, sizeof(double)));
// Both options default to "0"; a positive SRC_COORD_PRECISION enables the
// GWKRoundSourceCoordinates() call in the row loop.
5983 1189 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
5984 1189 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
5985 1189 : const double dfErrorThreshold = CPLAtof(
5986 1189 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
5987 :
5988 : // Precompute values.
// The second half of padfX holds the destination pixel-centre X coordinates;
// they are copied into the first half at the start of every row.
5989 256069 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5990 254880 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
5991 :
5992 : /* ==================================================================== */
5993 : /* Loop over output lines. */
5994 : /* ==================================================================== */
5995 130279 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
5996 : {
5997 : /* --------------------------------------------------------------------
5998 : */
5999 : /* Setup points to transform to source image space. */
6000 : /* --------------------------------------------------------------------
6001 : */
6002 129092 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
6003 129092 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
6004 56026240 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6005 55897120 : padfY[iDstX] = dfY;
6006 129092 : memset(padfZ, 0, sizeof(double) * nDstXSize);
6007 :
6008 : /* --------------------------------------------------------------------
6009 : */
6010 : /* Transform the points from destination pixel/line coordinates */
6011 : /* to source pixel/line coordinates. */
6012 : /* --------------------------------------------------------------------
6013 : */
6014 129092 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
6015 : padfY, padfZ, pabSuccess);
6016 129092 : if (dfSrcCoordPrecision > 0.0)
6017 : {
6018 1000 : GWKRoundSourceCoordinates(
6019 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
6020 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6021 1000 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
6022 : }
6023 :
6024 : /* ====================================================================
6025 : */
6026 : /* Loop over pixels in output scanline. */
6027 : /* ====================================================================
6028 : */
6029 56028450 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6030 : {
// The helper returning false leaves this destination pixel untouched.
6031 55899330 : GPtrDiff_t iSrcOffset = 0;
6032 55899330 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
6033 : padfX, padfY, nSrcXSize,
6034 : nSrcYSize, iSrcOffset))
6035 6608460 : continue;
6036 :
6037 : /* ====================================================================
6038 : */
6039 : /* Loop processing each band. */
6040 : /* ====================================================================
6041 : */
6042 49339138 : const GPtrDiff_t iDstOffset =
6043 49339138 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
6044 :
// Multi-band fast path (cubic 4-sample formula, GByte or GUInt16, more than
// one band, no vertical shift): a single helper call handles the pixel and
// the per-band loop below is skipped.
6045 : #if defined(USE_SSE2)
6046 : if constexpr (bUse4SamplesFormula && eResample == GRA_Cubic &&
6047 : (std::is_same<T, GByte>::value ||
6048 : std::is_same<T, GUInt16>::value))
6049 : {
6050 752574 : if (poWK->nBands > 1 && !poWK->bApplyVerticalShift)
6051 : {
6052 81323 : GWKCubicResampleNoMasks4MultiBandT<T>(
6053 81323 : poWK, padfX[iDstX] - poWK->nSrcXOff,
6054 81323 : padfY[iDstX] - poWK->nSrcYOff, iDstOffset);
6055 :
6056 81323 : continue;
6057 : }
6058 : }
6059 : #endif // defined(USE_SSE2)
6060 :
// dfInvWeights is only passed to GWKResampleNoMasksT(); [[maybe_unused]]
// covers the instantiations that never reach that branch.
6061 49257815 : [[maybe_unused]] double dfInvWeights = 0;
6062 139782922 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6063 : {
6064 90492185 : T value = 0;
6065 : if constexpr (eResample == GRA_NearestNeighbour)
6066 : {
6067 76037130 : value = reinterpret_cast<T *>(
6068 76037130 : poWK->papabySrcImage[iBand])[iSrcOffset];
6069 : }
6070 : else if constexpr (bUse4SamplesFormula)
6071 : {
6072 : if constexpr (eResample == GRA_Bilinear)
6073 3363189 : GWKBilinearResampleNoMasks4SampleT(
6074 3363189 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6075 3363189 : padfY[iDstX] - poWK->nSrcYOff, &value);
6076 : else
6077 1906603 : GWKCubicResampleNoMasks4SampleT(
6078 1906603 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6079 1906603 : padfY[iDstX] - poWK->nSrcYOff, &value);
6080 : }
6081 : else
6082 : {
6083 9185263 : GWKResampleNoMasksT(
6084 9185263 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6085 9185263 : padfY[iDstX] - poWK->nSrcYOff, &value, padfWeightsX,
6086 : padfWeightsY, dfInvWeights);
6087 : }
6088 :
6089 90486835 : if (poWK->bApplyVerticalShift)
6090 : {
// Non-finite Z: skip this band without writing its value or the density.
6091 818 : if (!std::isfinite(padfZ[iDstX]))
6092 0 : continue;
6093 : // Subtract padfZ[] since the coordinate transformation is
6094 : // from target to source
6095 39547 : value = GWKClampValueT<T>(
6096 818 : value * poWK->dfMultFactorVerticalShift -
6097 818 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
6098 : }
6099 :
6100 90525305 : if (poWK->pafDstDensity)
6101 11712309 : poWK->pafDstDensity[iDstOffset] = 1.0f;
6102 :
6103 90525305 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
6104 : value;
6105 : }
6106 : }
6107 :
6108 : /* --------------------------------------------------------------------
6109 : */
6110 : /* Report progress to the user, and optionally cancel out. */
6111 : /* --------------------------------------------------------------------
6112 : */
// A non-zero return from the job's progress callback ends the row loop early.
6113 129092 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
6114 2 : break;
6115 : }
6116 :
6117 : /* -------------------------------------------------------------------- */
6118 : /* Cleanup and return. */
6119 : /* -------------------------------------------------------------------- */
6120 1189 : CPLFree(padfX);
6121 1189 : CPLFree(padfY);
6122 1189 : CPLFree(padfZ);
6123 1189 : CPLFree(pabSuccess);
6124 1189 : CPLFree(padfWeightsX);
6125 1189 : CPLFree(padfWeightsY);
6126 1189 : }
6127 :
// Job entry point that always forwards pData to the
// bUse4SamplesFormula == FALSE instantiation of
// GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, ...>.
6128 : template <class T, GDALResampleAlg eResample>
6129 947 : static void GWKResampleNoMasksOrDstDensityOnlyThread(void *pData)
6130 : {
6131 947 : GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
6132 : pData);
6133 947 : }
6134 :
// Job entry point for bilinear and cubic resampling (enforced by the
// static_assert). It inspects the kernel's X/Y scale at run time and forwards
// pData to the TRUE (4-sample formula) instantiation of
// GWKResampleNoMasksOrDstDensityOnlyThreadInternal when both scales are
// >= 0.95, and to the FALSE instantiation otherwise.
6135 : template <class T, GDALResampleAlg eResample>
6136 242 : static void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread(void *pData)
6137 :
6138 : {
6139 242 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6140 242 : GDALWarpKernel *poWK = psJob->poWK;
6141 : static_assert(eResample == GRA_Bilinear || eResample == GRA_Cubic);
6142 242 : const bool bUse4SamplesFormula =
6143 242 : poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
6144 242 : if (bUse4SamplesFormula)
6145 142 : GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, TRUE>(
6146 : pData);
6147 : else
6148 100 : GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
6149 : pData);
6150 242 : }
6151 :
// Runs the GByte / GRA_NearestNeighbour instantiation of
// GWKResampleNoMasksOrDstDensityOnlyThread through GWKRun() and returns
// GWKRun()'s result. The string argument is this function's own name.
6152 896 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6153 : {
6154 896 : return GWKRun(
6155 : poWK, "GWKNearestNoMasksOrDstDensityOnlyByte",
6156 896 : GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_NearestNeighbour>);
6157 : }
6158 :
// Runs the GByte / GRA_Bilinear instantiation of
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread through GWKRun() and
// returns GWKRun()'s result. The string argument is this function's own name.
6159 126 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6160 : {
6161 126 : return GWKRun(
6162 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyByte",
6163 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte,
6164 126 : GRA_Bilinear>);
6165 : }
6166 :
// Runs the GByte / GRA_Cubic instantiation of
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread through GWKRun() and
// returns GWKRun()'s result. The string argument is this function's own name.
6167 72 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6168 : {
6169 72 : return GWKRun(
6170 : poWK, "GWKCubicNoMasksOrDstDensityOnlyByte",
6171 72 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte, GRA_Cubic>);
6172 : }
6173 :
// Runs the float / GRA_Cubic instantiation of
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread through GWKRun() and
// returns GWKRun()'s result. The string argument is this function's own name.
6174 9 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
6175 : {
6176 9 : return GWKRun(
6177 : poWK, "GWKCubicNoMasksOrDstDensityOnlyFloat",
6178 9 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, GRA_Cubic>);
6179 : }
6180 :
6181 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
6182 :
// Only compiled when INSTANTIATE_FLOAT64_SSE2_IMPL is defined. Runs the
// double / GRA_Cubic instantiation of
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread through GWKRun() and
// returns GWKRun()'s result. The string argument is this function's own name.
6183 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
6184 : {
6185 : return GWKRun(
6186 : poWK, "GWKCubicNoMasksOrDstDensityOnlyDouble",
6187 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double, GRA_Cubic>);
6188 : }
6189 : #endif
6190 :
// Runs the GByte / GRA_CubicSpline instantiation of
// GWKResampleNoMasksOrDstDensityOnlyThread through GWKRun() and returns
// GWKRun()'s result. The string argument is this function's own name.
6191 12 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6192 : {
6193 12 : return GWKRun(
6194 : poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyByte",
6195 12 : GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_CubicSpline>);
6196 : }
6197 :
6198 : /************************************************************************/
6199 : /* GWKNearestThread() */
6200 : /* */
6201 : /* Nearest neighbour resampling templated on the pixel type T, */
6202 : /* using valid flags. Should be as fast as possible for this */
6203 : /* particular transformation type. */
6204 : /************************************************************************/
6205 :
// Job worker that processes destination rows [psJob->iYMin, psJob->iYMax).
// For each destination pixel it computes a single source offset, consults
// the unified source validity mask and unified source density when those
// buffers are present, then copies each band through GWKGetPixelT() /
// GWKSetPixelValueRealT() and finally updates the destination density and
// validity mask.
//   T      pixel type passed on to the Get/Set/Clamp helpers.
//   pData  pointer to the GWKJobStruct describing the job.
6206 388 : template <class T> static void GWKNearestThread(void *pData)
6207 :
6208 : {
6209 388 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6210 388 : GDALWarpKernel *poWK = psJob->poWK;
6211 388 : const int iYMin = psJob->iYMin;
6212 388 : const int iYMax = psJob->iYMax;
// Factor applied to the transformed Z value when a vertical shift is
// requested; read from the MULT_FACTOR_VERTICAL_SHIFT_PIPELINE warp option
// (default "1.0"), and left at 0.0 when bApplyVerticalShift is not set.
6213 388 : const double dfMultFactorVerticalShiftPipeline =
6214 388 : poWK->bApplyVerticalShift
6215 0 : ? CPLAtof(CSLFetchNameValueDef(
6216 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
6217 : "1.0"))
6218 : : 0.0;
6219 :
6220 388 : const int nDstXSize = poWK->nDstXSize;
6221 388 : const int nSrcXSize = poWK->nSrcXSize;
6222 388 : const int nSrcYSize = poWK->nSrcYSize;
6223 :
6224 : /* -------------------------------------------------------------------- */
6225 : /* Allocate x,y,z coordinate arrays for transformation ... one */
6226 : /* scanlines worth of positions. */
6227 : /* -------------------------------------------------------------------- */
6228 :
6229 : // For x, 2 *, because we cache the precomputed values at the end.
6230 : double *padfX =
6231 388 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
6232 : double *padfY =
6233 388 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6234 : double *padfZ =
6235 388 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6236 388 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
6237 :
// Both options default to "0"; a positive SRC_COORD_PRECISION enables the
// GWKRoundSourceCoordinates() call in the row loop.
6238 388 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
6239 388 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
6240 388 : const double dfErrorThreshold = CPLAtof(
6241 388 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
6242 :
// Evaluated once per job; decides below whether an invalid source pixel is
// skipped outright or first retried through GWKAdjustSrcOffsetOnEdge().
6243 : const bool bOneSourceCornerFailsToReproject =
6244 388 : GWKOneSourceCornerFailsToReproject(psJob);
6245 :
6246 : // Precompute values.
6247 56961 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6248 56573 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
6249 :
6250 : /* ==================================================================== */
6251 : /* Loop over output lines. */
6252 : /* ==================================================================== */
6253 41998 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
6254 : {
6255 :
6256 : /* --------------------------------------------------------------------
6257 : */
6258 : /* Setup points to transform to source image space. */
6259 : /* --------------------------------------------------------------------
6260 : */
6261 41610 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
6262 41610 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
6263 8768225 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6264 8726610 : padfY[iDstX] = dfY;
6265 41610 : memset(padfZ, 0, sizeof(double) * nDstXSize);
6266 :
6267 : /* --------------------------------------------------------------------
6268 : */
6269 : /* Transform the points from destination pixel/line coordinates */
6270 : /* to source pixel/line coordinates. */
6271 : /* --------------------------------------------------------------------
6272 : */
6273 41610 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
6274 : padfY, padfZ, pabSuccess);
6275 41610 : if (dfSrcCoordPrecision > 0.0)
6276 : {
6277 0 : GWKRoundSourceCoordinates(
6278 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
6279 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6280 0 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
6281 : }
6282 : /* ====================================================================
6283 : */
6284 : /* Loop over pixels in output scanline. */
6285 : /* ====================================================================
6286 : */
6287 8768225 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6288 : {
// The helper returning false leaves this destination pixel untouched.
6289 8726610 : GPtrDiff_t iSrcOffset = 0;
6290 8726610 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
6291 : padfX, padfY, nSrcXSize,
6292 : nSrcYSize, iSrcOffset))
6293 2242803 : continue;
6294 :
6295 : /* --------------------------------------------------------------------
6296 : */
6297 : /* Do not try to apply invalid source pixels to the dest. */
6298 : /* --------------------------------------------------------------------
6299 : */
// An invalid source pixel is skipped, unless a source corner failed to
// reproject and GWKAdjustSrcOffsetOnEdge() succeeds for iSrcOffset.
6300 8418056 : if (poWK->panUnifiedSrcValid != nullptr &&
6301 930304 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
6302 : {
6303 48956 : if (!bOneSourceCornerFailsToReproject)
6304 : {
6305 41471 : continue;
6306 : }
6307 7485 : else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
6308 : {
6309 5224 : continue;
6310 : }
6311 : }
6312 :
6313 : /* --------------------------------------------------------------------
6314 : */
6315 : /* Do not try to apply transparent source pixels to the
6316 : * destination.*/
6317 : /* --------------------------------------------------------------------
6318 : */
6319 7441059 : double dfDensity = 1.0;
6320 :
6321 7441059 : if (poWK->pafUnifiedSrcDensity != nullptr)
6322 : {
6323 1064945 : dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
6324 1064945 : if (dfDensity < SRC_DENSITY_THRESHOLD)
6325 957251 : continue;
6326 : }
6327 :
6328 : /* ====================================================================
6329 : */
6330 : /* Loop processing each band. */
6331 : /* ====================================================================
6332 : */
6333 :
6334 6483807 : const GPtrDiff_t iDstOffset =
6335 6483807 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
6336 :
6337 14642964 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6338 : {
6339 8159117 : T value = 0;
6340 8159117 : double dfBandDensity = 0.0;
6341 :
6342 : /* --------------------------------------------------------------------
6343 : */
6344 : /* Collect the source value. */
6345 : /* --------------------------------------------------------------------
6346 : */
6347 8159117 : if (GWKGetPixelT(poWK, iBand, iSrcOffset, &dfBandDensity,
6348 : &value))
6349 : {
6350 :
6351 8159107 : if (poWK->bApplyVerticalShift)
6352 : {
// Non-finite Z: skip this band without writing it.
6353 0 : if (!std::isfinite(padfZ[iDstX]))
6354 0 : continue;
6355 : // Subtract padfZ[] since the coordinate transformation
6356 : // is from target to source
6357 0 : value = GWKClampValueT<T>(
6358 0 : value * poWK->dfMultFactorVerticalShift -
6359 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
6360 : }
6361 :
6362 8159107 : GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
6363 : dfBandDensity, value);
6364 : }
6365 : }
6366 :
6367 : /* --------------------------------------------------------------------
6368 : */
6369 : /* Mark this pixel valid/opaque in the output. */
6370 : /* --------------------------------------------------------------------
6371 : */
6372 6483807 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
6373 :
6374 6483807 : if (poWK->panDstValid != nullptr)
6375 : {
6376 5854774 : CPLMaskSet(poWK->panDstValid, iDstOffset);
6377 : }
6378 : } /* Next iDstX */
6379 :
6380 : /* --------------------------------------------------------------------
6381 : */
6382 : /* Report progress to the user, and optionally cancel out. */
6383 : /* --------------------------------------------------------------------
6384 : */
// A non-zero return from the job's progress callback ends the row loop early.
6385 41610 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
6386 0 : break;
6387 : }
6388 :
6389 : /* -------------------------------------------------------------------- */
6390 : /* Cleanup and return. */
6391 : /* -------------------------------------------------------------------- */
6392 388 : CPLFree(padfX);
6393 388 : CPLFree(padfY);
6394 388 : CPLFree(padfZ);
6395 388 : CPLFree(pabSuccess);
6396 388 : }
6397 :
// Runs GWKNearestThread<GByte> through GWKRun() and returns GWKRun()'s
// result. The string argument is this function's own name.
6398 324 : static CPLErr GWKNearestByte(GDALWarpKernel *poWK)
6399 : {
6400 324 : return GWKRun(poWK, "GWKNearestByte", GWKNearestThread<GByte>);
6401 : }
6402 :
// Runs the GInt16 / GRA_NearestNeighbour instantiation of
// GWKResampleNoMasksOrDstDensityOnlyThread through GWKRun() and returns
// GWKRun()'s result. The string argument is this function's own name.
6403 14 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6404 : {
6405 14 : return GWKRun(
6406 : poWK, "GWKNearestNoMasksOrDstDensityOnlyShort",
6407 14 : GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_NearestNeighbour>);
6408 : }
6409 :
// Runs the GInt16 / GRA_Bilinear instantiation of
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread through GWKRun() and
// returns GWKRun()'s result. The string argument is this function's own name.
6410 5 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6411 : {
6412 5 : return GWKRun(
6413 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyShort",
6414 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16,
6415 5 : GRA_Bilinear>);
6416 : }
6417 :
// Runs the GUInt16 / GRA_Bilinear instantiation of
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread through GWKRun() and
// returns GWKRun()'s result. The string argument is this function's own name.
6418 6 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6419 : {
6420 6 : return GWKRun(
6421 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyUShort",
6422 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16,
6423 6 : GRA_Bilinear>);
6424 : }
6425 :
// Runs the float / GRA_Bilinear instantiation of
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread through GWKRun() and
// returns GWKRun()'s result. The string argument is this function's own name.
6426 4 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
6427 : {
6428 4 : return GWKRun(
6429 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyFloat",
6430 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float,
6431 4 : GRA_Bilinear>);
6432 : }
6433 :
6434 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
6435 :
// Only compiled when INSTANTIATE_FLOAT64_SSE2_IMPL is defined. Runs the
// double / GRA_Bilinear instantiation of
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread through GWKRun() and
// returns GWKRun()'s result. The string argument is this function's own name.
6436 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
6437 : {
6438 : return GWKRun(
6439 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyDouble",
6440 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double,
6441 : GRA_Bilinear>);
6442 : }
6443 : #endif
6444 :
// Runs the GInt16 / GRA_Cubic instantiation of
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread through GWKRun() and
// returns GWKRun()'s result. The string argument is this function's own name.
6445 5 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6446 : {
6447 5 : return GWKRun(
6448 : poWK, "GWKCubicNoMasksOrDstDensityOnlyShort",
6449 5 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16, GRA_Cubic>);
6450 : }
6451 :
// Runs the GUInt16 / GRA_Cubic instantiation of
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread through GWKRun() and
// returns GWKRun()'s result. The string argument is this function's own name.
6452 12 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6453 : {
6454 12 : return GWKRun(
6455 : poWK, "GWKCubicNoMasksOrDstDensityOnlyUShort",
6456 12 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16, GRA_Cubic>);
6457 : }
6458 :
// Runs the GInt16 / GRA_CubicSpline instantiation of
// GWKResampleNoMasksOrDstDensityOnlyThread through GWKRun() and returns
// GWKRun()'s result. The string argument is this function's own name.
6459 6 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6460 : {
6461 6 : return GWKRun(
6462 : poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyShort",
6463 6 : GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_CubicSpline>);
6464 : }
6465 :
// Runs the GUInt16 / GRA_CubicSpline instantiation of
// GWKResampleNoMasksOrDstDensityOnlyThread through GWKRun() and returns
// GWKRun()'s result. The string argument is this function's own name.
6466 5 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6467 : {
6468 5 : return GWKRun(
6469 : poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyUShort",
6470 5 : GWKResampleNoMasksOrDstDensityOnlyThread<GUInt16, GRA_CubicSpline>);
6471 : }
6472 :
// Runs GWKNearestThread<GInt16> through GWKRun() and returns GWKRun()'s
// result. The string argument is this function's own name.
6473 23 : static CPLErr GWKNearestShort(GDALWarpKernel *poWK)
6474 : {
6475 23 : return GWKRun(poWK, "GWKNearestShort", GWKNearestThread<GInt16>);
6476 : }
6477 :
// Runs GWKNearestThread<GUInt16> through GWKRun() and returns GWKRun()'s
// result. The string argument is this function's own name.
6478 0 : static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK)
6479 : {
6480 0 : return GWKRun(poWK, "GWKNearestUnsignedShort", GWKNearestThread<GUInt16>);
6481 : }
6482 :
// Runs the float / GRA_NearestNeighbour instantiation of
// GWKResampleNoMasksOrDstDensityOnlyThread through GWKRun() and returns
// GWKRun()'s result. The string argument is this function's own name.
6483 11 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
6484 : {
6485 11 : return GWKRun(
6486 : poWK, "GWKNearestNoMasksOrDstDensityOnlyFloat",
6487 11 : GWKResampleNoMasksOrDstDensityOnlyThread<float, GRA_NearestNeighbour>);
6488 : }
6489 :
// Runs GWKNearestThread<float> through GWKRun() and returns GWKRun()'s
// result. The string argument is this function's own name.
6490 37 : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK)
6491 : {
6492 37 : return GWKRun(poWK, "GWKNearestFloat", GWKNearestThread<float>);
6493 : }
6494 :
6495 : /************************************************************************/
6496 : /* GWKAverageOrMode() */
6497 : /* */
6498 : /************************************************************************/
6499 :
6500 : static void GWKAverageOrModeThread(void *pData);
6501 :
// Runs GWKAverageOrModeThread (forward-declared just above) through GWKRun()
// and returns GWKRun()'s result. The string argument is this function's own
// name.
6502 130 : static CPLErr GWKAverageOrMode(GDALWarpKernel *poWK)
6503 : {
6504 130 : return GWKRun(poWK, "GWKAverageOrMode", GWKAverageOrModeThread);
6505 : }
6506 :
6507 : // Overall logic based on GWKGeneralCaseThread().
6508 130 : static void GWKAverageOrModeThread(void *pData)
6509 : {
6510 130 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6511 130 : GDALWarpKernel *poWK = psJob->poWK;
6512 130 : const int iYMin = psJob->iYMin;
6513 130 : const int iYMax = psJob->iYMax;
6514 : const double dfMultFactorVerticalShiftPipeline =
6515 130 : poWK->bApplyVerticalShift
6516 130 : ? CPLAtof(CSLFetchNameValueDef(
6517 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
6518 : "1.0"))
6519 130 : : 0.0;
6520 :
6521 130 : const int nDstXSize = poWK->nDstXSize;
6522 130 : const int nSrcXSize = poWK->nSrcXSize;
6523 130 : const int nSrcYSize = poWK->nSrcYSize;
6524 :
6525 : /* -------------------------------------------------------------------- */
6526 : /* Find out which algorithm to use (small optim.) */
6527 : /* -------------------------------------------------------------------- */
6528 130 : int nAlgo = 0;
6529 :
6530 : // Only used for GRA_Mode
6531 130 : float *pafRealVals = nullptr;
6532 130 : float *pafCounts = nullptr;
6533 130 : int nBins = 0;
6534 130 : int nBinsOffset = 0;
6535 130 : const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
6536 :
6537 : // Only used with nAlgo = 6.
6538 130 : float quant = 0.5;
6539 :
6540 : // To control array allocation only when data type is complex
6541 130 : const bool bIsComplex = GDALDataTypeIsComplex(poWK->eWorkingDataType) != 0;
6542 :
6543 130 : if (poWK->eResample == GRA_Average)
6544 : {
6545 71 : nAlgo = GWKAOM_Average;
6546 : }
6547 59 : else if (poWK->eResample == GRA_RMS)
6548 : {
6549 9 : nAlgo = GWKAOM_RMS;
6550 : }
6551 50 : else if (poWK->eResample == GRA_Mode)
6552 : {
6553 : // TODO check color table count > 256.
6554 23 : if (poWK->eWorkingDataType == GDT_Byte ||
6555 17 : poWK->eWorkingDataType == GDT_UInt16 ||
6556 17 : poWK->eWorkingDataType == GDT_Int16)
6557 : {
6558 14 : nAlgo = GWKAOM_Imode;
6559 :
6560 : // In the case of a paletted or non-paletted byte band,
6561 : // Input values are between 0 and 255.
6562 14 : if (poWK->eWorkingDataType == GDT_Byte)
6563 : {
6564 6 : nBins = 256;
6565 : }
6566 : // In the case of Int8, input values are between -128 and 127.
6567 8 : else if (poWK->eWorkingDataType == GDT_Int8)
6568 : {
6569 0 : nBins = 256;
6570 0 : nBinsOffset = 128;
6571 : }
6572 : // In the case of Int16, input values are between -32768 and 32767.
6573 8 : else if (poWK->eWorkingDataType == GDT_Int16)
6574 : {
6575 8 : nBins = 65536;
6576 8 : nBinsOffset = 32768;
6577 : }
6578 : // In the case of UInt16, input values are between 0 and 65537.
6579 0 : else if (poWK->eWorkingDataType == GDT_UInt16)
6580 : {
6581 0 : nBins = 65536;
6582 : }
6583 : pafCounts =
6584 14 : static_cast<float *>(VSI_MALLOC_VERBOSE(nBins * sizeof(float)));
6585 14 : if (pafCounts == nullptr)
6586 0 : return;
6587 : }
6588 : else
6589 : {
6590 9 : nAlgo = GWKAOM_Fmode;
6591 :
6592 9 : if (nSrcXSize > 0 && nSrcYSize > 0)
6593 : {
6594 : pafRealVals = static_cast<float *>(
6595 9 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
6596 : pafCounts = static_cast<float *>(
6597 9 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
6598 9 : if (pafRealVals == nullptr || pafCounts == nullptr)
6599 : {
6600 0 : VSIFree(pafRealVals);
6601 0 : VSIFree(pafCounts);
6602 0 : return;
6603 : }
6604 : }
6605 : }
6606 : }
6607 27 : else if (poWK->eResample == GRA_Max)
6608 : {
6609 6 : nAlgo = GWKAOM_Max;
6610 : }
6611 21 : else if (poWK->eResample == GRA_Min)
6612 : {
6613 5 : nAlgo = GWKAOM_Min;
6614 : }
6615 16 : else if (poWK->eResample == GRA_Med)
6616 : {
6617 6 : nAlgo = GWKAOM_Quant;
6618 6 : quant = 0.5;
6619 : }
6620 10 : else if (poWK->eResample == GRA_Q1)
6621 : {
6622 5 : nAlgo = GWKAOM_Quant;
6623 5 : quant = 0.25;
6624 : }
6625 5 : else if (poWK->eResample == GRA_Q3)
6626 : {
6627 5 : nAlgo = GWKAOM_Quant;
6628 5 : quant = 0.75;
6629 : }
6630 : #ifdef disabled
6631 : else if (poWK->eResample == GRA_Sum)
6632 : {
6633 : nAlgo = GWKAOM_Sum;
6634 : }
6635 : #endif
6636 : else
6637 : {
6638 : // Other resample algorithms not permitted here.
6639 0 : CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread() ERROR, "
6640 : "illegal resample");
6641 0 : return;
6642 : }
6643 :
6644 130 : CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread() using algo %d",
6645 : nAlgo);
6646 :
6647 : /* -------------------------------------------------------------------- */
6648 : /* Allocate x,y,z coordinate arrays for transformation ... two */
6649 : /* scanlines worth of positions. */
6650 : /* -------------------------------------------------------------------- */
6651 :
6652 : double *padfX =
6653 130 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6654 : double *padfY =
6655 130 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6656 : double *padfZ =
6657 130 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6658 : double *padfX2 =
6659 130 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6660 : double *padfY2 =
6661 130 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6662 : double *padfZ2 =
6663 130 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6664 130 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
6665 130 : int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
6666 :
6667 130 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
6668 130 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
6669 130 : const double dfErrorThreshold = CPLAtof(
6670 130 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
6671 :
6672 : const double dfExcludedValuesThreshold =
6673 130 : CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
6674 : "EXCLUDED_VALUES_PCT_THRESHOLD", "50")) /
6675 130 : 100.0;
6676 : const double dfNodataValuesThreshold =
6677 130 : CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
6678 : "NODATA_VALUES_PCT_THRESHOLD", "100")) /
6679 130 : 100.0;
6680 :
6681 : const int nXMargin =
6682 130 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
6683 : const int nYMargin =
6684 130 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
6685 :
6686 : /* ==================================================================== */
6687 : /* Loop over output lines. */
6688 : /* ==================================================================== */
6689 6627 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
6690 : {
6691 :
6692 : /* --------------------------------------------------------------------
6693 : */
6694 : /* Setup points to transform to source image space. */
6695 : /* --------------------------------------------------------------------
6696 : */
6697 1669840 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6698 : {
6699 1663340 : padfX[iDstX] = iDstX + poWK->nDstXOff;
6700 1663340 : padfY[iDstX] = iDstY + poWK->nDstYOff;
6701 1663340 : padfZ[iDstX] = 0.0;
6702 1663340 : padfX2[iDstX] = iDstX + 1.0 + poWK->nDstXOff;
6703 1663340 : padfY2[iDstX] = iDstY + 1.0 + poWK->nDstYOff;
6704 1663340 : padfZ2[iDstX] = 0.0;
6705 : }
6706 :
6707 : /* --------------------------------------------------------------------
6708 : */
6709 : /* Transform the points from destination pixel/line coordinates */
6710 : /* to source pixel/line coordinates. */
6711 : /* --------------------------------------------------------------------
6712 : */
6713 6497 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
6714 : padfY, padfZ, pabSuccess);
6715 6497 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX2,
6716 : padfY2, padfZ2, pabSuccess2);
6717 :
6718 6497 : if (dfSrcCoordPrecision > 0.0)
6719 : {
6720 0 : GWKRoundSourceCoordinates(
6721 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
6722 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6723 0 : poWK->nDstXOff, iDstY + poWK->nDstYOff);
6724 0 : GWKRoundSourceCoordinates(
6725 : nDstXSize, padfX2, padfY2, padfZ2, pabSuccess2,
6726 : dfSrcCoordPrecision, dfErrorThreshold, poWK->pfnTransformer,
6727 0 : psJob->pTransformerArg, 1.0 + poWK->nDstXOff,
6728 0 : iDstY + 1.0 + poWK->nDstYOff);
6729 : }
6730 :
6731 : /* ====================================================================
6732 : */
6733 : /* Loop over pixels in output scanline. */
6734 : /* ====================================================================
6735 : */
6736 1669840 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6737 : {
6738 1663340 : GPtrDiff_t iSrcOffset = 0;
6739 1663340 : double dfDensity = 1.0;
6740 1663340 : bool bHasFoundDensity = false;
6741 :
6742 1663340 : if (!pabSuccess[iDstX] || !pabSuccess2[iDstX])
6743 311460 : continue;
6744 :
6745 : // Add some checks so that padfX[iDstX] - poWK->nSrcXOff is in
6746 : // reasonable range (https://github.com/OSGeo/gdal/issues/2365)
6747 1663340 : if (!(padfX[iDstX] - poWK->nSrcXOff >= -nXMargin &&
6748 1663320 : padfX2[iDstX] - poWK->nSrcXOff >= -nXMargin &&
6749 1663320 : padfY[iDstX] - poWK->nSrcYOff >= -nYMargin &&
6750 1663300 : padfY2[iDstX] - poWK->nSrcYOff >= -nYMargin &&
6751 1663300 : padfX[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
6752 1663300 : padfX2[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
6753 1663290 : padfY[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin &&
6754 1663290 : padfY2[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin))
6755 : {
6756 62 : continue;
6757 : }
6758 :
6759 1663280 : const GPtrDiff_t iDstOffset =
6760 1663280 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
6761 :
6762 : // Compute corners in source crs.
6763 :
6764 : // The transformation might not have preserved ordering of
6765 : // coordinates so do the necessary swapping (#5433).
6766 : // NOTE: this is really an approximative fix. To do something
6767 : // more precise we would for example need to compute the
6768 : // transformation of coordinates in the
6769 : // [iDstX,iDstY]x[iDstX+1,iDstY+1] square back to source
6770 : // coordinates, and take the bounding box of the got source
6771 : // coordinates.
6772 :
6773 1663280 : if (padfX[iDstX] > padfX2[iDstX])
6774 268744 : std::swap(padfX[iDstX], padfX2[iDstX]);
6775 :
6776 : // Detect situations where the target pixel is close to the
6777 : // antimeridian and when padfX[iDstX] and padfX2[iDstX] are very
6778 : // close to the left-most and right-most columns of the source
6779 : // raster. The 2 value below was experimentally determined to
6780 : // avoid false-positives and false-negatives.
6781 : // Addresses https://github.com/OSGeo/gdal/issues/6478
6782 1663280 : bool bWrapOverX = false;
6783 1663280 : const int nThresholdWrapOverX = std::min(2, nSrcXSize / 10);
6784 1663280 : if (poWK->nSrcXOff == 0 &&
6785 1663280 : padfX[iDstX] * poWK->dfXScale < nThresholdWrapOverX &&
6786 14495 : (nSrcXSize - padfX2[iDstX]) * poWK->dfXScale <
6787 : nThresholdWrapOverX)
6788 : {
6789 : // Check there is a discontinuity by checking at mid-pixel.
6790 : // NOTE: all this remains fragile. To confidently
6791 : // detect antimeridian warping we should probably try to access
6792 : // georeferenced coordinates, and not rely only on tests on
6793 : // image space coordinates. But accessing georeferenced
6794 : // coordinates from here is not trivial, and we would for example
6795 : // have to handle both geographic, Mercator, etc.
6796 : // Let's hope this heuristic is good enough for now.
6797 1041 : double x = iDstX + 0.5 + poWK->nDstXOff;
6798 1041 : double y = iDstY + poWK->nDstYOff;
6799 1041 : double z = 0;
6800 1041 : int bSuccess = FALSE;
6801 1041 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1, &x, &y,
6802 : &z, &bSuccess);
6803 1041 : if (bSuccess && x < padfX[iDstX])
6804 : {
6805 1008 : bWrapOverX = true;
6806 1008 : std::swap(padfX[iDstX], padfX2[iDstX]);
6807 1008 : padfX2[iDstX] += nSrcXSize;
6808 : }
6809 : }
6810 :
6811 1663280 : const double dfXMin = padfX[iDstX] - poWK->nSrcXOff;
6812 1663280 : const double dfXMax = padfX2[iDstX] - poWK->nSrcXOff;
6813 1663280 : constexpr double EPS = 1e-10;
6814 : // Check that [dfXMin, dfXMax] intersect with [0,nSrcXSize] with a tolerance
6815 1663280 : if (!(dfXMax > -EPS && dfXMin < nSrcXSize + EPS))
6816 72 : continue;
6817 1663200 : int iSrcXMin = static_cast<int>(std::max(floor(dfXMin + EPS), 0.0));
6818 1663200 : int iSrcXMax = static_cast<int>(
6819 1663200 : std::min(ceil(dfXMax - EPS), static_cast<double>(INT_MAX)));
6820 1663200 : if (!bWrapOverX)
6821 1662200 : iSrcXMax = std::min(iSrcXMax, nSrcXSize);
6822 1663200 : if (iSrcXMin == iSrcXMax && iSrcXMax < nSrcXSize)
6823 472 : iSrcXMax++;
6824 :
6825 1663200 : if (padfY[iDstX] > padfY2[iDstX])
6826 270117 : std::swap(padfY[iDstX], padfY2[iDstX]);
6827 1663200 : const double dfYMin = padfY[iDstX] - poWK->nSrcYOff;
6828 1663200 : const double dfYMax = padfY2[iDstX] - poWK->nSrcYOff;
6829 : // Check that [dfYMin, dfYMax] intersect with [0,nSrcYSize] with a tolerance
6830 1663200 : if (!(dfYMax > -EPS && dfYMin < nSrcYSize + EPS))
6831 36 : continue;
6832 1663170 : int iSrcYMin = static_cast<int>(std::max(floor(dfYMin + EPS), 0.0));
6833 : int iSrcYMax =
6834 1663170 : std::min(static_cast<int>(ceil(dfYMax - EPS)), nSrcYSize);
6835 1663170 : if (iSrcYMin == iSrcYMax && iSrcYMax < nSrcYSize)
6836 0 : iSrcYMax++;
6837 :
6838 : #define COMPUTE_WEIGHT_Y(iSrcY) \
6839 : ((iSrcY == iSrcYMin) \
6840 : ? ((iSrcYMin + 1 == iSrcYMax) ? 1.0 : 1 - (dfYMin - iSrcYMin)) \
6841 : : (iSrcY + 1 == iSrcYMax) ? 1 - (iSrcYMax - dfYMax) \
6842 : : 1.0)
6843 :
6844 : #define COMPUTE_WEIGHT(iSrcX, dfWeightY) \
6845 : ((iSrcX == iSrcXMin) ? ((iSrcXMin + 1 == iSrcXMax) \
6846 : ? dfWeightY \
6847 : : dfWeightY * (1 - (dfXMin - iSrcXMin))) \
6848 : : (iSrcX + 1 == iSrcXMax) ? dfWeightY * (1 - (iSrcXMax - dfXMax)) \
6849 : : dfWeightY)
6850 :
6851 1663170 : bool bDone = false;
6852 :
6853 : // Special Average mode where we process all bands together,
6854 : // to avoid averaging tuples that match an entry of m_aadfExcludedValues
6855 2267240 : if (nAlgo == GWKAOM_Average &&
6856 604073 : (!poWK->m_aadfExcludedValues.empty() ||
6857 393224 : dfNodataValuesThreshold < 1 - EPS) &&
6858 2267240 : !poWK->bApplyVerticalShift && !bIsComplex)
6859 : {
6860 393224 : double dfTotalWeightInvalid = 0.0;
6861 393224 : double dfTotalWeightExcluded = 0.0;
6862 393224 : double dfTotalWeightRegular = 0.0;
6863 786448 : std::vector<double> adfValueReal(poWK->nBands, 0);
6864 786448 : std::vector<double> adfValueAveraged(poWK->nBands, 0);
6865 : std::vector<int> anCountExcludedValues(
6866 393224 : poWK->m_aadfExcludedValues.size(), 0);
6867 :
6868 1572890 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
6869 : {
6870 1179660 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
6871 1179660 : iSrcOffset =
6872 1179660 : iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
6873 5111860 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
6874 : iSrcX++, iSrcOffset++)
6875 : {
6876 3932190 : if (bWrapOverX)
6877 0 : iSrcOffset =
6878 0 : (iSrcX % nSrcXSize) +
6879 0 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
6880 :
6881 3932190 : const double dfWeight =
6882 3932190 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
6883 3932190 : if (dfWeight <= 0)
6884 0 : continue;
6885 :
6886 3932200 : if (poWK->panUnifiedSrcValid != nullptr &&
6887 12 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
6888 : {
6889 3 : dfTotalWeightInvalid += dfWeight;
6890 3 : continue;
6891 : }
6892 :
6893 3932190 : bool bAllValid = true;
6894 7274900 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6895 : {
6896 6160660 : double dfBandDensity = 0;
6897 6160660 : double dfValueImagTmp = 0;
6898 9503370 : if (!(GWKGetPixelValue(
6899 : poWK, iBand, iSrcOffset, &dfBandDensity,
6900 6160660 : &adfValueReal[iBand], &dfValueImagTmp) &&
6901 3342710 : dfBandDensity > BAND_DENSITY_THRESHOLD))
6902 : {
6903 2817950 : bAllValid = false;
6904 2817950 : break;
6905 : }
6906 : }
6907 :
6908 3932190 : if (!bAllValid)
6909 : {
6910 2817950 : dfTotalWeightInvalid += dfWeight;
6911 2817950 : continue;
6912 : }
6913 :
6914 1114240 : bool bExcludedValueFound = false;
6915 2228350 : for (size_t i = 0;
6916 2228350 : i < poWK->m_aadfExcludedValues.size(); ++i)
6917 : {
6918 1114130 : if (poWK->m_aadfExcludedValues[i] == adfValueReal)
6919 : {
6920 21 : bExcludedValueFound = true;
6921 21 : ++anCountExcludedValues[i];
6922 21 : dfTotalWeightExcluded += dfWeight;
6923 21 : break;
6924 : }
6925 : }
6926 1114240 : if (!bExcludedValueFound)
6927 : {
6928 : // Weighted incremental algorithm mean
6929 : // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
6930 1114220 : dfTotalWeightRegular += dfWeight;
6931 4456870 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6932 : {
6933 3342650 : adfValueAveraged[iBand] +=
6934 6685300 : (dfWeight / dfTotalWeightRegular) *
6935 6685300 : (adfValueReal[iBand] -
6936 3342650 : adfValueAveraged[iBand]);
6937 : }
6938 : }
6939 : }
6940 : }
6941 :
6942 393224 : const double dfTotalWeight = dfTotalWeightInvalid +
6943 : dfTotalWeightExcluded +
6944 : dfTotalWeightRegular;
6945 393224 : if (dfTotalWeightInvalid > 0 &&
6946 : dfTotalWeightInvalid >=
6947 311293 : dfNodataValuesThreshold * dfTotalWeight)
6948 : {
6949 : // Do nothing. Let bHasFoundDensity to false.
6950 : }
6951 81934 : else if (dfTotalWeightExcluded > 0 &&
6952 : dfTotalWeightExcluded >=
6953 6 : dfExcludedValuesThreshold * dfTotalWeight)
6954 : {
6955 : // Find the most represented excluded value tuple
6956 3 : size_t iExcludedValue = 0;
6957 3 : int nExcludedValueCount = 0;
6958 6 : for (size_t i = 0; i < poWK->m_aadfExcludedValues.size();
6959 : ++i)
6960 : {
6961 3 : if (anCountExcludedValues[i] > nExcludedValueCount)
6962 : {
6963 3 : iExcludedValue = i;
6964 3 : nExcludedValueCount = anCountExcludedValues[i];
6965 : }
6966 : }
6967 :
6968 3 : bHasFoundDensity = true;
6969 :
6970 12 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6971 : {
6972 9 : GWKSetPixelValue(
6973 : poWK, iBand, iDstOffset, /* dfBandDensity = */ 1.0,
6974 9 : poWK->m_aadfExcludedValues[iExcludedValue][iBand],
6975 : 0);
6976 3 : }
6977 : }
6978 81931 : else if (dfTotalWeightRegular > 0)
6979 : {
6980 81931 : bHasFoundDensity = true;
6981 :
6982 327720 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6983 : {
6984 245789 : GWKSetPixelValue(poWK, iBand, iDstOffset,
6985 : /* dfBandDensity = */ 1.0,
6986 245789 : adfValueAveraged[iBand], 0);
6987 : }
6988 : }
6989 :
6990 : // Skip below loop on bands
6991 393224 : bDone = true;
6992 : }
6993 :
6994 : /* ====================================================================
6995 : */
6996 : /* Loop processing each band. */
6997 : /* ====================================================================
6998 : */
6999 :
7000 4439540 : for (int iBand = 0; !bDone && iBand < poWK->nBands; iBand++)
7001 : {
7002 2776380 : double dfBandDensity = 0.0;
7003 2776380 : double dfValueReal = 0.0;
7004 2776380 : double dfValueImag = 0.0;
7005 2776380 : double dfValueRealTmp = 0.0;
7006 2776380 : double dfValueImagTmp = 0.0;
7007 :
7008 : /* --------------------------------------------------------------------
7009 : */
7010 : /* Collect the source value. */
7011 : /* --------------------------------------------------------------------
7012 : */
7013 :
7014 : // Loop over source lines and pixels - 3 possible algorithms.
7015 :
7016 : // poWK->eResample == GRA_Average.
7017 2776380 : if (nAlgo == GWKAOM_Average)
7018 : {
7019 300849 : double dfTotalWeight = 0.0;
7020 :
7021 : // This code adapted from GDALDownsampleChunk32R_AverageT()
7022 : // in gcore/overview.cpp.
7023 631308 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7024 : {
7025 330459 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7026 330459 : iSrcOffset = iSrcXMin +
7027 330459 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7028 803200 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7029 : iSrcX++, iSrcOffset++)
7030 : {
7031 472741 : if (bWrapOverX)
7032 630 : iSrcOffset =
7033 630 : (iSrcX % nSrcXSize) +
7034 630 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7035 :
7036 472745 : if (poWK->panUnifiedSrcValid != nullptr &&
7037 4 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7038 : iSrcOffset))
7039 : {
7040 1 : continue;
7041 : }
7042 :
7043 472740 : if (GWKGetPixelValue(
7044 : poWK, iBand, iSrcOffset, &dfBandDensity,
7045 945480 : &dfValueRealTmp, &dfValueImagTmp) &&
7046 472740 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7047 : {
7048 472740 : const double dfWeight =
7049 472740 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7050 472740 : if (dfWeight > 0)
7051 : {
7052 : // Weighted incremental algorithm mean
7053 : // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
7054 472740 : dfTotalWeight += dfWeight;
7055 472740 : dfValueReal +=
7056 472740 : (dfWeight / dfTotalWeight) *
7057 472740 : (dfValueRealTmp - dfValueReal);
7058 472740 : if (bIsComplex)
7059 : {
7060 252 : dfValueImag +=
7061 252 : (dfWeight / dfTotalWeight) *
7062 252 : (dfValueImagTmp - dfValueImag);
7063 : }
7064 : }
7065 : }
7066 : }
7067 : }
7068 :
7069 300849 : if (dfTotalWeight > 0)
7070 : {
7071 300849 : if (poWK->bApplyVerticalShift)
7072 : {
7073 0 : if (!std::isfinite(padfZ[iDstX]))
7074 0 : continue;
7075 : // Subtract padfZ[] since the coordinate
7076 : // transformation is from target to source
7077 0 : dfValueReal =
7078 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
7079 0 : padfZ[iDstX] *
7080 : dfMultFactorVerticalShiftPipeline;
7081 : }
7082 :
7083 300849 : dfBandDensity = 1;
7084 300849 : bHasFoundDensity = true;
7085 : }
7086 : } // GRA_Average.
7087 : // poWK->eResample == GRA_RMS.
7088 2776380 : if (nAlgo == GWKAOM_RMS)
7089 : {
7090 300416 : double dfTotalReal = 0.0;
7091 300416 : double dfTotalImag = 0.0;
7092 300416 : double dfTotalWeight = 0.0;
7093 : // This code adapted from GDALDownsampleChunk32R_AverageT()
7094 : // in gcore/overview.cpp.
7095 630578 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7096 : {
7097 330162 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7098 330162 : iSrcOffset = iSrcXMin +
7099 330162 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7100 802723 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7101 : iSrcX++, iSrcOffset++)
7102 : {
7103 472561 : if (bWrapOverX)
7104 630 : iSrcOffset =
7105 630 : (iSrcX % nSrcXSize) +
7106 630 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7107 :
7108 472561 : if (poWK->panUnifiedSrcValid != nullptr &&
7109 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7110 : iSrcOffset))
7111 : {
7112 0 : continue;
7113 : }
7114 :
7115 472561 : if (GWKGetPixelValue(
7116 : poWK, iBand, iSrcOffset, &dfBandDensity,
7117 945122 : &dfValueRealTmp, &dfValueImagTmp) &&
7118 472561 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7119 : {
7120 472561 : const double dfWeight =
7121 472561 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7122 472561 : dfTotalWeight += dfWeight;
7123 472561 : dfTotalReal +=
7124 472561 : dfValueRealTmp * dfValueRealTmp * dfWeight;
7125 472561 : if (bIsComplex)
7126 48 : dfTotalImag += dfValueImagTmp *
7127 48 : dfValueImagTmp * dfWeight;
7128 : }
7129 : }
7130 : }
7131 :
7132 300416 : if (dfTotalWeight > 0)
7133 : {
7134 300416 : dfValueReal = sqrt(dfTotalReal / dfTotalWeight);
7135 :
7136 300416 : if (poWK->bApplyVerticalShift)
7137 : {
7138 0 : if (!std::isfinite(padfZ[iDstX]))
7139 0 : continue;
7140 : // Subtract padfZ[] since the coordinate
7141 : // transformation is from target to source
7142 0 : dfValueReal =
7143 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
7144 0 : padfZ[iDstX] *
7145 : dfMultFactorVerticalShiftPipeline;
7146 : }
7147 :
7148 300416 : if (bIsComplex)
7149 12 : dfValueImag = sqrt(dfTotalImag / dfTotalWeight);
7150 :
7151 300416 : dfBandDensity = 1;
7152 300416 : bHasFoundDensity = true;
7153 : }
7154 : } // GRA_RMS.
7155 : #ifdef disabled
7156 : else if (nAlgo == GWKAOM_Sum)
7157 : // poWK->eResample == GRA_Sum
7158 : {
7159 : double dfTotalReal = 0.0;
7160 : double dfTotalImag = 0.0;
7161 : bool bFoundValid = false;
7162 :
7163 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7164 : {
7165 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7166 : iSrcOffset = iSrcXMin +
7167 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7168 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7169 : iSrcX++, iSrcOffset++)
7170 : {
7171 : if (bWrapOverX)
7172 : iSrcOffset =
7173 : (iSrcX % nSrcXSize) +
7174 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7175 :
7176 : if (poWK->panUnifiedSrcValid != nullptr &&
7177 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7178 : iSrcOffset))
7179 : {
7180 : continue;
7181 : }
7182 :
7183 : if (GWKGetPixelValue(
7184 : poWK, iBand, iSrcOffset, &dfBandDensity,
7185 : &dfValueRealTmp, &dfValueImagTmp) &&
7186 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7187 : {
7188 : const double dfWeight =
7189 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7190 : bFoundValid = true;
7191 : dfTotalReal += dfValueRealTmp * dfWeight;
7192 : if (bIsComplex)
7193 : {
7194 : dfTotalImag += dfValueImagTmp * dfWeight;
7195 : }
7196 : }
7197 : }
7198 : }
7199 :
7200 : if (bFoundValid)
7201 : {
7202 : dfValueReal = dfTotalReal;
7203 :
7204 : if (poWK->bApplyVerticalShift)
7205 : {
7206 : if (!std::isfinite(padfZ[iDstX]))
7207 : continue;
7208 : // Subtract padfZ[] since the coordinate
7209 : // transformation is from target to source
7210 : dfValueReal =
7211 : dfValueReal * poWK->dfMultFactorVerticalShift -
7212 : padfZ[iDstX] *
7213 : dfMultFactorVerticalShiftPipeline;
7214 : }
7215 :
7216 : if (bIsComplex)
7217 : {
7218 : dfValueImag = dfTotalImag;
7219 : }
7220 : dfBandDensity = 1;
7221 : bHasFoundDensity = true;
7222 : }
7223 : } // GRA_Sum.
7224 : #endif
7225 2475960 : else if (nAlgo == GWKAOM_Imode || nAlgo == GWKAOM_Fmode)
7226 : // poWK->eResample == GRA_Mode
7227 : {
7228 : // This code adapted from GDALDownsampleChunk32R_Mode() in
7229 : // gcore/overview.cpp.
7230 500026 : if (nAlgo == GWKAOM_Fmode) // int32 or float.
7231 : {
7232 : // Does it make sense to run a
7233 : // majority filter on floating point data? But, here it
7234 : // is for the sake of compatibility. It won't look
7235 : // right on RGB images by the nature of the filter.
7236 3407 : nBins = 0;
7237 3407 : int iModeIndex = -1;
7238 :
7239 10228 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7240 : {
7241 6821 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7242 6821 : iSrcOffset =
7243 6821 : iSrcXMin +
7244 6821 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7245 20484 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7246 : iSrcX++, iSrcOffset++)
7247 : {
7248 13663 : if (bWrapOverX)
7249 0 : iSrcOffset =
7250 0 : (iSrcX % nSrcXSize) +
7251 0 : static_cast<GPtrDiff_t>(iSrcY) *
7252 0 : nSrcXSize;
7253 :
7254 13663 : if (poWK->panUnifiedSrcValid != nullptr &&
7255 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7256 : iSrcOffset))
7257 0 : continue;
7258 :
7259 13663 : if (GWKGetPixelValue(
7260 : poWK, iBand, iSrcOffset, &dfBandDensity,
7261 27326 : &dfValueRealTmp, &dfValueImagTmp) &&
7262 13663 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7263 : {
7264 13663 : const float fVal =
7265 13663 : static_cast<float>(dfValueRealTmp);
7266 13663 : const double dfWeight =
7267 13663 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7268 :
7269 : // Check array for existing entry.
7270 13663 : int i = 0;
7271 29135 : for (i = 0; i < nBins; ++i)
7272 : {
7273 17768 : if (pafRealVals[i] == fVal)
7274 : {
7275 :
7276 2296 : pafCounts[i] +=
7277 2296 : static_cast<float>(dfWeight);
7278 2296 : bool bValIsMaxCount =
7279 2296 : (pafCounts[i] >
7280 2296 : pafCounts[iModeIndex]);
7281 :
7282 2296 : if (!bValIsMaxCount &&
7283 1492 : pafCounts[i] ==
7284 1492 : pafCounts[iModeIndex])
7285 : {
7286 1487 : switch (eTieStrategy)
7287 : {
7288 1474 : case GWKTS_First:
7289 1474 : break;
7290 6 : case GWKTS_Min:
7291 6 : bValIsMaxCount =
7292 : fVal <
7293 : pafRealVals
7294 6 : [iModeIndex];
7295 6 : break;
7296 7 : case GWKTS_Max:
7297 7 : bValIsMaxCount =
7298 : fVal >
7299 : pafRealVals
7300 7 : [iModeIndex];
7301 7 : break;
7302 : }
7303 : }
7304 :
7305 2296 : if (bValIsMaxCount)
7306 : {
7307 807 : iModeIndex = i;
7308 : }
7309 :
7310 2296 : break;
7311 : }
7312 : }
7313 :
7314 : // Add to arr if entry not already there.
7315 13663 : if (i == nBins)
7316 : {
7317 11367 : pafRealVals[i] = fVal;
7318 11367 : pafCounts[i] =
7319 11367 : static_cast<float>(dfWeight);
7320 :
7321 11367 : if (iModeIndex < 0)
7322 3407 : iModeIndex = i;
7323 :
7324 11367 : ++nBins;
7325 : }
7326 : }
7327 : }
7328 : }
7329 :
7330 3407 : if (iModeIndex != -1)
7331 : {
7332 3407 : dfValueReal = pafRealVals[iModeIndex];
7333 :
7334 3407 : if (poWK->bApplyVerticalShift)
7335 : {
7336 0 : if (!std::isfinite(padfZ[iDstX]))
7337 0 : continue;
7338 : // Subtract padfZ[] since the coordinate
7339 : // transformation is from target to source
7340 0 : dfValueReal =
7341 0 : dfValueReal *
7342 0 : poWK->dfMultFactorVerticalShift -
7343 0 : padfZ[iDstX] *
7344 : dfMultFactorVerticalShiftPipeline;
7345 : }
7346 :
7347 3407 : dfBandDensity = 1;
7348 3407 : bHasFoundDensity = true;
7349 : }
7350 : }
7351 : else // byte or int16.
7352 : {
7353 496619 : float fMaxCount = 0.0f;
7354 496619 : int nMode = -1;
7355 496619 : bool bHasSourceValues = false;
7356 :
7357 496619 : memset(pafCounts, 0, nBins * sizeof(float));
7358 :
7359 1612550 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7360 : {
7361 1115930 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7362 1115930 : iSrcOffset =
7363 1115930 : iSrcXMin +
7364 1115930 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7365 4733150 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7366 : iSrcX++, iSrcOffset++)
7367 : {
7368 3617220 : if (bWrapOverX)
7369 630 : iSrcOffset =
7370 630 : (iSrcX % nSrcXSize) +
7371 630 : static_cast<GPtrDiff_t>(iSrcY) *
7372 630 : nSrcXSize;
7373 :
7374 3617220 : if (poWK->panUnifiedSrcValid != nullptr &&
7375 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7376 : iSrcOffset))
7377 0 : continue;
7378 :
7379 3617220 : if (GWKGetPixelValue(
7380 : poWK, iBand, iSrcOffset, &dfBandDensity,
7381 7234430 : &dfValueRealTmp, &dfValueImagTmp) &&
7382 3617220 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7383 : {
7384 3617220 : bHasSourceValues = true;
7385 3617220 : const int nVal =
7386 3617220 : static_cast<int>(dfValueRealTmp);
7387 3617220 : const int iBin = nVal + nBinsOffset;
7388 3617220 : const double dfWeight =
7389 3617220 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7390 :
7391 : // Sum the density.
7392 3617220 : pafCounts[iBin] +=
7393 3617220 : static_cast<float>(dfWeight);
7394 : // Is it the most common value so far?
7395 3617220 : bool bUpdateMode =
7396 3617220 : pafCounts[iBin] > fMaxCount;
7397 3617220 : if (!bUpdateMode &&
7398 778312 : pafCounts[iBin] == fMaxCount)
7399 : {
7400 218624 : switch (eTieStrategy)
7401 : {
7402 218616 : case GWKTS_First:
7403 218616 : break;
7404 4 : case GWKTS_Min:
7405 4 : bUpdateMode = nVal < nMode;
7406 4 : break;
7407 4 : case GWKTS_Max:
7408 4 : bUpdateMode = nVal > nMode;
7409 4 : break;
7410 : }
7411 : }
7412 3617220 : if (bUpdateMode)
7413 : {
7414 2838910 : nMode = nVal;
7415 2838910 : fMaxCount = pafCounts[iBin];
7416 : }
7417 : }
7418 : }
7419 : }
7420 :
7421 496619 : if (bHasSourceValues)
7422 : {
7423 496619 : dfValueReal = nMode;
7424 :
7425 496619 : if (poWK->bApplyVerticalShift)
7426 : {
7427 0 : if (!std::isfinite(padfZ[iDstX]))
7428 0 : continue;
7429 : // Subtract padfZ[] since the coordinate
7430 : // transformation is from target to source
7431 0 : dfValueReal =
7432 0 : dfValueReal *
7433 0 : poWK->dfMultFactorVerticalShift -
7434 0 : padfZ[iDstX] *
7435 : dfMultFactorVerticalShiftPipeline;
7436 : }
7437 :
7438 496619 : dfBandDensity = 1;
7439 496619 : bHasFoundDensity = true;
7440 : }
7441 500026 : }
7442 : } // GRA_Mode.
7443 1975930 : else if (nAlgo == GWKAOM_Max)
7444 : // poWK->eResample == GRA_Max.
7445 : {
7446 335037 : bool bFoundValid = false;
7447 335037 : double dfTotalReal = cpl::NumericLimits<double>::lowest();
7448 : // This code adapted from nAlgo 1 method, GRA_Average.
7449 1288010 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7450 : {
7451 952975 : iSrcOffset = iSrcXMin +
7452 952975 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7453 4406540 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7454 : iSrcX++, iSrcOffset++)
7455 : {
7456 3453560 : if (bWrapOverX)
7457 630 : iSrcOffset =
7458 630 : (iSrcX % nSrcXSize) +
7459 630 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7460 :
7461 3456370 : if (poWK->panUnifiedSrcValid != nullptr &&
7462 2809 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7463 : iSrcOffset))
7464 : {
7465 2446 : continue;
7466 : }
7467 :
7468 : // Returns pixel value if it is not no data.
7469 3451120 : if (GWKGetPixelValue(
7470 : poWK, iBand, iSrcOffset, &dfBandDensity,
7471 6902230 : &dfValueRealTmp, &dfValueImagTmp) &&
7472 3451120 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7473 : {
7474 3451120 : bFoundValid = true;
7475 3451120 : if (dfTotalReal < dfValueRealTmp)
7476 : {
7477 442642 : dfTotalReal = dfValueRealTmp;
7478 : }
7479 : }
7480 : }
7481 : }
7482 :
7483 335037 : if (bFoundValid)
7484 : {
7485 335037 : dfValueReal = dfTotalReal;
7486 :
7487 335037 : if (poWK->bApplyVerticalShift)
7488 : {
7489 0 : if (!std::isfinite(padfZ[iDstX]))
7490 0 : continue;
7491 : // Subtract padfZ[] since the coordinate
7492 : // transformation is from target to source
7493 0 : dfValueReal =
7494 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
7495 0 : padfZ[iDstX] *
7496 : dfMultFactorVerticalShiftPipeline;
7497 : }
7498 :
7499 335037 : dfBandDensity = 1;
7500 335037 : bHasFoundDensity = true;
7501 : }
7502 : } // GRA_Max.
7503 1640900 : else if (nAlgo == GWKAOM_Min)
7504 : // poWK->eResample == GRA_Min.
7505 : {
7506 335012 : bool bFoundValid = false;
7507 335012 : double dfTotalReal = cpl::NumericLimits<double>::max();
7508 : // This code adapted from nAlgo 1 method, GRA_Average.
7509 1287720 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7510 : {
7511 952710 : iSrcOffset = iSrcXMin +
7512 952710 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7513 4403460 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7514 : iSrcX++, iSrcOffset++)
7515 : {
7516 3450750 : if (bWrapOverX)
7517 630 : iSrcOffset =
7518 630 : (iSrcX % nSrcXSize) +
7519 630 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7520 :
7521 3450750 : if (poWK->panUnifiedSrcValid != nullptr &&
7522 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7523 : iSrcOffset))
7524 : {
7525 0 : continue;
7526 : }
7527 :
7528 : // Returns pixel value if it is not no data.
7529 3450750 : if (GWKGetPixelValue(
7530 : poWK, iBand, iSrcOffset, &dfBandDensity,
7531 6901500 : &dfValueRealTmp, &dfValueImagTmp) &&
7532 3450750 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7533 : {
7534 3450750 : bFoundValid = true;
7535 3450750 : if (dfTotalReal > dfValueRealTmp)
7536 : {
7537 443069 : dfTotalReal = dfValueRealTmp;
7538 : }
7539 : }
7540 : }
7541 : }
7542 :
7543 335012 : if (bFoundValid)
7544 : {
7545 335012 : dfValueReal = dfTotalReal;
7546 :
7547 335012 : if (poWK->bApplyVerticalShift)
7548 : {
7549 0 : if (!std::isfinite(padfZ[iDstX]))
7550 0 : continue;
7551 : // Subtract padfZ[] since the coordinate
7552 : // transformation is from target to source
7553 0 : dfValueReal =
7554 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
7555 0 : padfZ[iDstX] *
7556 : dfMultFactorVerticalShiftPipeline;
7557 : }
7558 :
7559 335012 : dfBandDensity = 1;
7560 335012 : bHasFoundDensity = true;
7561 : }
7562 : } // GRA_Min.
7563 1305880 : else if (nAlgo == GWKAOM_Quant)
7564 : // poWK->eResample == GRA_Med | GRA_Q1 | GRA_Q3.
7565 : {
7566 1005040 : bool bFoundValid = false;
7567 1005040 : std::vector<double> dfRealValuesTmp;
7568 :
7569 : // This code adapted from nAlgo 1 method, GRA_Average.
7570 3863170 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7571 : {
7572 2858130 : iSrcOffset = iSrcXMin +
7573 2858130 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7574 13210400 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7575 : iSrcX++, iSrcOffset++)
7576 : {
7577 10352300 : if (bWrapOverX)
7578 1890 : iSrcOffset =
7579 1890 : (iSrcX % nSrcXSize) +
7580 1890 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7581 :
7582 10352300 : if (poWK->panUnifiedSrcValid != nullptr &&
7583 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7584 : iSrcOffset))
7585 : {
7586 0 : continue;
7587 : }
7588 :
7589 : // Returns pixel value if it is not no data.
7590 10352300 : if (GWKGetPixelValue(
7591 : poWK, iBand, iSrcOffset, &dfBandDensity,
7592 20704500 : &dfValueRealTmp, &dfValueImagTmp) &&
7593 10352300 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7594 : {
7595 10352300 : bFoundValid = true;
7596 10352300 : dfRealValuesTmp.push_back(dfValueRealTmp);
7597 : }
7598 : }
7599 : }
7600 :
7601 1005040 : if (bFoundValid)
7602 : {
7603 1005040 : std::sort(dfRealValuesTmp.begin(),
7604 : dfRealValuesTmp.end());
7605 : int quantIdx = static_cast<int>(
7606 1005040 : std::ceil(quant * dfRealValuesTmp.size() - 1));
7607 1005040 : dfValueReal = dfRealValuesTmp[quantIdx];
7608 :
7609 1005040 : if (poWK->bApplyVerticalShift)
7610 : {
7611 0 : if (!std::isfinite(padfZ[iDstX]))
7612 0 : continue;
7613 : // Subtract padfZ[] since the coordinate
7614 : // transformation is from target to source
7615 0 : dfValueReal =
7616 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
7617 0 : padfZ[iDstX] *
7618 : dfMultFactorVerticalShiftPipeline;
7619 : }
7620 :
7621 1005040 : dfBandDensity = 1;
7622 1005040 : bHasFoundDensity = true;
7623 1005040 : dfRealValuesTmp.clear();
7624 : }
7625 : } // Quantile.
7626 :
7627 : /* --------------------------------------------------------------------
7628 : */
7629 : /* We have a computed value from the source. Now apply it
7630 : * to */
7631 : /* the destination pixel. */
7632 : /* --------------------------------------------------------------------
7633 : */
7634 2776380 : if (bHasFoundDensity)
7635 : {
7636 : // TODO: Should we compute dfBandDensity in fct of
7637 : // nCount/nCount2, or use as a threshold to set the dest
7638 : // value?
7639 : // dfBandDensity = (float) nCount / nCount2;
7640 : // if( (float) nCount / nCount2 > 0.1 )
7641 : // or fix gdalwarp crop_to_cutline to crop partially
7642 : // overlapping pixels.
7643 2776380 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
7644 : dfValueReal, dfValueImag);
7645 : }
7646 : }
7647 :
7648 1663170 : if (!bHasFoundDensity)
7649 311290 : continue;
7650 :
7651 : /* --------------------------------------------------------------------
7652 : */
7653 : /* Update destination density/validity masks. */
7654 : /* --------------------------------------------------------------------
7655 : */
7656 1351880 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
7657 :
7658 1351880 : if (poWK->panDstValid != nullptr)
7659 : {
7660 74 : CPLMaskSet(poWK->panDstValid, iDstOffset);
7661 : }
7662 : } /* Next iDstX */
7663 :
7664 : /* --------------------------------------------------------------------
7665 : */
7666 : /* Report progress to the user, and optionally cancel out. */
7667 : /* --------------------------------------------------------------------
7668 : */
7669 6497 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
7670 0 : break;
7671 : }
7672 :
7673 : /* -------------------------------------------------------------------- */
7674 : /* Cleanup and return. */
7675 : /* -------------------------------------------------------------------- */
7676 130 : CPLFree(padfX);
7677 130 : CPLFree(padfY);
7678 130 : CPLFree(padfZ);
7679 130 : CPLFree(padfX2);
7680 130 : CPLFree(padfY2);
7681 130 : CPLFree(padfZ2);
7682 130 : CPLFree(pabSuccess);
7683 130 : CPLFree(pabSuccess2);
7684 130 : VSIFree(pafCounts);
7685 130 : VSIFree(pafRealVals);
7686 : }
7687 :
7688 : /************************************************************************/
7689 : /* getOrientation() */
7690 : /************************************************************************/
7691 :
typedef std::pair<double, double> XYPair;

// Classifies the turn made when going from p1 to p2 and then on to p3.
//
// Returns 1 if (p1,p2,p3) is clockwise oriented, -1 if it is
// counter-clockwise oriented, and 0 if the three points are colinear
// (absolute value of the cross product below 1e-20).
static int getOrientation(const XYPair &p1, const XYPair &p2, const XYPair &p3)
{
    // Edge vectors p1->p2 and p2->p3.
    const double dfEdge1X = p2.first - p1.first;
    const double dfEdge1Y = p2.second - p1.second;
    const double dfEdge2X = p3.first - p2.first;
    const double dfEdge2Y = p3.second - p2.second;

    // The sign of this cross product gives the direction of the turn.
    const double dfCross = dfEdge1Y * dfEdge2X - dfEdge1X * dfEdge2Y;
    if (std::abs(dfCross) < 1e-20)
        return 0;
    return dfCross > 0 ? 1 : -1;
}
7713 :
7714 : /************************************************************************/
7715 : /* isConvex() */
7716 : /************************************************************************/
7717 :
7718 : typedef std::vector<XYPair> XYPoly;
7719 :
7720 : // poly must be closed
7721 785302 : static bool isConvex(const XYPoly &poly)
7722 : {
7723 785302 : const size_t n = poly.size();
7724 785302 : size_t i = 0;
7725 785302 : int last_orientation = getOrientation(poly[i], poly[i + 1], poly[i + 2]);
7726 785302 : ++i;
7727 2355910 : for (; i < n - 2; ++i)
7728 : {
7729 : const int orientation =
7730 1570600 : getOrientation(poly[i], poly[i + 1], poly[i + 2]);
7731 1570600 : if (orientation != 0)
7732 : {
7733 1567910 : if (last_orientation == 0)
7734 0 : last_orientation = orientation;
7735 1567910 : else if (orientation != last_orientation)
7736 0 : return false;
7737 : }
7738 : }
7739 785302 : return true;
7740 : }
7741 :
7742 : /************************************************************************/
7743 : /* pointIntersectsConvexPoly() */
7744 : /************************************************************************/
7745 :
7746 : // Returns whether xy intersects poly, that must be closed and convex.
7747 6049100 : static bool pointIntersectsConvexPoly(const XYPair &xy, const XYPoly &poly)
7748 : {
7749 6049100 : const size_t n = poly.size();
7750 6049100 : double dx1 = xy.first - poly[0].first;
7751 6049100 : double dy1 = xy.second - poly[0].second;
7752 6049100 : double dx2 = poly[1].first - poly[0].first;
7753 6049100 : double dy2 = poly[1].second - poly[0].second;
7754 6049100 : double prevCrossProduct = dx1 * dy2 - dx2 * dy1;
7755 :
7756 : // Check if the point remains on the same side (left/right) of all edges
7757 14556400 : for (size_t i = 2; i < n; i++)
7758 : {
7759 12793100 : dx1 = xy.first - poly[i - 1].first;
7760 12793100 : dy1 = xy.second - poly[i - 1].second;
7761 :
7762 12793100 : dx2 = poly[i].first - poly[i - 1].first;
7763 12793100 : dy2 = poly[i].second - poly[i - 1].second;
7764 :
7765 12793100 : double crossProduct = dx1 * dy2 - dx2 * dy1;
7766 12793100 : if (std::abs(prevCrossProduct) < 1e-20)
7767 725558 : prevCrossProduct = crossProduct;
7768 12067500 : else if (prevCrossProduct * crossProduct < 0)
7769 4285760 : return false;
7770 : }
7771 :
7772 1763340 : return true;
7773 : }
7774 :
7775 : /************************************************************************/
7776 : /* getIntersection() */
7777 : /************************************************************************/
7778 :
7779 : /* Returns intersection of [p1,p2] with [p3,p4], if
7780 : * it is a single point, and the 2 segments are not colinear.
7781 : */
7782 11811000 : static bool getIntersection(const XYPair &p1, const XYPair &p2,
7783 : const XYPair &p3, const XYPair &p4, XYPair &xy)
7784 : {
7785 11811000 : const double x1 = p1.first;
7786 11811000 : const double y1 = p1.second;
7787 11811000 : const double x2 = p2.first;
7788 11811000 : const double y2 = p2.second;
7789 11811000 : const double x3 = p3.first;
7790 11811000 : const double y3 = p3.second;
7791 11811000 : const double x4 = p4.first;
7792 11811000 : const double y4 = p4.second;
7793 11811000 : const double t_num = (x1 - x3) * (y3 - y4) - (y1 - y3) * (x3 - x4);
7794 11811000 : const double denom = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4);
7795 11811000 : if (t_num * denom < 0 || std::abs(t_num) > std::abs(denom) || denom == 0)
7796 9260780 : return false;
7797 :
7798 2550260 : const double u_num = (x1 - x3) * (y1 - y2) - (y1 - y3) * (x1 - x2);
7799 2550260 : if (u_num * denom < 0 || std::abs(u_num) > std::abs(denom))
7800 973924 : return false;
7801 :
7802 1576340 : const double t = t_num / denom;
7803 1576340 : xy.first = x1 + t * (x2 - x1);
7804 1576340 : xy.second = y1 + t * (y2 - y1);
7805 1576340 : return true;
7806 : }
7807 :
7808 : /************************************************************************/
7809 : /* getConvexPolyIntersection() */
7810 : /************************************************************************/
7811 :
7812 : // poly1 and poly2 must be closed and convex.
7813 : // The returned intersection will not necessary be closed.
7814 785302 : static void getConvexPolyIntersection(const XYPoly &poly1, const XYPoly &poly2,
7815 : XYPoly &intersection)
7816 : {
7817 785302 : intersection.clear();
7818 :
7819 : // Add all points of poly1 inside poly2
7820 3926510 : for (size_t i = 0; i < poly1.size() - 1; ++i)
7821 : {
7822 3141210 : if (pointIntersectsConvexPoly(poly1[i], poly2))
7823 1187430 : intersection.push_back(poly1[i]);
7824 : }
7825 785302 : if (intersection.size() == poly1.size() - 1)
7826 : {
7827 : // poly1 is inside poly2
7828 119100 : return;
7829 : }
7830 :
7831 : // Add all points of poly2 inside poly1
7832 3634860 : for (size_t i = 0; i < poly2.size() - 1; ++i)
7833 : {
7834 2907890 : if (pointIntersectsConvexPoly(poly2[i], poly1))
7835 575904 : intersection.push_back(poly2[i]);
7836 : }
7837 :
7838 : // Compute the intersection of all edges of both polygons
7839 726972 : XYPair xy;
7840 3634860 : for (size_t i1 = 0; i1 < poly1.size() - 1; ++i1)
7841 : {
7842 14539400 : for (size_t i2 = 0; i2 < poly2.size() - 1; ++i2)
7843 : {
7844 11631600 : if (getIntersection(poly1[i1], poly1[i1 + 1], poly2[i2],
7845 11631600 : poly2[i2 + 1], xy))
7846 : {
7847 1576230 : intersection.push_back(xy);
7848 : }
7849 : }
7850 : }
7851 :
7852 726972 : if (intersection.empty())
7853 60770 : return;
7854 :
7855 : // Find lowest-left point in intersection set
7856 666202 : double lowest_x = cpl::NumericLimits<double>::max();
7857 666202 : double lowest_y = cpl::NumericLimits<double>::max();
7858 3772450 : for (const auto &pair : intersection)
7859 : {
7860 3106240 : const double x = pair.first;
7861 3106240 : const double y = pair.second;
7862 3106240 : if (y < lowest_y || (y == lowest_y && x < lowest_x))
7863 : {
7864 1096040 : lowest_x = x;
7865 1096040 : lowest_y = y;
7866 : }
7867 : }
7868 :
7869 5737980 : const auto sortFunc = [&](const XYPair &p1, const XYPair &p2)
7870 : {
7871 5737980 : const double p1x_diff = p1.first - lowest_x;
7872 5737980 : const double p1y_diff = p1.second - lowest_y;
7873 5737980 : const double p2x_diff = p2.first - lowest_x;
7874 5737980 : const double p2y_diff = p2.second - lowest_y;
7875 5737980 : if (p2y_diff == 0.0 && p1y_diff == 0.0)
7876 : {
7877 2655420 : if (p1x_diff >= 0)
7878 : {
7879 2655420 : if (p2x_diff >= 0)
7880 2655420 : return p1.first < p2.first;
7881 0 : return true;
7882 : }
7883 : else
7884 : {
7885 0 : if (p2x_diff >= 0)
7886 0 : return false;
7887 0 : return p1.first < p2.first;
7888 : }
7889 : }
7890 :
7891 3082560 : if (p2x_diff == 0.0 && p1x_diff == 0.0)
7892 1046960 : return p1.second < p2.second;
7893 :
7894 : double tan_p1;
7895 2035600 : if (p1x_diff == 0.0)
7896 464622 : tan_p1 = p1y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
7897 : else
7898 1570980 : tan_p1 = p1y_diff / p1x_diff;
7899 :
7900 : double tan_p2;
7901 2035600 : if (p2x_diff == 0.0)
7902 839515 : tan_p2 = p2y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
7903 : else
7904 1196080 : tan_p2 = p2y_diff / p2x_diff;
7905 :
7906 2035600 : if (tan_p1 >= 0)
7907 : {
7908 1904790 : if (tan_p2 >= 0)
7909 1881590 : return tan_p1 < tan_p2;
7910 : else
7911 23199 : return true;
7912 : }
7913 : else
7914 : {
7915 130806 : if (tan_p2 >= 0)
7916 103900 : return false;
7917 : else
7918 26906 : return tan_p1 < tan_p2;
7919 : }
7920 666202 : };
7921 :
7922 : // Sort points by increasing atan2(y-lowest_y, x-lowest_x) to form a convex
7923 : // hull
7924 666202 : std::sort(intersection.begin(), intersection.end(), sortFunc);
7925 :
7926 : // Remove duplicated points
7927 666202 : size_t j = 1;
7928 3106240 : for (size_t i = 1; i < intersection.size(); ++i)
7929 : {
7930 2440040 : if (intersection[i] != intersection[i - 1])
7931 : {
7932 1452560 : if (j < i)
7933 545275 : intersection[j] = intersection[i];
7934 1452560 : ++j;
7935 : }
7936 : }
7937 666202 : intersection.resize(j);
7938 : }
7939 :
7940 : /************************************************************************/
7941 : /* getArea() */
7942 : /************************************************************************/
7943 :
7944 : // poly may or may not be closed.
7945 558521 : static double getArea(const XYPoly &poly)
7946 : {
7947 : // CPLAssert(poly.size() >= 2);
7948 558521 : const size_t nPointCount = poly.size();
7949 : double dfAreaSum =
7950 558521 : poly[0].first * (poly[1].second - poly[nPointCount - 1].second);
7951 :
7952 1765140 : for (size_t i = 1; i < nPointCount - 1; i++)
7953 : {
7954 1206610 : dfAreaSum += poly[i].first * (poly[i + 1].second - poly[i - 1].second);
7955 : }
7956 :
7957 558521 : dfAreaSum += poly[nPointCount - 1].first *
7958 558521 : (poly[0].second - poly[nPointCount - 2].second);
7959 :
7960 558521 : return 0.5 * std::fabs(dfAreaSum);
7961 : }
7962 :
7963 : /************************************************************************/
7964 : /* GWKSumPreserving() */
7965 : /************************************************************************/
7966 :
7967 : static void GWKSumPreservingThread(void *pData);
7968 :
7969 18 : static CPLErr GWKSumPreserving(GDALWarpKernel *poWK)
7970 : {
7971 18 : return GWKRun(poWK, "GWKSumPreserving", GWKSumPreservingThread);
7972 : }
7973 :
7974 18 : static void GWKSumPreservingThread(void *pData)
7975 : {
7976 18 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
7977 18 : GDALWarpKernel *poWK = psJob->poWK;
7978 18 : const int iYMin = psJob->iYMin;
7979 18 : const int iYMax = psJob->iYMax;
7980 : const bool bIsAffineNoRotation =
7981 18 : GDALTransformIsAffineNoRotation(poWK->pfnTransformer,
7982 26 : poWK->pTransformerArg) &&
7983 : // for debug/testing purposes
7984 8 : CPLTestBool(
7985 18 : CPLGetConfigOption("GDAL_WARP_USE_AFFINE_OPTIMIZATION", "YES"));
7986 :
7987 18 : const int nDstXSize = poWK->nDstXSize;
7988 18 : const int nSrcXSize = poWK->nSrcXSize;
7989 18 : const int nSrcYSize = poWK->nSrcYSize;
7990 :
7991 36 : std::vector<double> adfX0(nSrcXSize + 1);
7992 36 : std::vector<double> adfY0(nSrcXSize + 1);
7993 36 : std::vector<double> adfZ0(nSrcXSize + 1);
7994 36 : std::vector<double> adfX1(nSrcXSize + 1);
7995 36 : std::vector<double> adfY1(nSrcXSize + 1);
7996 36 : std::vector<double> adfZ1(nSrcXSize + 1);
7997 36 : std::vector<int> abSuccess0(nSrcXSize + 1);
7998 36 : std::vector<int> abSuccess1(nSrcXSize + 1);
7999 :
8000 : CPLRectObj sGlobalBounds;
8001 18 : sGlobalBounds.minx = -2 * poWK->dfXScale;
8002 18 : sGlobalBounds.miny = iYMin - 2 * poWK->dfYScale;
8003 18 : sGlobalBounds.maxx = nDstXSize + 2 * poWK->dfXScale;
8004 18 : sGlobalBounds.maxy = iYMax + 2 * poWK->dfYScale;
8005 18 : CPLQuadTree *hQuadTree = CPLQuadTreeCreate(&sGlobalBounds, nullptr);
8006 :
8007 : struct SourcePixel
8008 : {
8009 : int iSrcX;
8010 : int iSrcY;
8011 :
8012 : // Coordinates of source pixel in target pixel coordinates
8013 : double dfDstX0;
8014 : double dfDstY0;
8015 : double dfDstX1;
8016 : double dfDstY1;
8017 : double dfDstX2;
8018 : double dfDstY2;
8019 : double dfDstX3;
8020 : double dfDstY3;
8021 :
8022 : // Source pixel total area (might be larger than the one described
8023 : // by above coordinates, if the pixel was crossing the antimeridian
8024 : // and split)
8025 : double dfArea;
8026 : };
8027 :
8028 36 : std::vector<SourcePixel> sourcePixels;
8029 :
8030 36 : XYPoly discontinuityLeft(5);
8031 36 : XYPoly discontinuityRight(5);
8032 :
8033 : /* ==================================================================== */
8034 : /* First pass: transform the 4 corners of each potential */
8035 : /* contributing source pixel to target pixel coordinates. */
8036 : /* ==================================================================== */
8037 :
8038 : // Special case for top line
8039 : {
8040 18 : int iY = 0;
8041 1130 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8042 : {
8043 1112 : adfX1[iX] = iX + poWK->nSrcXOff;
8044 1112 : adfY1[iX] = iY + poWK->nSrcYOff;
8045 1112 : adfZ1[iX] = 0;
8046 : }
8047 :
8048 18 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
8049 : adfX1.data(), adfY1.data(), adfZ1.data(),
8050 : abSuccess1.data());
8051 :
8052 1130 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8053 : {
8054 1112 : if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
8055 0 : abSuccess1[iX] = FALSE;
8056 : else
8057 : {
8058 1112 : adfX1[iX] -= poWK->nDstXOff;
8059 1112 : adfY1[iX] -= poWK->nDstYOff;
8060 : }
8061 : }
8062 : }
8063 :
8064 413412 : const auto getInsideXSign = [poWK, nDstXSize](double dfX)
8065 : {
8066 413412 : return dfX - poWK->nDstXOff >= -2 * poWK->dfXScale &&
8067 205344 : dfX - poWK->nDstXOff <= nDstXSize + 2 * poWK->dfXScale
8068 413412 : ? 1
8069 208068 : : -1;
8070 18 : };
8071 :
8072 : const auto FindDiscontinuity =
8073 80 : [poWK, psJob, getInsideXSign](
8074 : double dfXLeft, double dfXRight, double dfY,
8075 : int XLeftReprojectedInsideSign, double &dfXMidReprojectedLeft,
8076 800 : double &dfXMidReprojectedRight, double &dfYMidReprojected)
8077 : {
8078 880 : for (int i = 0; i < 10 && dfXRight - dfXLeft > 1e-8; ++i)
8079 : {
8080 800 : double dfXMid = (dfXLeft + dfXRight) / 2;
8081 800 : double dfXMidReprojected = dfXMid;
8082 800 : dfYMidReprojected = dfY;
8083 800 : double dfZ = 0;
8084 800 : int nSuccess = 0;
8085 800 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1,
8086 : &dfXMidReprojected, &dfYMidReprojected, &dfZ,
8087 : &nSuccess);
8088 800 : if (XLeftReprojectedInsideSign != getInsideXSign(dfXMidReprojected))
8089 : {
8090 456 : dfXRight = dfXMid;
8091 456 : dfXMidReprojectedRight = dfXMidReprojected;
8092 : }
8093 : else
8094 : {
8095 344 : dfXLeft = dfXMid;
8096 344 : dfXMidReprojectedLeft = dfXMidReprojected;
8097 : }
8098 : }
8099 80 : };
8100 :
8101 566 : for (int iY = 0; iY < nSrcYSize; ++iY)
8102 : {
8103 548 : std::swap(adfX0, adfX1);
8104 548 : std::swap(adfY0, adfY1);
8105 548 : std::swap(adfZ0, adfZ1);
8106 548 : std::swap(abSuccess0, abSuccess1);
8107 :
8108 104512 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8109 : {
8110 103964 : adfX1[iX] = iX + poWK->nSrcXOff;
8111 103964 : adfY1[iX] = iY + 1 + poWK->nSrcYOff;
8112 103964 : adfZ1[iX] = 0;
8113 : }
8114 :
8115 548 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
8116 : adfX1.data(), adfY1.data(), adfZ1.data(),
8117 : abSuccess1.data());
8118 :
8119 104512 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8120 : {
8121 103964 : if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
8122 0 : abSuccess1[iX] = FALSE;
8123 : else
8124 : {
8125 103964 : adfX1[iX] -= poWK->nDstXOff;
8126 103964 : adfY1[iX] -= poWK->nDstYOff;
8127 : }
8128 : }
8129 :
8130 103964 : for (int iX = 0; iX < nSrcXSize; ++iX)
8131 : {
8132 206832 : if (abSuccess0[iX] && abSuccess0[iX + 1] && abSuccess1[iX] &&
8133 103416 : abSuccess1[iX + 1])
8134 : {
8135 : /* --------------------------------------------------------------------
8136 : */
8137 : /* Do not try to apply transparent source pixels to the
8138 : * destination.*/
8139 : /* --------------------------------------------------------------------
8140 : */
8141 103416 : const auto iSrcOffset =
8142 103416 : iX + static_cast<GPtrDiff_t>(iY) * nSrcXSize;
8143 105816 : if (poWK->panUnifiedSrcValid != nullptr &&
8144 2400 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
8145 : {
8146 10971 : continue;
8147 : }
8148 :
8149 103410 : if (poWK->pafUnifiedSrcDensity != nullptr)
8150 : {
8151 0 : if (poWK->pafUnifiedSrcDensity[iSrcOffset] <
8152 : SRC_DENSITY_THRESHOLD)
8153 0 : continue;
8154 : }
8155 :
8156 : SourcePixel sp;
8157 103410 : sp.dfArea = 0;
8158 103410 : sp.dfDstX0 = adfX0[iX];
8159 103410 : sp.dfDstY0 = adfY0[iX];
8160 103410 : sp.dfDstX1 = adfX0[iX + 1];
8161 103410 : sp.dfDstY1 = adfY0[iX + 1];
8162 103410 : sp.dfDstX2 = adfX1[iX + 1];
8163 103410 : sp.dfDstY2 = adfY1[iX + 1];
8164 103410 : sp.dfDstX3 = adfX1[iX];
8165 103410 : sp.dfDstY3 = adfY1[iX];
8166 :
8167 : // Detect pixel that likely cross the anti-meridian and
8168 : // introduce a discontinuity when reprojected.
8169 :
8170 103410 : if (getInsideXSign(adfX0[iX]) !=
8171 103506 : getInsideXSign(adfX0[iX + 1]) &&
8172 164 : getInsideXSign(adfX0[iX]) == getInsideXSign(adfX1[iX]) &&
8173 68 : getInsideXSign(adfX0[iX + 1]) ==
8174 103574 : getInsideXSign(adfX1[iX + 1]) &&
8175 40 : (adfY1[iX] - adfY0[iX]) * (adfY1[iX + 1] - adfY0[iX + 1]) >
8176 : 0)
8177 : {
8178 40 : double dfXMidReprojectedLeftTop = 0;
8179 40 : double dfXMidReprojectedRightTop = 0;
8180 40 : double dfYMidReprojectedTop = 0;
8181 40 : FindDiscontinuity(
8182 40 : iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
8183 80 : iY + poWK->nSrcYOff, getInsideXSign(adfX0[iX]),
8184 : dfXMidReprojectedLeftTop, dfXMidReprojectedRightTop,
8185 : dfYMidReprojectedTop);
8186 40 : double dfXMidReprojectedLeftBottom = 0;
8187 40 : double dfXMidReprojectedRightBottom = 0;
8188 40 : double dfYMidReprojectedBottom = 0;
8189 40 : FindDiscontinuity(
8190 40 : iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
8191 80 : iY + poWK->nSrcYOff + 1, getInsideXSign(adfX1[iX]),
8192 : dfXMidReprojectedLeftBottom,
8193 : dfXMidReprojectedRightBottom, dfYMidReprojectedBottom);
8194 :
8195 40 : discontinuityLeft[0] = XYPair(adfX0[iX], adfY0[iX]);
8196 40 : discontinuityLeft[1] =
8197 80 : XYPair(dfXMidReprojectedLeftTop, dfYMidReprojectedTop);
8198 40 : discontinuityLeft[2] = XYPair(dfXMidReprojectedLeftBottom,
8199 40 : dfYMidReprojectedBottom);
8200 40 : discontinuityLeft[3] = XYPair(adfX1[iX], adfY1[iX]);
8201 40 : discontinuityLeft[4] = XYPair(adfX0[iX], adfY0[iX]);
8202 :
8203 40 : discontinuityRight[0] =
8204 80 : XYPair(adfX0[iX + 1], adfY0[iX + 1]);
8205 40 : discontinuityRight[1] =
8206 80 : XYPair(dfXMidReprojectedRightTop, dfYMidReprojectedTop);
8207 40 : discontinuityRight[2] = XYPair(dfXMidReprojectedRightBottom,
8208 40 : dfYMidReprojectedBottom);
8209 40 : discontinuityRight[3] =
8210 80 : XYPair(adfX1[iX + 1], adfY1[iX + 1]);
8211 40 : discontinuityRight[4] =
8212 80 : XYPair(adfX0[iX + 1], adfY0[iX + 1]);
8213 :
8214 40 : sp.dfArea = getArea(discontinuityLeft) +
8215 40 : getArea(discontinuityRight);
8216 40 : if (getInsideXSign(adfX0[iX]) >= 1)
8217 : {
8218 20 : sp.dfDstX1 = dfXMidReprojectedLeftTop;
8219 20 : sp.dfDstY1 = dfYMidReprojectedTop;
8220 20 : sp.dfDstX2 = dfXMidReprojectedLeftBottom;
8221 20 : sp.dfDstY2 = dfYMidReprojectedBottom;
8222 : }
8223 : else
8224 : {
8225 20 : sp.dfDstX0 = dfXMidReprojectedRightTop;
8226 20 : sp.dfDstY0 = dfYMidReprojectedTop;
8227 20 : sp.dfDstX3 = dfXMidReprojectedRightBottom;
8228 20 : sp.dfDstY3 = dfYMidReprojectedBottom;
8229 : }
8230 : }
8231 :
8232 : // Bounding box of source pixel (expressed in target pixel
8233 : // coordinates)
8234 : CPLRectObj sRect;
8235 103410 : sRect.minx = std::min(std::min(sp.dfDstX0, sp.dfDstX1),
8236 103410 : std::min(sp.dfDstX2, sp.dfDstX3));
8237 103410 : sRect.miny = std::min(std::min(sp.dfDstY0, sp.dfDstY1),
8238 103410 : std::min(sp.dfDstY2, sp.dfDstY3));
8239 103410 : sRect.maxx = std::max(std::max(sp.dfDstX0, sp.dfDstX1),
8240 103410 : std::max(sp.dfDstX2, sp.dfDstX3));
8241 103410 : sRect.maxy = std::max(std::max(sp.dfDstY0, sp.dfDstY1),
8242 103410 : std::max(sp.dfDstY2, sp.dfDstY3));
8243 103410 : if (!(sRect.minx < nDstXSize && sRect.maxx > 0 &&
8244 101350 : sRect.miny < iYMax && sRect.maxy > iYMin))
8245 : {
8246 10852 : continue;
8247 : }
8248 :
8249 92558 : sp.iSrcX = iX;
8250 92558 : sp.iSrcY = iY;
8251 :
8252 92558 : if (!bIsAffineNoRotation)
8253 : {
8254 : // Check polygon validity (no self-crossing)
8255 89745 : XYPair xy;
8256 89745 : if (getIntersection(XYPair(sp.dfDstX0, sp.dfDstY0),
8257 89745 : XYPair(sp.dfDstX1, sp.dfDstY1),
8258 89745 : XYPair(sp.dfDstX2, sp.dfDstY2),
8259 269235 : XYPair(sp.dfDstX3, sp.dfDstY3), xy) ||
8260 89745 : getIntersection(XYPair(sp.dfDstX1, sp.dfDstY1),
8261 89745 : XYPair(sp.dfDstX2, sp.dfDstY2),
8262 89745 : XYPair(sp.dfDstX0, sp.dfDstY0),
8263 179490 : XYPair(sp.dfDstX3, sp.dfDstY3), xy))
8264 : {
8265 113 : continue;
8266 : }
8267 : }
8268 :
8269 92445 : CPLQuadTreeInsertWithBounds(
8270 : hQuadTree,
8271 : reinterpret_cast<void *>(
8272 92445 : static_cast<uintptr_t>(sourcePixels.size())),
8273 : &sRect);
8274 :
8275 92445 : sourcePixels.push_back(sp);
8276 : }
8277 : }
8278 : }
8279 :
8280 36 : std::vector<double> adfRealValue(poWK->nBands);
8281 36 : std::vector<double> adfImagValue(poWK->nBands);
8282 36 : std::vector<double> adfBandDensity(poWK->nBands);
8283 36 : std::vector<double> adfWeight(poWK->nBands);
8284 :
8285 : #ifdef CHECK_SUM_WITH_GEOS
8286 : auto hGEOSContext = OGRGeometry::createGEOSContext();
8287 : auto seq1 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
8288 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 0, 0.0, 0.0);
8289 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 1, 1.0, 0.0);
8290 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 2, 1.0, 1.0);
8291 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 3, 0.0, 1.0);
8292 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 4, 0.0, 0.0);
8293 : auto hLR1 = GEOSGeom_createLinearRing_r(hGEOSContext, seq1);
8294 : auto hP1 = GEOSGeom_createPolygon_r(hGEOSContext, hLR1, nullptr, 0);
8295 :
8296 : auto seq2 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
8297 : auto hLR2 = GEOSGeom_createLinearRing_r(hGEOSContext, seq2);
8298 : auto hP2 = GEOSGeom_createPolygon_r(hGEOSContext, hLR2, nullptr, 0);
8299 : #endif
8300 :
8301 : const XYPoly xy1{
8302 36 : {0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}};
8303 36 : XYPoly xy2(5);
8304 36 : XYPoly xy2_triangle(4);
8305 36 : XYPoly intersection;
8306 :
8307 : /* ==================================================================== */
8308 : /* Loop over output lines. */
8309 : /* ==================================================================== */
8310 891 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
8311 : {
8312 : CPLRectObj sRect;
8313 873 : sRect.miny = iDstY;
8314 873 : sRect.maxy = iDstY + 1;
8315 :
8316 : /* ====================================================================
8317 : */
8318 : /* Loop over pixels in output scanline. */
8319 : /* ====================================================================
8320 : */
8321 221042 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
8322 : {
8323 220169 : sRect.minx = iDstX;
8324 220169 : sRect.maxx = iDstX + 1;
8325 220169 : int nSourcePixels = 0;
8326 : void **pahSourcePixel =
8327 220169 : CPLQuadTreeSearch(hQuadTree, &sRect, &nSourcePixels);
8328 220169 : if (nSourcePixels == 0)
8329 : {
8330 1258 : CPLFree(pahSourcePixel);
8331 1262 : continue;
8332 : }
8333 :
8334 218911 : std::fill(adfRealValue.begin(), adfRealValue.end(), 0);
8335 218911 : std::fill(adfImagValue.begin(), adfImagValue.end(), 0);
8336 218911 : std::fill(adfBandDensity.begin(), adfBandDensity.end(), 0);
8337 218911 : std::fill(adfWeight.begin(), adfWeight.end(), 0);
8338 218911 : double dfDensity = 0;
8339 218911 : double dfTotalWeight = 0;
8340 :
8341 : /* ====================================================================
8342 : */
8343 : /* Iterate over each contributing source pixel to add its
8344 : */
8345 : /* value weighed by the ratio of the area of its
8346 : * intersection */
8347 : /* with the target pixel divided by the area of the source
8348 : */
8349 : /* pixel. */
8350 : /* ====================================================================
8351 : */
8352 1020520 : for (int i = 0; i < nSourcePixels; ++i)
8353 : {
8354 801614 : const int iSourcePixel = static_cast<int>(
8355 801614 : reinterpret_cast<uintptr_t>(pahSourcePixel[i]));
8356 801614 : auto &sp = sourcePixels[iSourcePixel];
8357 :
8358 801614 : double dfWeight = 0.0;
8359 801614 : if (bIsAffineNoRotation)
8360 : {
8361 : // Optimization since the source pixel is a rectangle in
8362 : // target pixel coordinates
8363 16312 : double dfSrcMinX = std::min(sp.dfDstX0, sp.dfDstX2);
8364 16312 : double dfSrcMaxX = std::max(sp.dfDstX0, sp.dfDstX2);
8365 16312 : double dfSrcMinY = std::min(sp.dfDstY0, sp.dfDstY2);
8366 16312 : double dfSrcMaxY = std::max(sp.dfDstY0, sp.dfDstY2);
8367 16312 : double dfIntersMinX = std::max<double>(dfSrcMinX, iDstX);
8368 16312 : double dfIntersMaxX = std::min(dfSrcMaxX, iDstX + 1.0);
8369 16312 : double dfIntersMinY = std::max<double>(dfSrcMinY, iDstY);
8370 16312 : double dfIntersMaxY = std::min(dfSrcMaxY, iDstY + 1.0);
8371 16312 : dfWeight =
8372 16312 : ((dfIntersMaxX - dfIntersMinX) *
8373 16312 : (dfIntersMaxY - dfIntersMinY)) /
8374 16312 : ((dfSrcMaxX - dfSrcMinX) * (dfSrcMaxY - dfSrcMinY));
8375 : }
8376 : else
8377 : {
8378 : // Compute the polygon of the source pixel in target pixel
8379 : // coordinates, and shifted to the target pixel (unit square
8380 : // coordinates)
8381 :
8382 785302 : xy2[0] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
8383 785302 : xy2[1] = {sp.dfDstX1 - iDstX, sp.dfDstY1 - iDstY};
8384 785302 : xy2[2] = {sp.dfDstX2 - iDstX, sp.dfDstY2 - iDstY};
8385 785302 : xy2[3] = {sp.dfDstX3 - iDstX, sp.dfDstY3 - iDstY};
8386 785302 : xy2[4] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
8387 :
8388 785302 : if (isConvex(xy2))
8389 : {
8390 785302 : getConvexPolyIntersection(xy1, xy2, intersection);
8391 785302 : if (intersection.size() >= 3)
8392 : {
8393 468849 : dfWeight = getArea(intersection);
8394 : }
8395 : }
8396 : else
8397 : {
8398 : // Split xy2 into 2 triangles.
8399 0 : xy2_triangle[0] = xy2[0];
8400 0 : xy2_triangle[1] = xy2[1];
8401 0 : xy2_triangle[2] = xy2[2];
8402 0 : xy2_triangle[3] = xy2[0];
8403 0 : getConvexPolyIntersection(xy1, xy2_triangle,
8404 : intersection);
8405 0 : if (intersection.size() >= 3)
8406 : {
8407 0 : dfWeight = getArea(intersection);
8408 : }
8409 :
8410 0 : xy2_triangle[1] = xy2[2];
8411 0 : xy2_triangle[2] = xy2[3];
8412 0 : getConvexPolyIntersection(xy1, xy2_triangle,
8413 : intersection);
8414 0 : if (intersection.size() >= 3)
8415 : {
8416 0 : dfWeight += getArea(intersection);
8417 : }
8418 : }
8419 785302 : if (dfWeight > 0.0)
8420 : {
8421 468828 : if (sp.dfArea == 0)
8422 89592 : sp.dfArea = getArea(xy2);
8423 468828 : dfWeight /= sp.dfArea;
8424 : }
8425 :
8426 : #ifdef CHECK_SUM_WITH_GEOS
8427 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 0,
8428 : sp.dfDstX0 - iDstX,
8429 : sp.dfDstY0 - iDstY);
8430 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 1,
8431 : sp.dfDstX1 - iDstX,
8432 : sp.dfDstY1 - iDstY);
8433 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 2,
8434 : sp.dfDstX2 - iDstX,
8435 : sp.dfDstY2 - iDstY);
8436 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 3,
8437 : sp.dfDstX3 - iDstX,
8438 : sp.dfDstY3 - iDstY);
8439 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 4,
8440 : sp.dfDstX0 - iDstX,
8441 : sp.dfDstY0 - iDstY);
8442 :
8443 : double dfWeightGEOS = 0.0;
8444 : auto hIntersection =
8445 : GEOSIntersection_r(hGEOSContext, hP1, hP2);
8446 : if (hIntersection)
8447 : {
8448 : double dfIntersArea = 0.0;
8449 : if (GEOSArea_r(hGEOSContext, hIntersection,
8450 : &dfIntersArea) &&
8451 : dfIntersArea > 0)
8452 : {
8453 : double dfSourceArea = 0.0;
8454 : if (GEOSArea_r(hGEOSContext, hP2, &dfSourceArea))
8455 : {
8456 : dfWeightGEOS = dfIntersArea / dfSourceArea;
8457 : }
8458 : }
8459 : GEOSGeom_destroy_r(hGEOSContext, hIntersection);
8460 : }
8461 : if (fabs(dfWeight - dfWeightGEOS) > 1e-5 * dfWeightGEOS)
8462 : {
8463 : /* ok */ printf("dfWeight=%f dfWeightGEOS=%f\n",
8464 : dfWeight, dfWeightGEOS);
8465 : printf("xy2: "); // ok
8466 : for (const auto &xy : xy2)
8467 : printf("[%f, %f], ", xy.first, xy.second); // ok
8468 : printf("\n"); // ok
8469 : printf("intersection: "); // ok
8470 : for (const auto &xy : intersection)
8471 : printf("[%f, %f], ", xy.first, xy.second); // ok
8472 : printf("\n"); // ok
8473 : }
8474 : #endif
8475 : }
8476 801614 : if (dfWeight > 0.0)
8477 : {
8478 474099 : const GPtrDiff_t iSrcOffset =
8479 474099 : sp.iSrcX +
8480 474099 : static_cast<GPtrDiff_t>(sp.iSrcY) * nSrcXSize;
8481 474099 : dfTotalWeight += dfWeight;
8482 :
8483 474099 : if (poWK->pafUnifiedSrcDensity != nullptr)
8484 : {
8485 0 : dfDensity +=
8486 0 : dfWeight * poWK->pafUnifiedSrcDensity[iSrcOffset];
8487 : }
8488 : else
8489 : {
8490 474099 : dfDensity += dfWeight;
8491 : }
8492 :
8493 1818720 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
8494 : {
8495 : // Returns pixel value if it is not no data.
8496 : double dfBandDensity;
8497 : double dfRealValue;
8498 : double dfImagValue;
8499 2689240 : if (!(GWKGetPixelValue(poWK, iBand, iSrcOffset,
8500 : &dfBandDensity, &dfRealValue,
8501 : &dfImagValue) &&
8502 1344620 : dfBandDensity > BAND_DENSITY_THRESHOLD))
8503 : {
8504 0 : continue;
8505 : }
8506 :
8507 1344620 : adfRealValue[iBand] += dfRealValue * dfWeight;
8508 1344620 : adfImagValue[iBand] += dfImagValue * dfWeight;
8509 1344620 : adfBandDensity[iBand] += dfBandDensity * dfWeight;
8510 1344620 : adfWeight[iBand] += dfWeight;
8511 : }
8512 : }
8513 : }
8514 :
8515 218911 : CPLFree(pahSourcePixel);
8516 :
8517 : /* --------------------------------------------------------------------
8518 : */
8519 : /* Update destination pixel value. */
8520 : /* --------------------------------------------------------------------
8521 : */
8522 218911 : bool bHasFoundDensity = false;
8523 218911 : const GPtrDiff_t iDstOffset =
8524 218911 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
8525 827822 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
8526 : {
8527 608911 : if (adfWeight[iBand] > 0)
8528 : {
8529 : const double dfBandDensity =
8530 608907 : adfBandDensity[iBand] / adfWeight[iBand];
8531 608907 : if (dfBandDensity > BAND_DENSITY_THRESHOLD)
8532 : {
8533 608907 : bHasFoundDensity = true;
8534 608907 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
8535 608907 : adfRealValue[iBand],
8536 608907 : adfImagValue[iBand]);
8537 : }
8538 : }
8539 : }
8540 :
8541 218911 : if (!bHasFoundDensity)
8542 4 : continue;
8543 :
8544 : /* --------------------------------------------------------------------
8545 : */
8546 : /* Update destination density/validity masks. */
8547 : /* --------------------------------------------------------------------
8548 : */
8549 218907 : GWKOverlayDensity(poWK, iDstOffset, dfDensity / dfTotalWeight);
8550 :
8551 218907 : if (poWK->panDstValid != nullptr)
8552 : {
8553 11750 : CPLMaskSet(poWK->panDstValid, iDstOffset);
8554 : }
8555 : }
8556 :
8557 : /* --------------------------------------------------------------------
8558 : */
8559 : /* Report progress to the user, and optionally cancel out. */
8560 : /* --------------------------------------------------------------------
8561 : */
8562 873 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
8563 0 : break;
8564 : }
8565 :
8566 : #ifdef CHECK_SUM_WITH_GEOS
8567 : GEOSGeom_destroy_r(hGEOSContext, hP1);
8568 : GEOSGeom_destroy_r(hGEOSContext, hP2);
8569 : OGRGeometry::freeGEOSContext(hGEOSContext);
8570 : #endif
8571 18 : CPLQuadTreeDestroy(hQuadTree);
8572 18 : }
|