Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: High Performance Image Reprojector
4 : * Purpose: Implementation of the GDALWarpKernel class. Implements the actual
5 : * image warping for a "chunk" of input and output imagery already
6 : * loaded into memory.
7 : * Author: Frank Warmerdam, warmerdam@pobox.com
8 : *
9 : ******************************************************************************
10 : * Copyright (c) 2003, Frank Warmerdam <warmerdam@pobox.com>
11 : * Copyright (c) 2008-2013, Even Rouault <even dot rouault at spatialys.com>
12 : *
13 : * SPDX-License-Identifier: MIT
14 : ****************************************************************************/
15 :
16 : #include "cpl_port.h"
17 : #include "gdalwarper.h"
18 :
19 : #include <cfloat>
20 : #include <cmath>
21 : #include <cstddef>
22 : #include <cstdlib>
23 : #include <cstring>
24 :
25 : #include <algorithm>
26 : #include <limits>
27 : #include <mutex>
28 : #include <new>
29 : #include <utility>
30 : #include <vector>
31 :
32 : #include "cpl_atomic_ops.h"
33 : #include "cpl_conv.h"
34 : #include "cpl_error.h"
35 : #include "cpl_mask.h"
36 : #include "cpl_multiproc.h"
37 : #include "cpl_progress.h"
38 : #include "cpl_string.h"
39 : #include "cpl_vsi.h"
40 : #include "cpl_worker_thread_pool.h"
41 : #include "cpl_quad_tree.h"
42 : #include "gdal.h"
43 : #include "gdal_alg.h"
44 : #include "gdal_alg_priv.h"
45 : #include "gdal_thread_pool.h"
46 : #include "gdalresamplingkernels.h"
47 : #include "gdalwarpkernel_opencl.h"
48 :
49 : // #define CHECK_SUM_WITH_GEOS
50 : #ifdef CHECK_SUM_WITH_GEOS
51 : #include "ogr_geometry.h"
52 : #include "ogr_geos.h"
53 : #endif
54 :
55 : // We restrict to 64bit processors because they are guaranteed to have SSE2.
56 : // Could possibly be used too on 32bit, but we would need to check at runtime.
57 : #if defined(__x86_64) || defined(_M_X64)
58 : #include "gdalsse_priv.h"
59 :
60 : #if __SSE4_1__
61 : #include <smmintrin.h>
62 : #endif
63 :
64 : #if __SSE3__
65 : #include <pmmintrin.h>
66 : #endif
67 :
68 : #endif
69 :
// Small positive thresholds applied to band density and source density
// values respectively (their uses are further down in this file).
constexpr double BAND_DENSITY_THRESHOLD = 1e-10;
constexpr float SRC_DENSITY_THRESHOLD = 1e-9f;
72 :
73 : // #define INSTANTIATE_FLOAT64_SSE2_IMPL
74 :
// Filter radius for each resampling algorithm, indexed by the numeric value
// of GDALResampleAlg. Entries are 0 for algorithms that have no entry in the
// filter-function tables below.
static constexpr int anGWKFilterRadius[] = {
    /* Nearest neighbour                 */ 0,
    /* Bilinear                          */ 1,
    /* Cubic Convolution (Catmull-Rom)   */ 2,
    /* Cubic B-Spline                    */ 2,
    /* Lanczos windowed sinc             */ 3,
    /* Average                           */ 0,
    /* Mode                              */ 0,
    /* Reserved GRA_Gauss=7              */ 0,
    /* Max                               */ 0,
    /* Min                               */ 0,
    /* Med                               */ 0,
    /* Q1                                */ 0,
    /* Q3                                */ 0,
    /* Sum                               */ 0,
    /* RMS                               */ 0,
};
92 :
93 : static double GWKBilinear(double dfX);
94 : static double GWKCubic(double dfX);
95 : static double GWKBSpline(double dfX);
96 : static double GWKLanczosSinc(double dfX);
97 :
98 : static const FilterFuncType apfGWKFilter[] = {
99 : nullptr, // Nearest neighbour
100 : GWKBilinear, // Bilinear
101 : GWKCubic, // Cubic Convolution (Catmull-Rom)
102 : GWKBSpline, // Cubic B-Spline
103 : GWKLanczosSinc, // Lanczos windowed sinc
104 : nullptr, // Average
105 : nullptr, // Mode
106 : nullptr, // Reserved GRA_Gauss=7
107 : nullptr, // Max
108 : nullptr, // Min
109 : nullptr, // Med
110 : nullptr, // Q1
111 : nullptr, // Q3
112 : nullptr, // Sum
113 : nullptr, // RMS
114 : };
115 :
116 : // TODO(schwehr): Can we make these functions have a const * const arg?
117 : static double GWKBilinear4Values(double *padfVals);
118 : static double GWKCubic4Values(double *padfVals);
119 : static double GWKBSpline4Values(double *padfVals);
120 : static double GWKLanczosSinc4Values(double *padfVals);
121 :
122 : static const FilterFunc4ValuesType apfGWKFilter4Values[] = {
123 : nullptr, // Nearest neighbour
124 : GWKBilinear4Values, // Bilinear
125 : GWKCubic4Values, // Cubic Convolution (Catmull-Rom)
126 : GWKBSpline4Values, // Cubic B-Spline
127 : GWKLanczosSinc4Values, // Lanczos windowed sinc
128 : nullptr, // Average
129 : nullptr, // Mode
130 : nullptr, // Reserved GRA_Gauss=7
131 : nullptr, // Max
132 : nullptr, // Min
133 : nullptr, // Med
134 : nullptr, // Q1
135 : nullptr, // Q3
136 : nullptr, // Sum
137 : nullptr, // RMS
138 : };
139 :
140 9525 : int GWKGetFilterRadius(GDALResampleAlg eResampleAlg)
141 : {
142 : static_assert(CPL_ARRAYSIZE(anGWKFilterRadius) == GRA_LAST_VALUE + 1,
143 : "Bad size of anGWKFilterRadius");
144 9525 : return anGWKFilterRadius[eResampleAlg];
145 : }
146 :
147 3644 : FilterFuncType GWKGetFilterFunc(GDALResampleAlg eResampleAlg)
148 : {
149 : static_assert(CPL_ARRAYSIZE(apfGWKFilter) == GRA_LAST_VALUE + 1,
150 : "Bad size of apfGWKFilter");
151 3644 : return apfGWKFilter[eResampleAlg];
152 : }
153 :
154 3646 : FilterFunc4ValuesType GWKGetFilterFunc4Values(GDALResampleAlg eResampleAlg)
155 : {
156 : static_assert(CPL_ARRAYSIZE(apfGWKFilter4Values) == GRA_LAST_VALUE + 1,
157 : "Bad size of apfGWKFilter4Values");
158 3646 : return apfGWKFilter4Values[eResampleAlg];
159 : }
160 :
161 : #ifdef HAVE_OPENCL
162 : static CPLErr GWKOpenCLCase(GDALWarpKernel *);
163 : #endif
164 :
165 : static CPLErr GWKGeneralCase(GDALWarpKernel *);
166 : static CPLErr GWKRealCase(GDALWarpKernel *poWK);
167 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
168 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
169 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
170 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
171 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
172 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
173 : #endif
174 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
175 : static CPLErr GWKNearestByte(GDALWarpKernel *poWK);
176 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
177 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
178 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
179 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
180 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
181 : #endif
182 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
183 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
184 : static CPLErr GWKNearestShort(GDALWarpKernel *poWK);
185 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
186 : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK);
187 : static CPLErr GWKAverageOrMode(GDALWarpKernel *);
188 : static CPLErr GWKSumPreserving(GDALWarpKernel *);
189 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
190 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
191 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
192 :
193 : /************************************************************************/
194 : /* GWKJobStruct */
195 : /************************************************************************/
196 :
197 : struct GWKJobStruct
198 : {
199 : std::mutex &mutex;
200 : std::condition_variable &cv;
201 : int &counter;
202 : bool &stopFlag;
203 : GDALWarpKernel *poWK;
204 : int iYMin;
205 : int iYMax;
206 : int (*pfnProgress)(GWKJobStruct *psJob);
207 : void *pTransformerArg;
208 : void (*pfnFunc)(
209 : void *); // used by GWKRun() to assign the proper pTransformerArg
210 :
211 2015 : GWKJobStruct(std::mutex &mutex_, std::condition_variable &cv_,
212 : int &counter_, bool &stopFlag_)
213 2015 : : mutex(mutex_), cv(cv_), counter(counter_), stopFlag(stopFlag_),
214 : poWK(nullptr), iYMin(0), iYMax(0), pfnProgress(nullptr),
215 2015 : pTransformerArg(nullptr), pfnFunc(nullptr)
216 : {
217 2015 : }
218 : };
219 :
220 : struct GWKThreadData
221 : {
222 : std::unique_ptr<CPLJobQueue> poJobQueue{};
223 : std::unique_ptr<std::vector<GWKJobStruct>> threadJobs{};
224 : int nMaxThreads{0};
225 : int counter{0};
226 : bool stopFlag{false};
227 : std::mutex mutex{};
228 : std::condition_variable cv{};
229 : bool bTransformerArgInputAssignedToThread{false};
230 : void *pTransformerArgInput{
231 : nullptr}; // owned by calling layer. Not to be destroyed
232 : std::map<GIntBig, void *> mapThreadToTransformerArg{};
233 : int nTotalThreadCountForThisRun = 0;
234 : int nCurThreadCountForThisRun = 0;
235 : };
236 :
237 : /************************************************************************/
238 : /* GWKProgressThread() */
239 : /************************************************************************/
240 :
241 : // Return TRUE if the computation must be interrupted.
242 18 : static int GWKProgressThread(GWKJobStruct *psJob)
243 : {
244 18 : bool stop = false;
245 : {
246 18 : std::lock_guard<std::mutex> lock(psJob->mutex);
247 18 : psJob->counter++;
248 18 : stop = psJob->stopFlag;
249 : }
250 18 : psJob->cv.notify_one();
251 :
252 18 : return stop;
253 : }
254 :
255 : /************************************************************************/
256 : /* GWKProgressMonoThread() */
257 : /************************************************************************/
258 :
259 : // Return TRUE if the computation must be interrupted.
260 198631 : static int GWKProgressMonoThread(GWKJobStruct *psJob)
261 : {
262 198631 : GDALWarpKernel *poWK = psJob->poWK;
263 : // coverity[missing_lock]
264 198631 : if (!poWK->pfnProgress(
265 198631 : poWK->dfProgressBase +
266 198631 : poWK->dfProgressScale *
267 198631 : (++psJob->counter / static_cast<double>(psJob->iYMax)),
268 : "", poWK->pProgress))
269 : {
270 1 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
271 1 : psJob->stopFlag = true;
272 1 : return TRUE;
273 : }
274 198630 : return FALSE;
275 : }
276 :
277 : /************************************************************************/
278 : /* GWKGenericMonoThread() */
279 : /************************************************************************/
280 :
281 2010 : static CPLErr GWKGenericMonoThread(GDALWarpKernel *poWK,
282 : void (*pfnFunc)(void *pUserData))
283 : {
284 2010 : GWKThreadData td;
285 :
286 : // NOTE: the mutex is not used.
287 2010 : GWKJobStruct job(td.mutex, td.cv, td.counter, td.stopFlag);
288 2010 : job.poWK = poWK;
289 2010 : job.iYMin = 0;
290 2010 : job.iYMax = poWK->nDstYSize;
291 2010 : job.pfnProgress = GWKProgressMonoThread;
292 2010 : job.pTransformerArg = poWK->pTransformerArg;
293 2010 : pfnFunc(&job);
294 :
295 4020 : return td.stopFlag ? CE_Failure : CE_None;
296 : }
297 :
298 : /************************************************************************/
299 : /* GWKThreadsCreate() */
300 : /************************************************************************/
301 :
302 1382 : void *GWKThreadsCreate(char **papszWarpOptions,
303 : GDALTransformerFunc /* pfnTransformer */,
304 : void *pTransformerArg)
305 : {
306 : const char *pszWarpThreads =
307 1382 : CSLFetchNameValue(papszWarpOptions, "NUM_THREADS");
308 1382 : if (pszWarpThreads == nullptr)
309 1382 : pszWarpThreads = CPLGetConfigOption("GDAL_NUM_THREADS", "1");
310 :
311 1382 : int nThreads = 0;
312 1382 : if (EQUAL(pszWarpThreads, "ALL_CPUS"))
313 3 : nThreads = CPLGetNumCPUs();
314 : else
315 1379 : nThreads = atoi(pszWarpThreads);
316 1382 : if (nThreads <= 1)
317 1377 : nThreads = 0;
318 1382 : if (nThreads > 128)
319 0 : nThreads = 128;
320 :
321 1382 : GWKThreadData *psThreadData = new GWKThreadData();
322 : auto poThreadPool =
323 1382 : nThreads > 0 ? GDALGetGlobalThreadPool(nThreads) : nullptr;
324 1382 : if (nThreads && poThreadPool)
325 : {
326 5 : psThreadData->nMaxThreads = nThreads;
327 5 : psThreadData->threadJobs.reset(new std::vector<GWKJobStruct>(
328 : nThreads,
329 5 : GWKJobStruct(psThreadData->mutex, psThreadData->cv,
330 10 : psThreadData->counter, psThreadData->stopFlag)));
331 :
332 5 : psThreadData->poJobQueue = poThreadPool->CreateJobQueue();
333 5 : psThreadData->pTransformerArgInput = pTransformerArg;
334 : }
335 :
336 1382 : return psThreadData;
337 : }
338 :
339 : /************************************************************************/
340 : /* GWKThreadsEnd() */
341 : /************************************************************************/
342 :
343 1382 : void GWKThreadsEnd(void *psThreadDataIn)
344 : {
345 1382 : if (psThreadDataIn == nullptr)
346 0 : return;
347 :
348 1382 : GWKThreadData *psThreadData = static_cast<GWKThreadData *>(psThreadDataIn);
349 1382 : if (psThreadData->poJobQueue)
350 : {
351 : // cppcheck-suppress constVariableReference
352 15 : for (auto &pair : psThreadData->mapThreadToTransformerArg)
353 : {
354 10 : CPLAssert(pair.second != psThreadData->pTransformerArgInput);
355 10 : GDALDestroyTransformer(pair.second);
356 : }
357 5 : psThreadData->poJobQueue.reset();
358 : }
359 1382 : delete psThreadData;
360 : }
361 :
362 : /************************************************************************/
363 : /* ThreadFuncAdapter() */
364 : /************************************************************************/
365 :
366 15 : static void ThreadFuncAdapter(void *pData)
367 : {
368 15 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
369 15 : GWKThreadData *psThreadData =
370 15 : static_cast<GWKThreadData *>(psJob->poWK->psThreadData);
371 :
372 : // Look if we have already a per-thread transformer
373 15 : void *pTransformerArg = nullptr;
374 15 : const GIntBig nThreadId = CPLGetPID();
375 :
376 : {
377 30 : std::lock_guard<std::mutex> lock(psThreadData->mutex);
378 15 : ++psThreadData->nCurThreadCountForThisRun;
379 :
380 15 : auto oIter = psThreadData->mapThreadToTransformerArg.find(nThreadId);
381 15 : if (oIter != psThreadData->mapThreadToTransformerArg.end())
382 : {
383 1 : pTransformerArg = oIter->second;
384 : }
385 14 : else if (!psThreadData->bTransformerArgInputAssignedToThread &&
386 14 : psThreadData->nCurThreadCountForThisRun ==
387 14 : psThreadData->nTotalThreadCountForThisRun)
388 : {
389 : // If we are the last thread to be started, temporarily borrow the
390 : // original transformer
391 4 : psThreadData->bTransformerArgInputAssignedToThread = true;
392 4 : pTransformerArg = psThreadData->pTransformerArgInput;
393 4 : psThreadData->mapThreadToTransformerArg[nThreadId] =
394 : pTransformerArg;
395 : }
396 :
397 15 : if (pTransformerArg == nullptr)
398 : {
399 10 : CPLAssert(psThreadData->pTransformerArgInput != nullptr);
400 10 : CPLAssert(!psThreadData->bTransformerArgInputAssignedToThread);
401 : }
402 : }
403 :
404 : // If no transformer assigned to current thread, instantiate one
405 15 : if (pTransformerArg == nullptr)
406 : {
407 : // This somehow assumes that GDALCloneTransformer() is thread-safe
408 : // which should normally be the case.
409 : pTransformerArg =
410 10 : GDALCloneTransformer(psThreadData->pTransformerArgInput);
411 :
412 : // Lock for the stop flag and the transformer map.
413 10 : std::lock_guard<std::mutex> lock(psThreadData->mutex);
414 10 : if (!pTransformerArg)
415 : {
416 0 : psJob->stopFlag = true;
417 0 : return;
418 : }
419 10 : psThreadData->mapThreadToTransformerArg[nThreadId] = pTransformerArg;
420 : }
421 :
422 15 : psJob->pTransformerArg = pTransformerArg;
423 15 : psJob->pfnFunc(pData);
424 :
425 : // Give back original transformer, if borrowed.
426 : {
427 30 : std::lock_guard<std::mutex> lock(psThreadData->mutex);
428 15 : if (psThreadData->bTransformerArgInputAssignedToThread &&
429 5 : pTransformerArg == psThreadData->pTransformerArgInput)
430 : {
431 : psThreadData->mapThreadToTransformerArg.erase(
432 4 : psThreadData->mapThreadToTransformerArg.find(nThreadId));
433 4 : psThreadData->bTransformerArgInputAssignedToThread = false;
434 : }
435 : }
436 : }
437 :
438 : /************************************************************************/
439 : /* GWKRun() */
440 : /************************************************************************/
441 :
442 2015 : static CPLErr GWKRun(GDALWarpKernel *poWK, const char *pszFuncName,
443 : void (*pfnFunc)(void *pUserData))
444 :
445 : {
446 2015 : const int nDstYSize = poWK->nDstYSize;
447 :
448 2015 : CPLDebug("GDAL",
449 : "GDALWarpKernel()::%s() "
450 : "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d",
451 : pszFuncName, poWK->nSrcXOff, poWK->nSrcYOff, poWK->nSrcXSize,
452 : poWK->nSrcYSize, poWK->nDstXOff, poWK->nDstYOff, poWK->nDstXSize,
453 : poWK->nDstYSize);
454 :
455 2015 : if (!poWK->pfnProgress(poWK->dfProgressBase, "", poWK->pProgress))
456 : {
457 0 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
458 0 : return CE_Failure;
459 : }
460 :
461 2015 : GWKThreadData *psThreadData =
462 : static_cast<GWKThreadData *>(poWK->psThreadData);
463 2015 : if (psThreadData == nullptr || psThreadData->poJobQueue == nullptr)
464 : {
465 2010 : return GWKGenericMonoThread(poWK, pfnFunc);
466 : }
467 :
468 5 : int nThreads = std::min(psThreadData->nMaxThreads, nDstYSize / 2);
469 : // Config option mostly useful for tests to be able to test multithreading
470 : // with small rasters
471 : const int nWarpChunkSize =
472 5 : atoi(CPLGetConfigOption("WARP_THREAD_CHUNK_SIZE", "65536"));
473 5 : if (nWarpChunkSize > 0)
474 : {
475 3 : GIntBig nChunks =
476 3 : static_cast<GIntBig>(nDstYSize) * poWK->nDstXSize / nWarpChunkSize;
477 3 : if (nThreads > nChunks)
478 1 : nThreads = static_cast<int>(nChunks);
479 : }
480 5 : if (nThreads <= 0)
481 1 : nThreads = 1;
482 :
483 5 : CPLDebug("WARP", "Using %d threads", nThreads);
484 :
485 5 : auto &jobs = *psThreadData->threadJobs;
486 5 : CPLAssert(static_cast<int>(jobs.size()) >= nThreads);
487 : // Fill-in job structures.
488 20 : for (int i = 0; i < nThreads; ++i)
489 : {
490 15 : auto &job = jobs[i];
491 15 : job.poWK = poWK;
492 15 : job.iYMin =
493 15 : static_cast<int>(static_cast<int64_t>(i) * nDstYSize / nThreads);
494 15 : job.iYMax = static_cast<int>(static_cast<int64_t>(i + 1) * nDstYSize /
495 15 : nThreads);
496 15 : if (poWK->pfnProgress != GDALDummyProgress)
497 1 : job.pfnProgress = GWKProgressThread;
498 15 : job.pfnFunc = pfnFunc;
499 : }
500 :
501 : bool bStopFlag;
502 : {
503 5 : std::unique_lock<std::mutex> lock(psThreadData->mutex);
504 :
505 5 : psThreadData->nTotalThreadCountForThisRun = nThreads;
506 : // coverity[missing_lock]
507 5 : psThreadData->nCurThreadCountForThisRun = 0;
508 :
509 : // Start jobs.
510 20 : for (int i = 0; i < nThreads; ++i)
511 : {
512 15 : auto &job = jobs[i];
513 15 : psThreadData->poJobQueue->SubmitJob(ThreadFuncAdapter,
514 : static_cast<void *>(&job));
515 : }
516 :
517 : /* --------------------------------------------------------------------
518 : */
519 : /* Report progress. */
520 : /* --------------------------------------------------------------------
521 : */
522 5 : if (poWK->pfnProgress != GDALDummyProgress)
523 : {
524 1 : while (psThreadData->counter < nDstYSize)
525 : {
526 1 : psThreadData->cv.wait(lock);
527 1 : if (!poWK->pfnProgress(poWK->dfProgressBase +
528 1 : poWK->dfProgressScale *
529 1 : (psThreadData->counter /
530 1 : static_cast<double>(nDstYSize)),
531 : "", poWK->pProgress))
532 : {
533 1 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
534 1 : psThreadData->stopFlag = true;
535 1 : break;
536 : }
537 : }
538 : }
539 :
540 5 : bStopFlag = psThreadData->stopFlag;
541 : }
542 :
543 : /* -------------------------------------------------------------------- */
544 : /* Wait for all jobs to complete. */
545 : /* -------------------------------------------------------------------- */
546 5 : psThreadData->poJobQueue->WaitCompletion();
547 :
548 5 : return bStopFlag ? CE_Failure : CE_None;
549 : }
550 :
551 : /************************************************************************/
552 : /* ==================================================================== */
553 : /* GDALWarpKernel */
554 : /* ==================================================================== */
555 : /************************************************************************/
556 :
557 : /**
558 : * \class GDALWarpKernel "gdalwarper.h"
559 : *
560 : * Low level image warping class.
561 : *
562 : * This class is responsible for low level image warping for one
563 : * "chunk" of imagery. The class is essentially a structure with all
564 : * data members public - primarily so that new special-case functions
565 : * can be added without changing the class declaration.
566 : *
567 : * Applications are normally intended to interact with warping facilities
568 : * through the GDALWarpOperation class, though the GDALWarpKernel can in
569 : * theory be used directly if great care is taken in setting up the
570 : * control data.
571 : *
572 : * <h3>Design Issues</h3>
573 : *
574 : * The intention is that PerformWarp() would analyze the setup in terms
575 : * of the datatype, resampling type, and validity/density mask usage and
576 : * pick one of many specific implementations of the warping algorithm over
577 : * a continuum of optimization vs. generality. At one end there will be a
578 : * reference general purpose implementation of the algorithm that supports
579 : * any data type (working internally in double precision complex), all three
580 : * resampling types, and any or all of the validity/density masks. At the
581 : * other end would be highly optimized algorithms for common cases like
582 : * nearest neighbour resampling on GDT_Byte data with no masks.
583 : *
584 : * The full set of optimized versions have not been decided but we should
585 : * expect to have at least:
586 : * - One for each resampling algorithm for 8bit data with no masks.
587 : * - One for each resampling algorithm for float data with no masks.
588 : * - One for each resampling algorithm for float data with any/all masks
589 : * (essentially the generic case for just float data).
590 : * - One for each resampling algorithm for 8bit data with support for
591 : * input validity masks (per band or per pixel). This handles the common
592 : * case of nodata masking.
593 : * - One for each resampling algorithm for float data with support for
594 : * input validity masks (per band or per pixel). This handles the common
595 : * case of nodata masking.
596 : *
597 : * Some of the specializations would operate on all bands in one pass
598 : * (especially the ones without masking would do this), while others might
599 : * process each band individually to reduce code complexity.
600 : *
601 : * <h3>Masking Semantics</h3>
602 : *
603 : * A detailed explanation of the semantics of the validity and density masks,
604 : * and their effects on resampling kernels is needed here.
605 : */
606 :
607 : /************************************************************************/
608 : /* GDALWarpKernel Data Members */
609 : /************************************************************************/
610 :
611 : /**
612 : * \var GDALResampleAlg GDALWarpKernel::eResample;
613 : *
614 : * Resampling algorithm.
615 : *
616 : * The resampling algorithm to use. One of GRA_NearestNeighbour, GRA_Bilinear,
617 : * GRA_Cubic, GRA_CubicSpline, GRA_Lanczos, GRA_Average, GRA_RMS,
618 : * GRA_Mode or GRA_Sum.
619 : *
620 : * This field is required. GRA_NearestNeighbour may be used as a default
621 : * value.
622 : */
623 :
624 : /**
625 : * \var GDALDataType GDALWarpKernel::eWorkingDataType;
626 : *
627 : * Working pixel data type.
628 : *
629 : * The datatype of pixels in the source image (papabySrcImage) and
630 : * destination image (papabyDstImage) buffers. Note that operations on
631 : * some data types (such as GDT_Byte) may be much better optimized than other
632 : * less common cases.
633 : *
634 : * This field is required. It may not be GDT_Unknown.
635 : */
636 :
637 : /**
638 : * \var int GDALWarpKernel::nBands;
639 : *
640 : * Number of bands.
641 : *
642 : * The number of bands (layers) of imagery being warped. Determines the
643 : * number of entries in the papabySrcImage, papanBandSrcValid,
644 : * and papabyDstImage arrays.
645 : *
646 : * This field is required.
647 : */
648 :
649 : /**
650 : * \var int GDALWarpKernel::nSrcXSize;
651 : *
652 : * Source image width in pixels.
653 : *
654 : * This field is required.
655 : */
656 :
657 : /**
658 : * \var int GDALWarpKernel::nSrcYSize;
659 : *
660 : * Source image height in pixels.
661 : *
662 : * This field is required.
663 : */
664 :
665 : /**
666 : * \var double GDALWarpKernel::dfSrcXExtraSize;
667 : *
668 : * Number of pixels included in nSrcXSize that are present on the edges of
669 : * the area of interest to take into account the width of the kernel.
670 : *
671 : * This field is required.
672 : */
673 :
674 : /**
675 : * \var double GDALWarpKernel::dfSrcYExtraSize;
676 : *
677 : * Number of pixels included in nSrcYSize that are present on the edges of
678 : * the area of interest to take into account the height of the kernel.
679 : *
680 : * This field is required.
681 : */
682 :
683 : /**
684 : * \var GByte **GDALWarpKernel::papabySrcImage;
685 : *
686 : * Array of source image band data.
687 : *
688 : * This is an array (of size GDALWarpKernel::nBands) of pointers
689 : * to image data. Each individual band of image data is organized as a single
690 : * block of image data in left to right, then top to bottom order. The actual
691 : * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
692 : *
693 : * To access the pixel value for the (x=3, y=4) pixel (zero based) of
694 : * the second band with eWorkingDataType set to GDT_Float32 use code like
695 : * this:
696 : *
697 : * \code
698 : * float dfPixelValue;
699 : * int nBand = 2-1; // Band indexes are zero based.
700 : * int nPixel = 3; // Zero based.
701 : * int nLine = 4; // Zero based.
702 : *
703 : * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
704 : * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
705 : * assert( nBand >= 0 && nBand < poKern->nBands );
706 : * dfPixelValue = ((float *) poKern->papabySrcImage[nBand])
707 : * [nPixel + nLine * poKern->nSrcXSize];
708 : * \endcode
709 : *
710 : * This field is required.
711 : */
712 :
713 : /**
714 : * \var GUInt32 **GDALWarpKernel::papanBandSrcValid;
715 : *
716 : * Per band validity mask for source pixels.
717 : *
718 : * Array of pixel validity mask layers for each source band. Each of
719 : * the mask layers is the same size (in pixels) as the source image with
720 : * one bit per pixel. Note that it is legal (and common) for this to be
721 : * NULL indicating that none of the pixels are invalidated, or for some
722 : * band validity masks to be NULL in which case all pixels of the band are
723 : * valid. The following code can be used to test the validity of a particular
724 : * pixel.
725 : *
726 : * \code
727 : * int bIsValid = TRUE;
728 : * int nBand = 2-1; // Band indexes are zero based.
729 : * int nPixel = 3; // Zero based.
730 : * int nLine = 4; // Zero based.
731 : *
732 : * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
733 : * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
734 : * assert( nBand >= 0 && nBand < poKern->nBands );
735 : *
736 : * if( poKern->papanBandSrcValid != NULL
737 : * && poKern->papanBandSrcValid[nBand] != NULL )
738 : * {
739 : * GUInt32 *panBandMask = poKern->papanBandSrcValid[nBand];
740 : * int iPixelOffset = nPixel + nLine * poKern->nSrcXSize;
741 : *
742 : * bIsValid = CPLMaskGet(panBandMask, iPixelOffset);
743 : * }
744 : * \endcode
745 : */
746 :
747 : /**
748 : * \var GUInt32 *GDALWarpKernel::panUnifiedSrcValid;
749 : *
750 : * Per pixel validity mask for source pixels.
751 : *
752 : * A single validity mask layer that applies to the pixels of all source
753 : * bands. It is accessed similarly to papanBandSrcValid, but without the
754 : * extra level of band indirection.
755 : *
756 : * This pointer may be NULL indicating that all pixels are valid.
757 : *
758 : * Note that if both panUnifiedSrcValid, and papanBandSrcValid are available,
759 : * the pixel isn't considered to be valid unless both arrays indicate it is
760 : * valid.
761 : */
762 :
763 : /**
764 : * \var float *GDALWarpKernel::pafUnifiedSrcDensity;
765 : *
766 : * Per pixel density mask for source pixels.
767 : *
768 : * A single density mask layer that applies to the pixels of all source
769 : * bands. It contains values between 0.0 and 1.0 indicating the degree to
770 : * which this pixel should be allowed to contribute to the output result.
771 : *
772 : * This pointer may be NULL indicating that all pixels have a density of 1.0.
773 : *
774 : * The density for a pixel may be accessed like this:
775 : *
776 : * \code
777 : * float fDensity = 1.0;
778 : * int nPixel = 3; // Zero based.
779 : * int nLine = 4; // Zero based.
780 : *
781 : * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
782 : * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
783 : * if( poKern->pafUnifiedSrcDensity != NULL )
784 : * fDensity = poKern->pafUnifiedSrcDensity
785 : * [nPixel + nLine * poKern->nSrcXSize];
786 : * \endcode
787 : */
788 :
789 : /**
790 : * \var int GDALWarpKernel::nDstXSize;
791 : *
792 : * Width of destination image in pixels.
793 : *
794 : * This field is required.
795 : */
796 :
797 : /**
798 : * \var int GDALWarpKernel::nDstYSize;
799 : *
800 : * Height of destination image in pixels.
801 : *
802 : * This field is required.
803 : */
804 :
805 : /**
806 : * \var GByte **GDALWarpKernel::papabyDstImage;
807 : *
808 : * Array of destination image band data.
809 : *
810 : * This is an array (of size GDALWarpKernel::nBands) of pointers
811 : * to image data. Each individual band of image data is organized as a single
812 : * block of image data in left to right, then top to bottom order. The actual
813 : * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
814 : *
815 : * To access the pixel value for the (x=3, y=4) pixel (zero based) of
816 : * the second band with eWorkingDataType set to GDT_Float32 use code like
817 : * this:
818 : *
819 : * \code
820 : * float dfPixelValue;
821 : * int nBand = 2-1; // Band indexes are zero based.
822 : * int nPixel = 3; // Zero based.
823 : * int nLine = 4; // Zero based.
824 : *
825 : * assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
826 : * assert( nLine >= 0 && nLine < poKern->nDstYSize );
827 : * assert( nBand >= 0 && nBand < poKern->nBands );
828 : * dfPixelValue = ((float *) poKern->papabyDstImage[nBand])
829 : * [nPixel + nLine * poKern->nDstXSize];
830 : * \endcode
831 : *
832 : * This field is required.
833 : */
834 :
835 : /**
836 : * \var GUInt32 *GDALWarpKernel::panDstValid;
837 : *
838 : * Per pixel validity mask for destination pixels.
839 : *
840 : * A single validity mask layer that applies to the pixels of all destination
841 : * bands. It is accessed similarly to panUnifiedSrcValid, but based
842 : * on the size of the destination image.
843 : *
844 : * This pointer may be NULL indicating that all pixels are valid.
845 : */
846 :
847 : /**
848 : * \var float *GDALWarpKernel::pafDstDensity;
849 : *
850 : * Per pixel density mask for destination pixels.
851 : *
852 : * A single density mask layer that applies to the pixels of all destination
853 : * bands. It contains values between 0.0 and 1.0.
854 : *
855 : * This pointer may be NULL indicating that all pixels have a density of 1.0.
856 : *
857 : * The density for a pixel may be accessed like this:
858 : *
859 : * \code
860 : * float fDensity = 1.0;
861 : * int nPixel = 3; // Zero based.
862 : * int nLine = 4; // Zero based.
863 : *
864 : * assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
865 : * assert( nLine >= 0 && nLine < poKern->nDstYSize );
866 : * if( poKern->pafDstDensity != NULL )
867 : * fDensity = poKern->pafDstDensity[nPixel + nLine * poKern->nDstXSize];
868 : * \endcode
869 : */
870 :
871 : /**
872 : * \var int GDALWarpKernel::nSrcXOff;
873 : *
874 : * X offset to source pixel coordinates for transformation.
875 : *
876 : * See pfnTransformer.
877 : *
878 : * This field is required.
879 : */
880 :
881 : /**
882 : * \var int GDALWarpKernel::nSrcYOff;
883 : *
884 : * Y offset to source pixel coordinates for transformation.
885 : *
886 : * See pfnTransformer.
887 : *
888 : * This field is required.
889 : */
890 :
891 : /**
892 : * \var int GDALWarpKernel::nDstXOff;
893 : *
894 : * X offset to destination pixel coordinates for transformation.
895 : *
896 : * See pfnTransformer.
897 : *
898 : * This field is required.
899 : */
900 :
901 : /**
902 : * \var int GDALWarpKernel::nDstYOff;
903 : *
904 : * Y offset to destination pixel coordinates for transformation.
905 : *
906 : * See pfnTransformer.
907 : *
908 : * This field is required.
909 : */
910 :
911 : /**
912 : * \var GDALTransformerFunc GDALWarpKernel::pfnTransformer;
913 : *
914 : * Source/destination location transformer.
915 : *
916 : * The function to call to transform coordinates between source image
917 : * pixel/line coordinates and destination image pixel/line coordinates.
918 : * See GDALTransformerFunc() for details of the semantics of this function.
919 : *
920 : * The GDALWarpKernel algorithm will only ever use this transformer in
921 : * "destination to source" mode (bDstToSrc=TRUE), and will always pass
922 : * partial or complete scanlines of points in the destination image as
923 : * input. This means, among other things, that it is safe to use the
924 : * approximating transform GDALApproxTransform() as the transformation
925 : * function.
926 : *
927 : * Source and destination images may be subsets of a larger overall image.
928 : * The transformation algorithms will expect and return pixel/line coordinates
929 : * in terms of this larger image, so coordinates need to be offset by
930 : * the offsets specified in nSrcXOff, nSrcYOff, nDstXOff, and nDstYOff before
931 : * passing to pfnTransformer, and after return from it.
932 : *
933 : * The GDALWarpKernel::pTransformerArg value will be passed as the callback
934 : * data to this function when it is called.
935 : *
936 : * This field is required.
937 : */
938 :
939 : /**
940 : * \var void *GDALWarpKernel::pTransformerArg;
941 : *
942 : * Callback data for pfnTransformer.
943 : *
944 : * This field may be NULL if not required for the pfnTransformer being used.
945 : */
946 :
947 : /**
948 : * \var GDALProgressFunc GDALWarpKernel::pfnProgress;
949 : *
950 : * The function to call to report progress of the algorithm, and to check
951 : * for a requested termination of the operation. It operates according to
952 : * GDALProgressFunc() semantics.
953 : *
954 : * Generally speaking the progress function will be invoked for each
955 : * scanline of the destination buffer that has been processed.
956 : *
957 : * This field may be NULL (internally set to GDALDummyProgress()).
958 : */
959 :
960 : /**
961 : * \var void *GDALWarpKernel::pProgress;
962 : *
963 : * Callback data for pfnProgress.
964 : *
965 : * This field may be NULL if not required for the pfnProgress being used.
966 : */
967 :
968 : /************************************************************************/
969 : /* GDALWarpKernel() */
970 : /************************************************************************/
971 :
972 2321 : GDALWarpKernel::GDALWarpKernel()
973 : : papszWarpOptions(nullptr), eResample(GRA_NearestNeighbour),
974 : eWorkingDataType(GDT_Unknown), nBands(0), nSrcXSize(0), nSrcYSize(0),
975 : dfSrcXExtraSize(0.0), dfSrcYExtraSize(0.0), papabySrcImage(nullptr),
976 : papanBandSrcValid(nullptr), panUnifiedSrcValid(nullptr),
977 : pafUnifiedSrcDensity(nullptr), nDstXSize(0), nDstYSize(0),
978 : papabyDstImage(nullptr), panDstValid(nullptr), pafDstDensity(nullptr),
979 : dfXScale(1.0), dfYScale(1.0), dfXFilter(0.0), dfYFilter(0.0), nXRadius(0),
980 : nYRadius(0), nFiltInitX(0), nFiltInitY(0), nSrcXOff(0), nSrcYOff(0),
981 : nDstXOff(0), nDstYOff(0), pfnTransformer(nullptr),
982 : pTransformerArg(nullptr), pfnProgress(GDALDummyProgress),
983 : pProgress(nullptr), dfProgressBase(0.0), dfProgressScale(1.0),
984 2321 : padfDstNoDataReal(nullptr), psThreadData(nullptr)
985 : {
986 2321 : }
987 :
988 : /************************************************************************/
989 : /* ~GDALWarpKernel() */
990 : /************************************************************************/
991 :
992 2321 : GDALWarpKernel::~GDALWarpKernel()
993 : {
994 2321 : }
995 :
996 : /************************************************************************/
997 : /* PerformWarp() */
998 : /************************************************************************/
999 :
1000 : /**
1001 : * \fn CPLErr GDALWarpKernel::PerformWarp();
1002 : *
1003 : * This method performs the warp described in the GDALWarpKernel.
1004 : *
1005 : * @return CE_None on success or CE_Failure if an error occurs.
1006 : */
1007 :
1008 2319 : CPLErr GDALWarpKernel::PerformWarp()
1009 :
1010 : {
1011 2319 : const CPLErr eErr = Validate();
1012 :
1013 2319 : if (eErr != CE_None)
1014 1 : return eErr;
1015 :
1016 : // See #2445 and #3079.
1017 2318 : if (nSrcXSize <= 0 || nSrcYSize <= 0)
1018 : {
1019 303 : if (!pfnProgress(dfProgressBase + dfProgressScale, "", pProgress))
1020 : {
1021 0 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
1022 0 : return CE_Failure;
1023 : }
1024 303 : return CE_None;
1025 : }
1026 :
1027 : /* -------------------------------------------------------------------- */
1028 : /* Pre-calculate resampling scales and window sizes for filtering. */
1029 : /* -------------------------------------------------------------------- */
1030 :
1031 2015 : dfXScale = static_cast<double>(nDstXSize) / (nSrcXSize - dfSrcXExtraSize);
1032 2015 : dfYScale = static_cast<double>(nDstYSize) / (nSrcYSize - dfSrcYExtraSize);
1033 2015 : if (nSrcXSize >= nDstXSize && nSrcXSize <= nDstXSize + dfSrcXExtraSize)
1034 1262 : dfXScale = 1.0;
1035 2015 : if (nSrcYSize >= nDstYSize && nSrcYSize <= nDstYSize + dfSrcYExtraSize)
1036 1015 : dfYScale = 1.0;
1037 2015 : if (dfXScale < 1.0)
1038 : {
1039 536 : double dfXReciprocalScale = 1.0 / dfXScale;
1040 536 : const int nXReciprocalScale =
1041 536 : static_cast<int>(dfXReciprocalScale + 0.5);
1042 536 : if (fabs(dfXReciprocalScale - nXReciprocalScale) < 0.05)
1043 419 : dfXScale = 1.0 / nXReciprocalScale;
1044 : }
1045 2015 : if (dfYScale < 1.0)
1046 : {
1047 503 : double dfYReciprocalScale = 1.0 / dfYScale;
1048 503 : const int nYReciprocalScale =
1049 503 : static_cast<int>(dfYReciprocalScale + 0.5);
1050 503 : if (fabs(dfYReciprocalScale - nYReciprocalScale) < 0.05)
1051 356 : dfYScale = 1.0 / nYReciprocalScale;
1052 : }
1053 :
1054 : // XSCALE and YSCALE undocumented for now. Can help in some cases.
1055 : // Best would probably be a per-pixel scale computation.
1056 2015 : const char *pszXScale = CSLFetchNameValue(papszWarpOptions, "XSCALE");
1057 2015 : if (pszXScale != nullptr && !EQUAL(pszXScale, "FROM_GRID_SAMPLING"))
1058 1 : dfXScale = CPLAtof(pszXScale);
1059 2015 : const char *pszYScale = CSLFetchNameValue(papszWarpOptions, "YSCALE");
1060 2015 : if (pszYScale != nullptr)
1061 1 : dfYScale = CPLAtof(pszYScale);
1062 :
1063 : // If the xscale is significantly lower than the yscale, this is highly
1064 : // suspicious of a situation of wrapping a very large virtual file in
1065 : // geographic coordinates with left and right parts being close to the
1066 : // antimeridian. In that situation, the xscale computed by the above method
1067 : // is completely wrong. Prefer doing an average of a few sample points
1068 : // instead
1069 2015 : if ((dfYScale / dfXScale > 100 ||
1070 1 : (pszXScale != nullptr && EQUAL(pszXScale, "FROM_GRID_SAMPLING"))))
1071 : {
1072 : // Sample points along a grid
1073 4 : const int nPointsX = std::min(10, nDstXSize);
1074 4 : const int nPointsY = std::min(10, nDstYSize);
1075 4 : const int nPoints = 3 * nPointsX * nPointsY;
1076 8 : std::vector<double> padfX;
1077 8 : std::vector<double> padfY;
1078 8 : std::vector<double> padfZ(nPoints);
1079 8 : std::vector<int> pabSuccess(nPoints);
1080 44 : for (int iY = 0; iY < nPointsY; iY++)
1081 : {
1082 440 : for (int iX = 0; iX < nPointsX; iX++)
1083 : {
1084 400 : const double dfX =
1085 : nPointsX == 1
1086 400 : ? 0.0
1087 400 : : static_cast<double>(iX) * nDstXSize / (nPointsX - 1);
1088 400 : const double dfY =
1089 : nPointsY == 1
1090 400 : ? 0.0
1091 400 : : static_cast<double>(iY) * nDstYSize / (nPointsY - 1);
1092 :
1093 : // Reproject each destination sample point and its neighbours
1094 : // at (x+1,y) and (x,y+1), so as to get the local scale.
1095 400 : padfX.push_back(dfX);
1096 400 : padfY.push_back(dfY);
1097 :
1098 400 : padfX.push_back((iX == nPointsX - 1) ? dfX - 1 : dfX + 1);
1099 400 : padfY.push_back(dfY);
1100 :
1101 400 : padfX.push_back(dfX);
1102 400 : padfY.push_back((iY == nPointsY - 1) ? dfY - 1 : dfY + 1);
1103 : }
1104 : }
1105 4 : pfnTransformer(pTransformerArg, TRUE, nPoints, &padfX[0], &padfY[0],
1106 4 : &padfZ[0], &pabSuccess[0]);
1107 :
1108 : // Compute the xscale at each sampling point
1109 8 : std::vector<double> adfXScales;
1110 404 : for (int i = 0; i < nPoints; i += 3)
1111 : {
1112 400 : if (pabSuccess[i] && pabSuccess[i + 1] && pabSuccess[i + 2])
1113 : {
1114 : const double dfPointXScale =
1115 400 : 1.0 / std::max(std::abs(padfX[i + 1] - padfX[i]),
1116 800 : std::abs(padfX[i + 2] - padfX[i]));
1117 400 : adfXScales.push_back(dfPointXScale);
1118 : }
1119 : }
1120 :
1121 : // Sort by increasing xcale
1122 4 : std::sort(adfXScales.begin(), adfXScales.end());
1123 :
1124 4 : if (!adfXScales.empty())
1125 : {
1126 : // Compute the average of scales, but eliminate outliers small
1127 : // scales, if some samples are just along the discontinuity.
1128 4 : const double dfMaxPointXScale = adfXScales.back();
1129 4 : double dfSumPointXScale = 0;
1130 4 : int nCountPointScale = 0;
1131 404 : for (double dfPointXScale : adfXScales)
1132 : {
1133 400 : if (dfPointXScale > dfMaxPointXScale / 10)
1134 : {
1135 398 : dfSumPointXScale += dfPointXScale;
1136 398 : nCountPointScale++;
1137 : }
1138 : }
1139 4 : if (nCountPointScale > 0) // should always be true
1140 : {
1141 4 : const double dfXScaleFromSampling =
1142 4 : dfSumPointXScale / nCountPointScale;
1143 : #if DEBUG_VERBOSE
1144 : CPLDebug("WARP", "Correcting dfXScale from %f to %f", dfXScale,
1145 : dfXScaleFromSampling);
1146 : #endif
1147 4 : dfXScale = dfXScaleFromSampling;
1148 : }
1149 : }
1150 : }
1151 :
1152 : #if DEBUG_VERBOSE
1153 : CPLDebug("WARP", "dfXScale = %f, dfYScale = %f", dfXScale, dfYScale);
1154 : #endif
1155 :
1156 2015 : const int bUse4SamplesFormula = dfXScale >= 0.95 && dfYScale >= 0.95;
1157 :
1158 : // Safety check for callers that would use GDALWarpKernel without using
1159 : // GDALWarpOperation.
1160 1952 : if ((eResample == GRA_CubicSpline || eResample == GRA_Lanczos ||
1161 1889 : ((eResample == GRA_Cubic || eResample == GRA_Bilinear) &&
1162 4030 : !bUse4SamplesFormula)) &&
1163 388 : atoi(CSLFetchNameValueDef(papszWarpOptions, "EXTRA_ELTS", "0")) !=
1164 : WARP_EXTRA_ELTS)
1165 : {
1166 0 : CPLError(CE_Failure, CPLE_AppDefined,
1167 : "Source arrays must have WARP_EXTRA_ELTS extra elements at "
1168 : "their end. "
1169 : "See GDALWarpKernel class definition. If this condition is "
1170 : "fulfilled, define a EXTRA_ELTS=%d warp options",
1171 : WARP_EXTRA_ELTS);
1172 0 : return CE_Failure;
1173 : }
1174 :
1175 2015 : dfXFilter = anGWKFilterRadius[eResample];
1176 2015 : dfYFilter = anGWKFilterRadius[eResample];
1177 :
1178 2015 : nXRadius = dfXScale < 1.0 ? static_cast<int>(ceil(dfXFilter / dfXScale))
1179 1561 : : static_cast<int>(dfXFilter);
1180 2015 : nYRadius = dfYScale < 1.0 ? static_cast<int>(ceil(dfYFilter / dfYScale))
1181 1540 : : static_cast<int>(dfYFilter);
1182 :
1183 : // Filter window offset depends on the parity of the kernel radius.
1184 2015 : nFiltInitX = ((anGWKFilterRadius[eResample] + 1) % 2) - nXRadius;
1185 2015 : nFiltInitY = ((anGWKFilterRadius[eResample] + 1) % 2) - nYRadius;
1186 :
1187 2015 : bApplyVerticalShift =
1188 2015 : CPLFetchBool(papszWarpOptions, "APPLY_VERTICAL_SHIFT", false);
1189 2015 : dfMultFactorVerticalShift = CPLAtof(CSLFetchNameValueDef(
1190 2015 : papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT", "1.0"));
1191 :
1192 : /* -------------------------------------------------------------------- */
1193 : /* Set up resampling functions. */
1194 : /* -------------------------------------------------------------------- */
1195 2015 : if (CPLFetchBool(papszWarpOptions, "USE_GENERAL_CASE", false))
1196 12 : return GWKGeneralCase(this);
1197 :
1198 : #if defined(HAVE_OPENCL)
1199 559 : if ((eWorkingDataType == GDT_Byte || eWorkingDataType == GDT_CInt16 ||
1200 387 : eWorkingDataType == GDT_UInt16 || eWorkingDataType == GDT_Int16 ||
1201 258 : eWorkingDataType == GDT_CFloat32 || eWorkingDataType == GDT_Float32) &&
1202 1865 : (eResample == GRA_Bilinear || eResample == GRA_Cubic ||
1203 1427 : eResample == GRA_CubicSpline || eResample == GRA_Lanczos) &&
1204 4527 : !bApplyVerticalShift &&
1205 : // OpenCL warping gives different results than the ones expected by autotest,
1206 : // so disable it by default even if found.
1207 1042 : CPLTestBool(
1208 521 : CSLFetchNameValueDef(papszWarpOptions, "USE_OPENCL",
1209 : CPLGetConfigOption("GDAL_USE_OPENCL", "NO"))))
1210 : {
1211 0 : if (pafUnifiedSrcDensity != nullptr)
1212 : {
1213 : // If pafUnifiedSrcDensity is only set to 1.0, then we can
1214 : // discard it.
1215 0 : bool bFoundNotOne = false;
1216 0 : for (GPtrDiff_t j = 0;
1217 0 : j < static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize; j++)
1218 : {
1219 0 : if (pafUnifiedSrcDensity[j] != 1.0)
1220 : {
1221 0 : bFoundNotOne = true;
1222 0 : break;
1223 : }
1224 : }
1225 0 : if (!bFoundNotOne)
1226 : {
1227 0 : CPLFree(pafUnifiedSrcDensity);
1228 0 : pafUnifiedSrcDensity = nullptr;
1229 : }
1230 : }
1231 :
1232 0 : if (pafUnifiedSrcDensity != nullptr)
1233 : {
1234 : // Typically if there's a cutline or an alpha band
1235 : static bool bHasWarned = false;
1236 0 : if (!bHasWarned)
1237 : {
1238 0 : bHasWarned = true;
1239 0 : CPLDebug("WARP", "pafUnifiedSrcDensity is not null, "
1240 : "hence OpenCL warper cannot be used");
1241 : }
1242 : }
1243 : else
1244 : {
1245 0 : const CPLErr eResult = GWKOpenCLCase(this);
1246 :
1247 : // CE_Warning tells us a suitable OpenCL environment was not available
1248 : // so we fall through to other CPU based methods.
1249 0 : if (eResult != CE_Warning)
1250 0 : return eResult;
1251 : }
1252 : }
1253 : #endif // defined HAVE_OPENCL
1254 :
1255 2003 : const bool bNoMasksOrDstDensityOnly =
1256 1999 : papanBandSrcValid == nullptr && panUnifiedSrcValid == nullptr &&
1257 4002 : pafUnifiedSrcDensity == nullptr && panDstValid == nullptr;
1258 :
1259 2003 : if (eWorkingDataType == GDT_Byte && eResample == GRA_NearestNeighbour &&
1260 : bNoMasksOrDstDensityOnly)
1261 860 : return GWKNearestNoMasksOrDstDensityOnlyByte(this);
1262 :
1263 1143 : if (eWorkingDataType == GDT_Byte && eResample == GRA_Bilinear &&
1264 : bNoMasksOrDstDensityOnly)
1265 125 : return GWKBilinearNoMasksOrDstDensityOnlyByte(this);
1266 :
1267 1018 : if (eWorkingDataType == GDT_Byte && eResample == GRA_Cubic &&
1268 : bNoMasksOrDstDensityOnly)
1269 72 : return GWKCubicNoMasksOrDstDensityOnlyByte(this);
1270 :
1271 946 : if (eWorkingDataType == GDT_Byte && eResample == GRA_CubicSpline &&
1272 : bNoMasksOrDstDensityOnly)
1273 12 : return GWKCubicSplineNoMasksOrDstDensityOnlyByte(this);
1274 :
1275 934 : if (eWorkingDataType == GDT_Byte && eResample == GRA_NearestNeighbour)
1276 276 : return GWKNearestByte(this);
1277 :
1278 658 : if ((eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16) &&
1279 129 : eResample == GRA_NearestNeighbour && bNoMasksOrDstDensityOnly)
1280 18 : return GWKNearestNoMasksOrDstDensityOnlyShort(this);
1281 :
1282 640 : if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Cubic &&
1283 : bNoMasksOrDstDensityOnly)
1284 5 : return GWKCubicNoMasksOrDstDensityOnlyShort(this);
1285 :
1286 635 : if ((eWorkingDataType == GDT_Int16) && eResample == GRA_CubicSpline &&
1287 : bNoMasksOrDstDensityOnly)
1288 6 : return GWKCubicSplineNoMasksOrDstDensityOnlyShort(this);
1289 :
1290 629 : if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Bilinear &&
1291 : bNoMasksOrDstDensityOnly)
1292 18 : return GWKBilinearNoMasksOrDstDensityOnlyShort(this);
1293 :
1294 611 : if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Cubic &&
1295 : bNoMasksOrDstDensityOnly)
1296 12 : return GWKCubicNoMasksOrDstDensityOnlyUShort(this);
1297 :
1298 599 : if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_CubicSpline &&
1299 : bNoMasksOrDstDensityOnly)
1300 5 : return GWKCubicSplineNoMasksOrDstDensityOnlyUShort(this);
1301 :
1302 594 : if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Bilinear &&
1303 : bNoMasksOrDstDensityOnly)
1304 6 : return GWKBilinearNoMasksOrDstDensityOnlyUShort(this);
1305 :
1306 588 : if ((eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16) &&
1307 59 : eResample == GRA_NearestNeighbour)
1308 27 : return GWKNearestShort(this);
1309 :
1310 561 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour &&
1311 : bNoMasksOrDstDensityOnly)
1312 11 : return GWKNearestNoMasksOrDstDensityOnlyFloat(this);
1313 :
1314 550 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour)
1315 36 : return GWKNearestFloat(this);
1316 :
1317 514 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_Bilinear &&
1318 : bNoMasksOrDstDensityOnly)
1319 5 : return GWKBilinearNoMasksOrDstDensityOnlyFloat(this);
1320 :
1321 509 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_Cubic &&
1322 : bNoMasksOrDstDensityOnly)
1323 9 : return GWKCubicNoMasksOrDstDensityOnlyFloat(this);
1324 :
1325 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
1326 : if (eWorkingDataType == GDT_Float64 && eResample == GRA_Bilinear &&
1327 : bNoMasksOrDstDensityOnly)
1328 : return GWKBilinearNoMasksOrDstDensityOnlyDouble(this);
1329 :
1330 : if (eWorkingDataType == GDT_Float64 && eResample == GRA_Cubic &&
1331 : bNoMasksOrDstDensityOnly)
1332 : return GWKCubicNoMasksOrDstDensityOnlyDouble(this);
1333 : #endif
1334 :
1335 500 : if (eResample == GRA_Average)
1336 71 : return GWKAverageOrMode(this);
1337 :
1338 429 : if (eResample == GRA_RMS)
1339 9 : return GWKAverageOrMode(this);
1340 :
1341 420 : if (eResample == GRA_Mode)
1342 11 : return GWKAverageOrMode(this);
1343 :
1344 409 : if (eResample == GRA_Max)
1345 6 : return GWKAverageOrMode(this);
1346 :
1347 403 : if (eResample == GRA_Min)
1348 5 : return GWKAverageOrMode(this);
1349 :
1350 398 : if (eResample == GRA_Med)
1351 6 : return GWKAverageOrMode(this);
1352 :
1353 392 : if (eResample == GRA_Q1)
1354 5 : return GWKAverageOrMode(this);
1355 :
1356 387 : if (eResample == GRA_Q3)
1357 5 : return GWKAverageOrMode(this);
1358 :
1359 382 : if (eResample == GRA_Sum)
1360 18 : return GWKSumPreserving(this);
1361 :
1362 364 : if (!GDALDataTypeIsComplex(eWorkingDataType))
1363 : {
1364 133 : return GWKRealCase(this);
1365 : }
1366 :
1367 231 : return GWKGeneralCase(this);
1368 : }
1369 :
1370 : /************************************************************************/
1371 : /* Validate() */
1372 : /************************************************************************/
1373 :
1374 : /**
1375 : * \fn CPLErr GDALWarpKernel::Validate()
1376 : *
1377 : * Check the settings in the GDALWarpKernel, and issue a CPLError()
1378 : * (and return CE_Failure) if the configuration is considered to be
1379 : * invalid for some reason.
1380 : *
1381 : * This method will also do some standard defaulting such as setting
1382 : * pfnProgress to GDALDummyProgress() if it is NULL.
1383 : *
1384 : * @return CE_None on success or CE_Failure if an error is detected.
1385 : */
1386 :
1387 2319 : CPLErr GDALWarpKernel::Validate()
1388 :
1389 : {
1390 2319 : if (static_cast<size_t>(eResample) >=
1391 : (sizeof(anGWKFilterRadius) / sizeof(anGWKFilterRadius[0])))
1392 : {
1393 0 : CPLError(CE_Failure, CPLE_AppDefined,
1394 : "Unsupported resampling method %d.",
1395 0 : static_cast<int>(eResample));
1396 0 : return CE_Failure;
1397 : }
1398 :
1399 : // Tuples of values (e.g. "<R>,<G>,<B>" or "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>)") that must
1400 : // be ignored as contributing source pixels during resampling. Only taken into account by
1401 : // Average currently
1402 : const char *pszExcludedValues =
1403 2319 : CSLFetchNameValue(papszWarpOptions, "EXCLUDED_VALUES");
1404 2319 : if (pszExcludedValues)
1405 : {
1406 : const CPLStringList aosTokens(
1407 8 : CSLTokenizeString2(pszExcludedValues, "(,)", 0));
1408 8 : if ((aosTokens.size() % nBands) != 0)
1409 : {
1410 1 : CPLError(CE_Failure, CPLE_AppDefined,
1411 : "EXCLUDED_VALUES should contain one or several tuples of "
1412 : "%d values formatted like <R>,<G>,<B> or "
1413 : "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>) if there are multiple "
1414 : "tuples",
1415 : nBands);
1416 1 : return CE_Failure;
1417 : }
1418 14 : std::vector<double> adfTuple;
1419 28 : for (int i = 0; i < aosTokens.size(); ++i)
1420 : {
1421 21 : adfTuple.push_back(CPLAtof(aosTokens[i]));
1422 21 : if (((i + 1) % nBands) == 0)
1423 : {
1424 7 : m_aadfExcludedValues.push_back(adfTuple);
1425 7 : adfTuple.clear();
1426 : }
1427 : }
1428 : }
1429 :
1430 2318 : return CE_None;
1431 : }
1432 :
1433 : /************************************************************************/
1434 : /* GWKOverlayDensity() */
1435 : /* */
1436 : /* Compute the final density for the destination pixel. This */
1437 : /* is a function of the overlay density (passed in) and the */
1438 : /* original density. */
1439 : /************************************************************************/
1440 :
1441 7941280 : static void GWKOverlayDensity(const GDALWarpKernel *poWK, GPtrDiff_t iDstOffset,
1442 : double dfDensity)
1443 : {
1444 7941280 : if (dfDensity < 0.0001 || poWK->pafDstDensity == nullptr)
1445 6750400 : return;
1446 :
1447 1190880 : poWK->pafDstDensity[iDstOffset] = static_cast<float>(
1448 1190880 : 1.0 - (1.0 - dfDensity) * (1.0 - poWK->pafDstDensity[iDstOffset]));
1449 : }
1450 :
1451 : /************************************************************************/
1452 : /* GWKRoundValueT() */
1453 : /************************************************************************/
1454 :
// Rounding policy selected on the signedness of the target type T.
// Only the two partial specializations below are defined.
template <class T, bool is_signed> struct sGWKRoundValueT
{
    static T eval(double);
};

// Signed targets: round half up through floor(), so that negative values
// are handled correctly.
template <class T> struct sGWKRoundValueT<T, true> /* signed */
{
    static T eval(double dfInput)
    {
        const double dfRounded = std::floor(dfInput + 0.5);
        return static_cast<T>(dfRounded);
    }
};

// Unsigned targets: adding 0.5 and letting the cast truncate is enough.
template <class T> struct sGWKRoundValueT<T, false> /* unsigned */
{
    static T eval(double dfInput)
    {
        const double dfShifted = dfInput + 0.5;
        return static_cast<T>(dfShifted);
    }
};

// Round dfValue to the nearest value of type T. No range check is done.
template <class T> static T GWKRoundValueT(double dfValue)
{
    typedef sGWKRoundValueT<T, std::numeric_limits<T>::is_signed> Rounder;
    return Rounder::eval(dfValue);
}

// float target: plain narrowing conversion, no rounding to integer.
template <> float GWKRoundValueT<float>(double dfValue)
{
    return static_cast<float>(dfValue);
}

#ifdef notused
template <> double GWKRoundValueT<double>(double dfValue)
{
    return dfValue;
}
#endif
1492 :
1493 : /************************************************************************/
1494 : /* GWKClampValueT() */
1495 : /************************************************************************/
1496 :
1497 10463066 : template <class T> static CPL_INLINE T GWKClampValueT(double dfValue)
1498 : {
1499 10463066 : if (dfValue < std::numeric_limits<T>::min())
1500 3969 : return std::numeric_limits<T>::min();
1501 10450868 : else if (dfValue > std::numeric_limits<T>::max())
1502 18463 : return std::numeric_limits<T>::max();
1503 : else
1504 10429938 : return GWKRoundValueT<T>(dfValue);
1505 : }
1506 :
1507 718914 : template <> float GWKClampValueT<float>(double dfValue)
1508 : {
1509 718914 : return static_cast<float>(dfValue);
1510 : }
1511 :
1512 : #ifdef notused
1513 : template <> double GWKClampValueT<double>(double dfValue)
1514 : {
1515 : return dfValue;
1516 : }
1517 : #endif
1518 :
1519 : /************************************************************************/
1520 : /* GWKSetPixelValueRealT() */
1521 : /************************************************************************/
1522 :
1523 : template <class T>
1524 159076 : static bool GWKSetPixelValueRealT(const GDALWarpKernel *poWK, int iBand,
1525 : GPtrDiff_t iDstOffset, double dfDensity,
1526 : T value)
1527 : {
1528 159076 : T *pDst = reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
1529 :
1530 : /* -------------------------------------------------------------------- */
1531 : /* If the source density is less than 100% we need to fetch the */
1532 : /* existing destination value, and mix it with the source to */
1533 : /* get the new "to apply" value. Also compute composite */
1534 : /* density. */
1535 : /* */
1536 : /* We avoid mixing if density is very near one or risk mixing */
1537 : /* in very extreme nodata values and causing odd results (#1610) */
1538 : /* -------------------------------------------------------------------- */
1539 159076 : if (dfDensity < 0.9999)
1540 : {
1541 159076 : if (dfDensity < 0.0001)
1542 0 : return true;
1543 :
1544 159076 : double dfDstDensity = 1.0;
1545 :
1546 159076 : if (poWK->pafDstDensity != nullptr)
1547 157604 : dfDstDensity = poWK->pafDstDensity[iDstOffset];
1548 1472 : else if (poWK->panDstValid != nullptr &&
1549 0 : !CPLMaskGet(poWK->panDstValid, iDstOffset))
1550 0 : dfDstDensity = 0.0;
1551 :
1552 : // It seems like we also ought to be testing panDstValid[] here!
1553 :
1554 159076 : const double dfDstReal = pDst[iDstOffset];
1555 :
1556 : // The destination density is really only relative to the portion
1557 : // not occluded by the overlay.
1558 159076 : const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
1559 :
1560 159076 : const double dfReal = (value * dfDensity + dfDstReal * dfDstInfluence) /
1561 159076 : (dfDensity + dfDstInfluence);
1562 :
1563 : /* --------------------------------------------------------------------
1564 : */
1565 : /* Actually apply the destination value. */
1566 : /* */
1567 : /* Avoid using the destination nodata value for integer datatypes
1568 : */
1569 : /* if by chance it is equal to the computed pixel value. */
1570 : /* --------------------------------------------------------------------
1571 : */
1572 159076 : pDst[iDstOffset] = GWKClampValueT<T>(dfReal);
1573 : }
1574 : else
1575 : {
1576 0 : pDst[iDstOffset] = value;
1577 : }
1578 :
1579 159076 : if (poWK->padfDstNoDataReal != nullptr &&
1580 0 : poWK->padfDstNoDataReal[iBand] == static_cast<double>(pDst[iDstOffset]))
1581 : {
1582 0 : if (pDst[iDstOffset] == std::numeric_limits<T>::min())
1583 0 : pDst[iDstOffset] = std::numeric_limits<T>::min() + 1;
1584 : else
1585 0 : pDst[iDstOffset]--;
1586 : }
1587 :
1588 159076 : return true;
1589 : }
1590 :
1591 : /************************************************************************/
1592 : /* GWKSetPixelValue() */
1593 : /************************************************************************/
1594 :
1595 3867630 : static bool GWKSetPixelValue(const GDALWarpKernel *poWK, int iBand,
1596 : GPtrDiff_t iDstOffset, double dfDensity,
1597 : double dfReal, double dfImag)
1598 :
1599 : {
1600 3867630 : GByte *pabyDst = poWK->papabyDstImage[iBand];
1601 :
1602 : /* -------------------------------------------------------------------- */
1603 : /* If the source density is less than 100% we need to fetch the */
1604 : /* existing destination value, and mix it with the source to */
1605 : /* get the new "to apply" value. Also compute composite */
1606 : /* density. */
1607 : /* */
1608 : /* We avoid mixing if density is very near one or risk mixing */
1609 : /* in very extreme nodata values and causing odd results (#1610) */
1610 : /* -------------------------------------------------------------------- */
1611 3867630 : if (dfDensity < 0.9999)
1612 : {
1613 800 : if (dfDensity < 0.0001)
1614 0 : return true;
1615 :
1616 800 : double dfDstDensity = 1.0;
1617 800 : if (poWK->pafDstDensity != nullptr)
1618 800 : dfDstDensity = poWK->pafDstDensity[iDstOffset];
1619 0 : else if (poWK->panDstValid != nullptr &&
1620 0 : !CPLMaskGet(poWK->panDstValid, iDstOffset))
1621 0 : dfDstDensity = 0.0;
1622 :
1623 800 : double dfDstReal = 0.0;
1624 800 : double dfDstImag = 0.0;
1625 : // It seems like we also ought to be testing panDstValid[] here!
1626 :
1627 : // TODO(schwehr): Factor out this repreated type of set.
1628 800 : switch (poWK->eWorkingDataType)
1629 : {
1630 0 : case GDT_Byte:
1631 0 : dfDstReal = pabyDst[iDstOffset];
1632 0 : dfDstImag = 0.0;
1633 0 : break;
1634 :
1635 0 : case GDT_Int8:
1636 0 : dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
1637 0 : dfDstImag = 0.0;
1638 0 : break;
1639 :
1640 400 : case GDT_Int16:
1641 400 : dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
1642 400 : dfDstImag = 0.0;
1643 400 : break;
1644 :
1645 400 : case GDT_UInt16:
1646 400 : dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
1647 400 : dfDstImag = 0.0;
1648 400 : break;
1649 :
1650 0 : case GDT_Int32:
1651 0 : dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
1652 0 : dfDstImag = 0.0;
1653 0 : break;
1654 :
1655 0 : case GDT_UInt32:
1656 0 : dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
1657 0 : dfDstImag = 0.0;
1658 0 : break;
1659 :
1660 0 : case GDT_Int64:
1661 0 : dfDstReal = static_cast<double>(
1662 0 : reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
1663 0 : dfDstImag = 0.0;
1664 0 : break;
1665 :
1666 0 : case GDT_UInt64:
1667 0 : dfDstReal = static_cast<double>(
1668 0 : reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
1669 0 : dfDstImag = 0.0;
1670 0 : break;
1671 :
1672 0 : case GDT_Float32:
1673 0 : dfDstReal = reinterpret_cast<float *>(pabyDst)[iDstOffset];
1674 0 : dfDstImag = 0.0;
1675 0 : break;
1676 :
1677 0 : case GDT_Float64:
1678 0 : dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
1679 0 : dfDstImag = 0.0;
1680 0 : break;
1681 :
1682 0 : case GDT_CInt16:
1683 0 : dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2];
1684 0 : dfDstImag =
1685 0 : reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2 + 1];
1686 0 : break;
1687 :
1688 0 : case GDT_CInt32:
1689 0 : dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2];
1690 0 : dfDstImag =
1691 0 : reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2 + 1];
1692 0 : break;
1693 :
1694 0 : case GDT_CFloat32:
1695 0 : dfDstReal = reinterpret_cast<float *>(pabyDst)[iDstOffset * 2];
1696 0 : dfDstImag =
1697 0 : reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1];
1698 0 : break;
1699 :
1700 0 : case GDT_CFloat64:
1701 0 : dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset * 2];
1702 0 : dfDstImag =
1703 0 : reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1];
1704 0 : break;
1705 :
1706 0 : case GDT_Unknown:
1707 : case GDT_TypeCount:
1708 0 : CPLAssert(false);
1709 : return false;
1710 : }
1711 :
1712 : // The destination density is really only relative to the portion
1713 : // not occluded by the overlay.
1714 800 : const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
1715 :
1716 800 : dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
1717 800 : (dfDensity + dfDstInfluence);
1718 :
1719 800 : dfImag = (dfImag * dfDensity + dfDstImag * dfDstInfluence) /
1720 800 : (dfDensity + dfDstInfluence);
1721 : }
1722 :
1723 : /* -------------------------------------------------------------------- */
1724 : /* Actually apply the destination value. */
1725 : /* */
1726 : /* Avoid using the destination nodata value for integer datatypes */
1727 : /* if by chance it is equal to the computed pixel value. */
1728 : /* -------------------------------------------------------------------- */
1729 :
1730 : // TODO(schwehr): Can we make this a template?
1731 : #define CLAMP(type) \
1732 : do \
1733 : { \
1734 : type *_pDst = reinterpret_cast<type *>(pabyDst); \
1735 : if (dfReal < static_cast<double>(std::numeric_limits<type>::min())) \
1736 : _pDst[iDstOffset] = \
1737 : static_cast<type>(std::numeric_limits<type>::min()); \
1738 : else if (dfReal > \
1739 : static_cast<double>(std::numeric_limits<type>::max())) \
1740 : _pDst[iDstOffset] = \
1741 : static_cast<type>(std::numeric_limits<type>::max()); \
1742 : else \
1743 : _pDst[iDstOffset] = (std::numeric_limits<type>::is_signed) \
1744 : ? static_cast<type>(floor(dfReal + 0.5)) \
1745 : : static_cast<type>(dfReal + 0.5); \
1746 : if (poWK->padfDstNoDataReal != nullptr && \
1747 : poWK->padfDstNoDataReal[iBand] == \
1748 : static_cast<double>(_pDst[iDstOffset])) \
1749 : { \
1750 : if (_pDst[iDstOffset] == \
1751 : static_cast<type>(std::numeric_limits<type>::min())) \
1752 : _pDst[iDstOffset] = \
1753 : static_cast<type>(std::numeric_limits<type>::min() + 1); \
1754 : else \
1755 : _pDst[iDstOffset]--; \
1756 : } \
1757 : } while (false)
1758 :
1759 3867630 : switch (poWK->eWorkingDataType)
1760 : {
1761 3141450 : case GDT_Byte:
1762 3141450 : CLAMP(GByte);
1763 3141450 : break;
1764 :
1765 0 : case GDT_Int8:
1766 0 : CLAMP(GInt8);
1767 0 : break;
1768 :
1769 7465 : case GDT_Int16:
1770 7465 : CLAMP(GInt16);
1771 7465 : break;
1772 :
1773 463 : case GDT_UInt16:
1774 463 : CLAMP(GUInt16);
1775 463 : break;
1776 :
1777 63 : case GDT_UInt32:
1778 63 : CLAMP(GUInt32);
1779 63 : break;
1780 :
1781 3463 : case GDT_Int32:
1782 3463 : CLAMP(GInt32);
1783 3463 : break;
1784 :
1785 0 : case GDT_UInt64:
1786 0 : CLAMP(std::uint64_t);
1787 0 : break;
1788 :
1789 0 : case GDT_Int64:
1790 0 : CLAMP(std::int64_t);
1791 0 : break;
1792 :
1793 478957 : case GDT_Float32:
1794 478957 : reinterpret_cast<float *>(pabyDst)[iDstOffset] =
1795 478957 : static_cast<float>(dfReal);
1796 478957 : break;
1797 :
1798 147 : case GDT_Float64:
1799 147 : reinterpret_cast<double *>(pabyDst)[iDstOffset] = dfReal;
1800 147 : break;
1801 :
1802 234178 : case GDT_CInt16:
1803 : {
1804 : typedef GInt16 T;
1805 234178 : if (dfReal < static_cast<double>(std::numeric_limits<T>::min()))
1806 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1807 0 : std::numeric_limits<T>::min();
1808 234178 : else if (dfReal >
1809 234178 : static_cast<double>(std::numeric_limits<T>::max()))
1810 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1811 0 : std::numeric_limits<T>::max();
1812 : else
1813 234178 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1814 234178 : static_cast<T>(floor(dfReal + 0.5));
1815 234178 : if (dfImag < static_cast<double>(std::numeric_limits<T>::min()))
1816 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1817 0 : std::numeric_limits<T>::min();
1818 234178 : else if (dfImag >
1819 234178 : static_cast<double>(std::numeric_limits<T>::max()))
1820 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1821 0 : std::numeric_limits<T>::max();
1822 : else
1823 234178 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1824 234178 : static_cast<T>(floor(dfImag + 0.5));
1825 234178 : break;
1826 : }
1827 :
1828 478 : case GDT_CInt32:
1829 : {
1830 : typedef GInt32 T;
1831 478 : if (dfReal < static_cast<double>(std::numeric_limits<T>::min()))
1832 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1833 0 : std::numeric_limits<T>::min();
1834 478 : else if (dfReal >
1835 478 : static_cast<double>(std::numeric_limits<T>::max()))
1836 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1837 0 : std::numeric_limits<T>::max();
1838 : else
1839 478 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1840 478 : static_cast<T>(floor(dfReal + 0.5));
1841 478 : if (dfImag < static_cast<double>(std::numeric_limits<T>::min()))
1842 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1843 0 : std::numeric_limits<T>::min();
1844 478 : else if (dfImag >
1845 478 : static_cast<double>(std::numeric_limits<T>::max()))
1846 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1847 0 : std::numeric_limits<T>::max();
1848 : else
1849 478 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1850 478 : static_cast<T>(floor(dfImag + 0.5));
1851 478 : break;
1852 : }
1853 :
1854 490 : case GDT_CFloat32:
1855 490 : reinterpret_cast<float *>(pabyDst)[iDstOffset * 2] =
1856 490 : static_cast<float>(dfReal);
1857 490 : reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1] =
1858 490 : static_cast<float>(dfImag);
1859 490 : break;
1860 :
1861 478 : case GDT_CFloat64:
1862 478 : reinterpret_cast<double *>(pabyDst)[iDstOffset * 2] = dfReal;
1863 478 : reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1] = dfImag;
1864 478 : break;
1865 :
1866 0 : case GDT_Unknown:
1867 : case GDT_TypeCount:
1868 0 : return false;
1869 : }
1870 :
1871 3867630 : return true;
1872 : }
1873 :
1874 : /************************************************************************/
1875 : /* GWKSetPixelValueReal() */
1876 : /************************************************************************/
1877 :
1878 923761 : static bool GWKSetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
1879 : GPtrDiff_t iDstOffset, double dfDensity,
1880 : double dfReal)
1881 :
1882 : {
1883 923761 : GByte *pabyDst = poWK->papabyDstImage[iBand];
1884 :
1885 : /* -------------------------------------------------------------------- */
1886 : /* If the source density is less than 100% we need to fetch the */
1887 : /* existing destination value, and mix it with the source to */
1888 : /* get the new "to apply" value. Also compute composite */
1889 : /* density. */
1890 : /* */
1891 : /* We avoid mixing if density is very near one or risk mixing */
1892 : /* in very extreme nodata values and causing odd results (#1610) */
1893 : /* -------------------------------------------------------------------- */
1894 923761 : if (dfDensity < 0.9999)
1895 : {
1896 600 : if (dfDensity < 0.0001)
1897 0 : return true;
1898 :
1899 600 : double dfDstReal = 0.0;
1900 600 : double dfDstDensity = 1.0;
1901 :
1902 600 : if (poWK->pafDstDensity != nullptr)
1903 600 : dfDstDensity = poWK->pafDstDensity[iDstOffset];
1904 0 : else if (poWK->panDstValid != nullptr &&
1905 0 : !CPLMaskGet(poWK->panDstValid, iDstOffset))
1906 0 : dfDstDensity = 0.0;
1907 :
1908 : // It seems like we also ought to be testing panDstValid[] here!
1909 :
1910 600 : switch (poWK->eWorkingDataType)
1911 : {
1912 0 : case GDT_Byte:
1913 0 : dfDstReal = pabyDst[iDstOffset];
1914 0 : break;
1915 :
1916 0 : case GDT_Int8:
1917 0 : dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
1918 0 : break;
1919 :
1920 300 : case GDT_Int16:
1921 300 : dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
1922 300 : break;
1923 :
1924 300 : case GDT_UInt16:
1925 300 : dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
1926 300 : break;
1927 :
1928 0 : case GDT_Int32:
1929 0 : dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
1930 0 : break;
1931 :
1932 0 : case GDT_UInt32:
1933 0 : dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
1934 0 : break;
1935 :
1936 0 : case GDT_Int64:
1937 0 : dfDstReal = static_cast<double>(
1938 0 : reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
1939 0 : break;
1940 :
1941 0 : case GDT_UInt64:
1942 0 : dfDstReal = static_cast<double>(
1943 0 : reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
1944 0 : break;
1945 :
1946 0 : case GDT_Float32:
1947 0 : dfDstReal = reinterpret_cast<float *>(pabyDst)[iDstOffset];
1948 0 : break;
1949 :
1950 0 : case GDT_Float64:
1951 0 : dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
1952 0 : break;
1953 :
1954 0 : case GDT_CInt16:
1955 : case GDT_CInt32:
1956 : case GDT_CFloat32:
1957 : case GDT_CFloat64:
1958 : case GDT_Unknown:
1959 : case GDT_TypeCount:
1960 0 : CPLAssert(false);
1961 : return false;
1962 : }
1963 :
1964 : // The destination density is really only relative to the portion
1965 : // not occluded by the overlay.
1966 600 : const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
1967 :
1968 600 : dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
1969 600 : (dfDensity + dfDstInfluence);
1970 : }
1971 :
1972 : /* -------------------------------------------------------------------- */
1973 : /* Actually apply the destination value. */
1974 : /* */
1975 : /* Avoid using the destination nodata value for integer datatypes */
1976 : /* if by chance it is equal to the computed pixel value. */
1977 : /* -------------------------------------------------------------------- */
1978 :
1979 923761 : switch (poWK->eWorkingDataType)
1980 : {
1981 916736 : case GDT_Byte:
1982 916736 : CLAMP(GByte);
1983 916736 : break;
1984 :
1985 0 : case GDT_Int8:
1986 0 : CLAMP(GInt8);
1987 0 : break;
1988 :
1989 1085 : case GDT_Int16:
1990 1085 : CLAMP(GInt16);
1991 1085 : break;
1992 :
1993 363 : case GDT_UInt16:
1994 363 : CLAMP(GUInt16);
1995 363 : break;
1996 :
1997 315 : case GDT_UInt32:
1998 315 : CLAMP(GUInt32);
1999 315 : break;
2000 :
2001 1318 : case GDT_Int32:
2002 1318 : CLAMP(GInt32);
2003 1318 : break;
2004 :
2005 0 : case GDT_UInt64:
2006 0 : CLAMP(std::uint64_t);
2007 0 : break;
2008 :
2009 100 : case GDT_Int64:
2010 100 : CLAMP(std::int64_t);
2011 100 : break;
2012 :
2013 3426 : case GDT_Float32:
2014 3426 : reinterpret_cast<float *>(pabyDst)[iDstOffset] =
2015 3426 : static_cast<float>(dfReal);
2016 3426 : break;
2017 :
2018 418 : case GDT_Float64:
2019 418 : reinterpret_cast<double *>(pabyDst)[iDstOffset] = dfReal;
2020 418 : break;
2021 :
2022 0 : case GDT_CInt16:
2023 : case GDT_CInt32:
2024 : case GDT_CFloat32:
2025 : case GDT_CFloat64:
2026 0 : return false;
2027 :
2028 0 : case GDT_Unknown:
2029 : case GDT_TypeCount:
2030 0 : CPLAssert(false);
2031 : return false;
2032 : }
2033 :
2034 923761 : return true;
2035 : }
2036 :
2037 : /************************************************************************/
2038 : /* GWKGetPixelValue() */
2039 : /************************************************************************/
2040 :
2041 : /* It is assumed that panUnifiedSrcValid has been checked before */
2042 :
2043 29336000 : static bool GWKGetPixelValue(const GDALWarpKernel *poWK, int iBand,
2044 : GPtrDiff_t iSrcOffset, double *pdfDensity,
2045 : double *pdfReal, double *pdfImag)
2046 :
2047 : {
2048 29336000 : GByte *pabySrc = poWK->papabySrcImage[iBand];
2049 :
2050 58672100 : if (poWK->papanBandSrcValid != nullptr &&
2051 29336000 : poWK->papanBandSrcValid[iBand] != nullptr &&
2052 0 : !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
2053 : {
2054 0 : *pdfDensity = 0.0;
2055 0 : return false;
2056 : }
2057 :
2058 29336000 : *pdfReal = 0.0;
2059 29336000 : *pdfImag = 0.0;
2060 :
2061 : // TODO(schwehr): Fix casting.
2062 29336000 : switch (poWK->eWorkingDataType)
2063 : {
2064 28245600 : case GDT_Byte:
2065 28245600 : *pdfReal = pabySrc[iSrcOffset];
2066 28245600 : *pdfImag = 0.0;
2067 28245600 : break;
2068 :
2069 0 : case GDT_Int8:
2070 0 : *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
2071 0 : *pdfImag = 0.0;
2072 0 : break;
2073 :
2074 28181 : case GDT_Int16:
2075 28181 : *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
2076 28181 : *pdfImag = 0.0;
2077 28181 : break;
2078 :
2079 163 : case GDT_UInt16:
2080 163 : *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
2081 163 : *pdfImag = 0.0;
2082 163 : break;
2083 :
2084 13663 : case GDT_Int32:
2085 13663 : *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
2086 13663 : *pdfImag = 0.0;
2087 13663 : break;
2088 :
2089 63 : case GDT_UInt32:
2090 63 : *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
2091 63 : *pdfImag = 0.0;
2092 63 : break;
2093 :
2094 0 : case GDT_Int64:
2095 0 : *pdfReal = static_cast<double>(
2096 0 : reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
2097 0 : *pdfImag = 0.0;
2098 0 : break;
2099 :
2100 0 : case GDT_UInt64:
2101 0 : *pdfReal = static_cast<double>(
2102 0 : reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
2103 0 : *pdfImag = 0.0;
2104 0 : break;
2105 :
2106 1047220 : case GDT_Float32:
2107 1047220 : *pdfReal = reinterpret_cast<float *>(pabySrc)[iSrcOffset];
2108 1047220 : *pdfImag = 0.0;
2109 1047220 : break;
2110 :
2111 582 : case GDT_Float64:
2112 582 : *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
2113 582 : *pdfImag = 0.0;
2114 582 : break;
2115 :
2116 130 : case GDT_CInt16:
2117 130 : *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2];
2118 130 : *pdfImag = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2 + 1];
2119 130 : break;
2120 :
2121 130 : case GDT_CInt32:
2122 130 : *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2];
2123 130 : *pdfImag = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2 + 1];
2124 130 : break;
2125 :
2126 178 : case GDT_CFloat32:
2127 178 : *pdfReal = reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2];
2128 178 : *pdfImag = reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2 + 1];
2129 178 : break;
2130 :
2131 130 : case GDT_CFloat64:
2132 130 : *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2];
2133 130 : *pdfImag = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2 + 1];
2134 130 : break;
2135 :
2136 0 : case GDT_Unknown:
2137 : case GDT_TypeCount:
2138 0 : CPLAssert(false);
2139 : *pdfDensity = 0.0;
2140 : return false;
2141 : }
2142 :
2143 29336000 : if (poWK->pafUnifiedSrcDensity != nullptr)
2144 3015160 : *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
2145 : else
2146 26320900 : *pdfDensity = 1.0;
2147 :
2148 29336000 : return *pdfDensity != 0.0;
2149 : }
2150 :
2151 : /************************************************************************/
2152 : /* GWKGetPixelValueReal() */
2153 : /************************************************************************/
2154 :
2155 1012 : static bool GWKGetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
2156 : GPtrDiff_t iSrcOffset, double *pdfDensity,
2157 : double *pdfReal)
2158 :
2159 : {
2160 1012 : GByte *pabySrc = poWK->papabySrcImage[iBand];
2161 :
2162 2026 : if (poWK->papanBandSrcValid != nullptr &&
2163 1014 : poWK->papanBandSrcValid[iBand] != nullptr &&
2164 2 : !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
2165 : {
2166 0 : *pdfDensity = 0.0;
2167 0 : return false;
2168 : }
2169 :
2170 1012 : switch (poWK->eWorkingDataType)
2171 : {
2172 1 : case GDT_Byte:
2173 1 : *pdfReal = pabySrc[iSrcOffset];
2174 1 : break;
2175 :
2176 0 : case GDT_Int8:
2177 0 : *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
2178 0 : break;
2179 :
2180 1 : case GDT_Int16:
2181 1 : *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
2182 1 : break;
2183 :
2184 1 : case GDT_UInt16:
2185 1 : *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
2186 1 : break;
2187 :
2188 870 : case GDT_Int32:
2189 870 : *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
2190 870 : break;
2191 :
2192 67 : case GDT_UInt32:
2193 67 : *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
2194 67 : break;
2195 :
2196 0 : case GDT_Int64:
2197 0 : *pdfReal = static_cast<double>(
2198 0 : reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
2199 0 : break;
2200 :
2201 0 : case GDT_UInt64:
2202 0 : *pdfReal = static_cast<double>(
2203 0 : reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
2204 0 : break;
2205 :
2206 2 : case GDT_Float32:
2207 2 : *pdfReal = reinterpret_cast<float *>(pabySrc)[iSrcOffset];
2208 2 : break;
2209 :
2210 70 : case GDT_Float64:
2211 70 : *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
2212 70 : break;
2213 :
2214 0 : case GDT_CInt16:
2215 : case GDT_CInt32:
2216 : case GDT_CFloat32:
2217 : case GDT_CFloat64:
2218 : case GDT_Unknown:
2219 : case GDT_TypeCount:
2220 0 : CPLAssert(false);
2221 : return false;
2222 : }
2223 :
2224 1012 : if (poWK->pafUnifiedSrcDensity != nullptr)
2225 0 : *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
2226 : else
2227 1012 : *pdfDensity = 1.0;
2228 :
2229 1012 : return *pdfDensity != 0.0;
2230 : }
2231 :
2232 : /************************************************************************/
2233 : /* GWKGetPixelRow() */
2234 : /************************************************************************/
2235 :
2236 : /* It is assumed that adfImag[] is set to 0 by caller code for non-complex */
2237 : /* data-types. */
2238 :
2239 2353850 : static bool GWKGetPixelRow(const GDALWarpKernel *poWK, int iBand,
2240 : GPtrDiff_t iSrcOffset, int nHalfSrcLen,
2241 : double *padfDensity, double adfReal[],
2242 : double *padfImag)
2243 : {
2244 : // We know that nSrcLen is even, so we can *always* unroll loops 2x.
2245 2353850 : const int nSrcLen = nHalfSrcLen * 2;
2246 2353850 : bool bHasValid = false;
2247 :
2248 2353850 : if (padfDensity != nullptr)
2249 : {
2250 : // Init the density.
2251 3345770 : for (int i = 0; i < nSrcLen; i += 2)
2252 : {
2253 2189510 : padfDensity[i] = 1.0;
2254 2189510 : padfDensity[i + 1] = 1.0;
2255 : }
2256 :
2257 1156260 : if (poWK->panUnifiedSrcValid != nullptr)
2258 : {
2259 3281460 : for (int i = 0; i < nSrcLen; i += 2)
2260 : {
2261 2142070 : if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i))
2262 2067740 : bHasValid = true;
2263 : else
2264 74323 : padfDensity[i] = 0.0;
2265 :
2266 2142070 : if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i + 1))
2267 2068400 : bHasValid = true;
2268 : else
2269 73668 : padfDensity[i + 1] = 0.0;
2270 : }
2271 :
2272 : // Reset or fail as needed.
2273 1139400 : if (bHasValid)
2274 1116590 : bHasValid = false;
2275 : else
2276 22806 : return false;
2277 : }
2278 :
2279 1133450 : if (poWK->papanBandSrcValid != nullptr &&
2280 0 : poWK->papanBandSrcValid[iBand] != nullptr)
2281 : {
2282 0 : for (int i = 0; i < nSrcLen; i += 2)
2283 : {
2284 0 : if (CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset + i))
2285 0 : bHasValid = true;
2286 : else
2287 0 : padfDensity[i] = 0.0;
2288 :
2289 0 : if (CPLMaskGet(poWK->papanBandSrcValid[iBand],
2290 0 : iSrcOffset + i + 1))
2291 0 : bHasValid = true;
2292 : else
2293 0 : padfDensity[i + 1] = 0.0;
2294 : }
2295 :
2296 : // Reset or fail as needed.
2297 0 : if (bHasValid)
2298 0 : bHasValid = false;
2299 : else
2300 0 : return false;
2301 : }
2302 : }
2303 :
2304 : // TODO(schwehr): Fix casting.
2305 : // Fetch data.
2306 2331040 : switch (poWK->eWorkingDataType)
2307 : {
2308 1121060 : case GDT_Byte:
2309 : {
2310 1121060 : GByte *pSrc =
2311 1121060 : reinterpret_cast<GByte *>(poWK->papabySrcImage[iBand]);
2312 1121060 : pSrc += iSrcOffset;
2313 3243800 : for (int i = 0; i < nSrcLen; i += 2)
2314 : {
2315 2122740 : adfReal[i] = pSrc[i];
2316 2122740 : adfReal[i + 1] = pSrc[i + 1];
2317 : }
2318 1121060 : break;
2319 : }
2320 :
2321 0 : case GDT_Int8:
2322 : {
2323 0 : GInt8 *pSrc =
2324 0 : reinterpret_cast<GInt8 *>(poWK->papabySrcImage[iBand]);
2325 0 : pSrc += iSrcOffset;
2326 0 : for (int i = 0; i < nSrcLen; i += 2)
2327 : {
2328 0 : adfReal[i] = pSrc[i];
2329 0 : adfReal[i + 1] = pSrc[i + 1];
2330 : }
2331 0 : break;
2332 : }
2333 :
2334 5558 : case GDT_Int16:
2335 : {
2336 5558 : GInt16 *pSrc =
2337 5558 : reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
2338 5558 : pSrc += iSrcOffset;
2339 21380 : for (int i = 0; i < nSrcLen; i += 2)
2340 : {
2341 15822 : adfReal[i] = pSrc[i];
2342 15822 : adfReal[i + 1] = pSrc[i + 1];
2343 : }
2344 5558 : break;
2345 : }
2346 :
2347 4114 : case GDT_UInt16:
2348 : {
2349 4114 : GUInt16 *pSrc =
2350 4114 : reinterpret_cast<GUInt16 *>(poWK->papabySrcImage[iBand]);
2351 4114 : pSrc += iSrcOffset;
2352 18492 : for (int i = 0; i < nSrcLen; i += 2)
2353 : {
2354 14378 : adfReal[i] = pSrc[i];
2355 14378 : adfReal[i + 1] = pSrc[i + 1];
2356 : }
2357 4114 : break;
2358 : }
2359 :
2360 1130 : case GDT_Int32:
2361 : {
2362 1130 : GInt32 *pSrc =
2363 1130 : reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
2364 1130 : pSrc += iSrcOffset;
2365 2992 : for (int i = 0; i < nSrcLen; i += 2)
2366 : {
2367 1862 : adfReal[i] = pSrc[i];
2368 1862 : adfReal[i + 1] = pSrc[i + 1];
2369 : }
2370 1130 : break;
2371 : }
2372 :
2373 750 : case GDT_UInt32:
2374 : {
2375 750 : GUInt32 *pSrc =
2376 750 : reinterpret_cast<GUInt32 *>(poWK->papabySrcImage[iBand]);
2377 750 : pSrc += iSrcOffset;
2378 2232 : for (int i = 0; i < nSrcLen; i += 2)
2379 : {
2380 1482 : adfReal[i] = pSrc[i];
2381 1482 : adfReal[i + 1] = pSrc[i + 1];
2382 : }
2383 750 : break;
2384 : }
2385 :
2386 190 : case GDT_Int64:
2387 : {
2388 190 : auto pSrc =
2389 190 : reinterpret_cast<std::int64_t *>(poWK->papabySrcImage[iBand]);
2390 190 : pSrc += iSrcOffset;
2391 380 : for (int i = 0; i < nSrcLen; i += 2)
2392 : {
2393 190 : adfReal[i] = static_cast<double>(pSrc[i]);
2394 190 : adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
2395 : }
2396 190 : break;
2397 : }
2398 :
2399 0 : case GDT_UInt64:
2400 : {
2401 0 : auto pSrc =
2402 0 : reinterpret_cast<std::uint64_t *>(poWK->papabySrcImage[iBand]);
2403 0 : pSrc += iSrcOffset;
2404 0 : for (int i = 0; i < nSrcLen; i += 2)
2405 : {
2406 0 : adfReal[i] = static_cast<double>(pSrc[i]);
2407 0 : adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
2408 : }
2409 0 : break;
2410 : }
2411 :
2412 25074 : case GDT_Float32:
2413 : {
2414 25074 : float *pSrc =
2415 25074 : reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
2416 25074 : pSrc += iSrcOffset;
2417 121347 : for (int i = 0; i < nSrcLen; i += 2)
2418 : {
2419 96273 : adfReal[i] = pSrc[i];
2420 96273 : adfReal[i + 1] = pSrc[i + 1];
2421 : }
2422 25074 : break;
2423 : }
2424 :
2425 940 : case GDT_Float64:
2426 : {
2427 940 : double *pSrc =
2428 940 : reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
2429 940 : pSrc += iSrcOffset;
2430 2612 : for (int i = 0; i < nSrcLen; i += 2)
2431 : {
2432 1672 : adfReal[i] = pSrc[i];
2433 1672 : adfReal[i + 1] = pSrc[i + 1];
2434 : }
2435 940 : break;
2436 : }
2437 :
2438 1169410 : case GDT_CInt16:
2439 : {
2440 1169410 : GInt16 *pSrc =
2441 1169410 : reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
2442 1169410 : pSrc += 2 * iSrcOffset;
2443 4676400 : for (int i = 0; i < nSrcLen; i += 2)
2444 : {
2445 3506990 : adfReal[i] = pSrc[2 * i];
2446 3506990 : padfImag[i] = pSrc[2 * i + 1];
2447 :
2448 3506990 : adfReal[i + 1] = pSrc[2 * i + 2];
2449 3506990 : padfImag[i + 1] = pSrc[2 * i + 3];
2450 : }
2451 1169410 : break;
2452 : }
2453 :
2454 940 : case GDT_CInt32:
2455 : {
2456 940 : GInt32 *pSrc =
2457 940 : reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
2458 940 : pSrc += 2 * iSrcOffset;
2459 2612 : for (int i = 0; i < nSrcLen; i += 2)
2460 : {
2461 1672 : adfReal[i] = pSrc[2 * i];
2462 1672 : padfImag[i] = pSrc[2 * i + 1];
2463 :
2464 1672 : adfReal[i + 1] = pSrc[2 * i + 2];
2465 1672 : padfImag[i + 1] = pSrc[2 * i + 3];
2466 : }
2467 940 : break;
2468 : }
2469 :
2470 940 : case GDT_CFloat32:
2471 : {
2472 940 : float *pSrc =
2473 940 : reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
2474 940 : pSrc += 2 * iSrcOffset;
2475 2612 : for (int i = 0; i < nSrcLen; i += 2)
2476 : {
2477 1672 : adfReal[i] = pSrc[2 * i];
2478 1672 : padfImag[i] = pSrc[2 * i + 1];
2479 :
2480 1672 : adfReal[i + 1] = pSrc[2 * i + 2];
2481 1672 : padfImag[i + 1] = pSrc[2 * i + 3];
2482 : }
2483 940 : break;
2484 : }
2485 :
2486 940 : case GDT_CFloat64:
2487 : {
2488 940 : double *pSrc =
2489 940 : reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
2490 940 : pSrc += 2 * iSrcOffset;
2491 2612 : for (int i = 0; i < nSrcLen; i += 2)
2492 : {
2493 1672 : adfReal[i] = pSrc[2 * i];
2494 1672 : padfImag[i] = pSrc[2 * i + 1];
2495 :
2496 1672 : adfReal[i + 1] = pSrc[2 * i + 2];
2497 1672 : padfImag[i + 1] = pSrc[2 * i + 3];
2498 : }
2499 940 : break;
2500 : }
2501 :
2502 0 : case GDT_Unknown:
2503 : case GDT_TypeCount:
2504 0 : CPLAssert(false);
2505 : if (padfDensity)
2506 : memset(padfDensity, 0, nSrcLen * sizeof(double));
2507 : return false;
2508 : }
2509 :
2510 2331040 : if (padfDensity == nullptr)
2511 1197590 : return true;
2512 :
2513 1133450 : if (poWK->pafUnifiedSrcDensity == nullptr)
2514 : {
2515 3234200 : for (int i = 0; i < nSrcLen; i += 2)
2516 : {
2517 : // Take into account earlier calcs.
2518 2112850 : if (padfDensity[i] > SRC_DENSITY_THRESHOLD)
2519 : {
2520 2072950 : padfDensity[i] = 1.0;
2521 2072950 : bHasValid = true;
2522 : }
2523 :
2524 2112850 : if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD)
2525 : {
2526 2073600 : padfDensity[i + 1] = 1.0;
2527 2073600 : bHasValid = true;
2528 : }
2529 : }
2530 : }
2531 : else
2532 : {
2533 54348 : for (int i = 0; i < nSrcLen; i += 2)
2534 : {
2535 42243 : if (padfDensity[i] > SRC_DENSITY_THRESHOLD)
2536 42243 : padfDensity[i] = poWK->pafUnifiedSrcDensity[iSrcOffset + i];
2537 42243 : if (padfDensity[i] > SRC_DENSITY_THRESHOLD)
2538 41704 : bHasValid = true;
2539 :
2540 42243 : if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD)
2541 42243 : padfDensity[i + 1] =
2542 42243 : poWK->pafUnifiedSrcDensity[iSrcOffset + i + 1];
2543 42243 : if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD)
2544 41594 : bHasValid = true;
2545 : }
2546 : }
2547 :
2548 1133450 : return bHasValid;
2549 : }
2550 :
2551 : /************************************************************************/
2552 : /* GWKGetPixelT() */
2553 : /************************************************************************/
2554 :
2555 : template <class T>
2556 7332114 : static bool GWKGetPixelT(const GDALWarpKernel *poWK, int iBand,
2557 : GPtrDiff_t iSrcOffset, double *pdfDensity, T *pValue)
2558 :
2559 : {
2560 7332114 : T *pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
2561 :
2562 16802154 : if ((poWK->panUnifiedSrcValid != nullptr &&
2563 14664208 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset)) ||
2564 7332114 : (poWK->papanBandSrcValid != nullptr &&
2565 21 : poWK->papanBandSrcValid[iBand] != nullptr &&
2566 21 : !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset)))
2567 : {
2568 9 : *pdfDensity = 0.0;
2569 9 : return false;
2570 : }
2571 :
2572 7332104 : *pValue = pSrc[iSrcOffset];
2573 :
2574 7332104 : if (poWK->pafUnifiedSrcDensity == nullptr)
2575 6997351 : *pdfDensity = 1.0;
2576 : else
2577 334754 : *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
2578 :
2579 7332104 : return *pdfDensity != 0.0;
2580 : }
2581 :
2582 : /************************************************************************/
2583 : /* GWKBilinearResample() */
2584 : /* Set of bilinear interpolators */
2585 : /************************************************************************/
2586 :
2587 72664 : static bool GWKBilinearResample4Sample(const GDALWarpKernel *poWK, int iBand,
2588 : double dfSrcX, double dfSrcY,
2589 : double *pdfDensity, double *pdfReal,
2590 : double *pdfImag)
2591 :
2592 : {
2593 : // Save as local variables to avoid following pointers.
2594 72664 : const int nSrcXSize = poWK->nSrcXSize;
2595 72664 : const int nSrcYSize = poWK->nSrcYSize;
2596 :
2597 72664 : int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
2598 72664 : int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
2599 72664 : double dfRatioX = 1.5 - (dfSrcX - iSrcX);
2600 72664 : double dfRatioY = 1.5 - (dfSrcY - iSrcY);
2601 72664 : bool bShifted = false;
2602 :
2603 72664 : if (iSrcX == -1)
2604 : {
2605 292 : iSrcX = 0;
2606 292 : dfRatioX = 1;
2607 : }
2608 72664 : if (iSrcY == -1)
2609 : {
2610 7686 : iSrcY = 0;
2611 7686 : dfRatioY = 1;
2612 : }
2613 72664 : GPtrDiff_t iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
2614 :
2615 : // Shift so we don't overrun the array.
2616 72664 : if (static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize == iSrcOffset + 1 ||
2617 72614 : static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize ==
2618 72614 : iSrcOffset + nSrcXSize + 1)
2619 : {
2620 100 : bShifted = true;
2621 100 : --iSrcOffset;
2622 : }
2623 :
2624 72664 : double adfDensity[2] = {0.0, 0.0};
2625 72664 : double adfReal[2] = {0.0, 0.0};
2626 72664 : double adfImag[2] = {0.0, 0.0};
2627 72664 : double dfAccumulatorReal = 0.0;
2628 72664 : double dfAccumulatorImag = 0.0;
2629 72664 : double dfAccumulatorDensity = 0.0;
2630 72664 : double dfAccumulatorDivisor = 0.0;
2631 :
2632 72664 : const GPtrDiff_t nSrcPixels =
2633 72664 : static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize;
2634 : // Get pixel row.
2635 72664 : if (iSrcY >= 0 && iSrcY < nSrcYSize && iSrcOffset >= 0 &&
2636 145328 : iSrcOffset < nSrcPixels &&
2637 72664 : GWKGetPixelRow(poWK, iBand, iSrcOffset, 1, adfDensity, adfReal,
2638 : adfImag))
2639 : {
2640 67008 : double dfMult1 = dfRatioX * dfRatioY;
2641 67008 : double dfMult2 = (1.0 - dfRatioX) * dfRatioY;
2642 :
2643 : // Shifting corrected.
2644 67008 : if (bShifted)
2645 : {
2646 100 : adfReal[0] = adfReal[1];
2647 100 : adfImag[0] = adfImag[1];
2648 100 : adfDensity[0] = adfDensity[1];
2649 : }
2650 :
2651 : // Upper Left Pixel.
2652 67008 : if (iSrcX >= 0 && iSrcX < nSrcXSize &&
2653 67008 : adfDensity[0] > SRC_DENSITY_THRESHOLD)
2654 : {
2655 61578 : dfAccumulatorDivisor += dfMult1;
2656 :
2657 61578 : dfAccumulatorReal += adfReal[0] * dfMult1;
2658 61578 : dfAccumulatorImag += adfImag[0] * dfMult1;
2659 61578 : dfAccumulatorDensity += adfDensity[0] * dfMult1;
2660 : }
2661 :
2662 : // Upper Right Pixel.
2663 67008 : if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
2664 66427 : adfDensity[1] > SRC_DENSITY_THRESHOLD)
2665 : {
2666 61153 : dfAccumulatorDivisor += dfMult2;
2667 :
2668 61153 : dfAccumulatorReal += adfReal[1] * dfMult2;
2669 61153 : dfAccumulatorImag += adfImag[1] * dfMult2;
2670 61153 : dfAccumulatorDensity += adfDensity[1] * dfMult2;
2671 : }
2672 : }
2673 :
2674 : // Get pixel row.
2675 72664 : if (iSrcY + 1 >= 0 && iSrcY + 1 < nSrcYSize &&
2676 213910 : iSrcOffset + nSrcXSize >= 0 && iSrcOffset + nSrcXSize < nSrcPixels &&
2677 68582 : GWKGetPixelRow(poWK, iBand, iSrcOffset + nSrcXSize, 1, adfDensity,
2678 : adfReal, adfImag))
2679 : {
2680 63023 : double dfMult1 = dfRatioX * (1.0 - dfRatioY);
2681 63023 : double dfMult2 = (1.0 - dfRatioX) * (1.0 - dfRatioY);
2682 :
2683 : // Shifting corrected
2684 63023 : if (bShifted)
2685 : {
2686 50 : adfReal[0] = adfReal[1];
2687 50 : adfImag[0] = adfImag[1];
2688 50 : adfDensity[0] = adfDensity[1];
2689 : }
2690 :
2691 : // Lower Left Pixel
2692 63023 : if (iSrcX >= 0 && iSrcX < nSrcXSize &&
2693 63023 : adfDensity[0] > SRC_DENSITY_THRESHOLD)
2694 : {
2695 57744 : dfAccumulatorDivisor += dfMult1;
2696 :
2697 57744 : dfAccumulatorReal += adfReal[0] * dfMult1;
2698 57744 : dfAccumulatorImag += adfImag[0] * dfMult1;
2699 57744 : dfAccumulatorDensity += adfDensity[0] * dfMult1;
2700 : }
2701 :
2702 : // Lower Right Pixel.
2703 63023 : if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
2704 62492 : adfDensity[1] > SRC_DENSITY_THRESHOLD)
2705 : {
2706 57515 : dfAccumulatorDivisor += dfMult2;
2707 :
2708 57515 : dfAccumulatorReal += adfReal[1] * dfMult2;
2709 57515 : dfAccumulatorImag += adfImag[1] * dfMult2;
2710 57515 : dfAccumulatorDensity += adfDensity[1] * dfMult2;
2711 : }
2712 : }
2713 :
2714 : /* -------------------------------------------------------------------- */
2715 : /* Return result. */
2716 : /* -------------------------------------------------------------------- */
2717 72664 : if (dfAccumulatorDivisor == 1.0)
2718 : {
2719 41607 : *pdfReal = dfAccumulatorReal;
2720 41607 : *pdfImag = dfAccumulatorImag;
2721 41607 : *pdfDensity = dfAccumulatorDensity;
2722 41607 : return false;
2723 : }
2724 31057 : else if (dfAccumulatorDivisor < 0.00001)
2725 : {
2726 0 : *pdfReal = 0.0;
2727 0 : *pdfImag = 0.0;
2728 0 : *pdfDensity = 0.0;
2729 0 : return false;
2730 : }
2731 : else
2732 : {
2733 31057 : *pdfReal = dfAccumulatorReal / dfAccumulatorDivisor;
2734 31057 : *pdfImag = dfAccumulatorImag / dfAccumulatorDivisor;
2735 31057 : *pdfDensity = dfAccumulatorDensity / dfAccumulatorDivisor;
2736 31057 : return true;
2737 : }
2738 : }
2739 :
2740 : template <class T>
2741 5115304 : static bool GWKBilinearResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
2742 : int iBand, double dfSrcX,
2743 : double dfSrcY, T *pValue)
2744 :
2745 : {
2746 :
2747 5115304 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
2748 5115304 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
2749 5115304 : GPtrDiff_t iSrcOffset =
2750 5115304 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
2751 5115304 : const double dfRatioX = 1.5 - (dfSrcX - iSrcX);
2752 5115304 : const double dfRatioY = 1.5 - (dfSrcY - iSrcY);
2753 :
2754 5115304 : const T *const pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
2755 :
2756 5115304 : if (iSrcX >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
2757 5012197 : iSrcY + 1 < poWK->nSrcYSize)
2758 : {
2759 4988028 : const double dfAccumulator =
2760 4988028 : (pSrc[iSrcOffset] * dfRatioX +
2761 4988028 : pSrc[iSrcOffset + 1] * (1.0 - dfRatioX)) *
2762 : dfRatioY +
2763 4988028 : (pSrc[iSrcOffset + poWK->nSrcXSize] * dfRatioX +
2764 4988028 : pSrc[iSrcOffset + 1 + poWK->nSrcXSize] * (1.0 - dfRatioX)) *
2765 4988028 : (1.0 - dfRatioY);
2766 :
2767 4988028 : *pValue = GWKRoundValueT<T>(dfAccumulator);
2768 :
2769 4988028 : return true;
2770 : }
2771 :
2772 127289 : double dfAccumulatorDivisor = 0.0;
2773 127289 : double dfAccumulator = 0.0;
2774 :
2775 : // Upper Left Pixel.
2776 127289 : if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY >= 0 &&
2777 53406 : iSrcY < poWK->nSrcYSize)
2778 : {
2779 53406 : const double dfMult = dfRatioX * dfRatioY;
2780 :
2781 53406 : dfAccumulatorDivisor += dfMult;
2782 :
2783 53406 : dfAccumulator += pSrc[iSrcOffset] * dfMult;
2784 : }
2785 :
2786 : // Upper Right Pixel.
2787 127289 : if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
2788 61346 : iSrcY < poWK->nSrcYSize)
2789 : {
2790 61346 : const double dfMult = (1.0 - dfRatioX) * dfRatioY;
2791 :
2792 61346 : dfAccumulatorDivisor += dfMult;
2793 :
2794 61346 : dfAccumulator += pSrc[iSrcOffset + 1] * dfMult;
2795 : }
2796 :
2797 : // Lower Right Pixel.
2798 127289 : if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
2799 97438 : iSrcY + 1 < poWK->nSrcYSize)
2800 : {
2801 72877 : const double dfMult = (1.0 - dfRatioX) * (1.0 - dfRatioY);
2802 :
2803 72877 : dfAccumulatorDivisor += dfMult;
2804 :
2805 72877 : dfAccumulator += pSrc[iSrcOffset + 1 + poWK->nSrcXSize] * dfMult;
2806 : }
2807 :
2808 : // Lower Left Pixel.
2809 127289 : if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
2810 89475 : iSrcY + 1 < poWK->nSrcYSize)
2811 : {
2812 64706 : const double dfMult = dfRatioX * (1.0 - dfRatioY);
2813 :
2814 64706 : dfAccumulatorDivisor += dfMult;
2815 :
2816 64706 : dfAccumulator += pSrc[iSrcOffset + poWK->nSrcXSize] * dfMult;
2817 : }
2818 :
2819 : /* -------------------------------------------------------------------- */
2820 : /* Return result. */
2821 : /* -------------------------------------------------------------------- */
2822 127289 : double dfValue = 0.0;
2823 :
2824 127289 : if (dfAccumulatorDivisor < 0.00001)
2825 : {
2826 0 : *pValue = 0;
2827 0 : return false;
2828 : }
2829 127289 : else if (dfAccumulatorDivisor == 1.0)
2830 : {
2831 8767 : dfValue = dfAccumulator;
2832 : }
2833 : else
2834 : {
2835 118522 : dfValue = dfAccumulator / dfAccumulatorDivisor;
2836 : }
2837 :
2838 127289 : *pValue = GWKRoundValueT<T>(dfValue);
2839 :
2840 127289 : return true;
2841 : }
2842 :
2843 : /************************************************************************/
2844 : /* GWKCubicResample() */
2845 : /* Set of bicubic interpolators using cubic convolution. */
2846 : /************************************************************************/
2847 :
2848 : // http://verona.fi-p.unam.mx/boris/practicas/CubConvInterp.pdf Formula 18
2849 : // or http://en.wikipedia.org/wiki/Cubic_Hermite_spline : CINTx(p_1,p0,p1,p2)
2850 : // http://en.wikipedia.org/wiki/Bicubic_interpolation: matrix notation
2851 :
// Evaluates the cubic convolution interpolant through the four samples
// f0..f3. distance1, distance2 and distance3 are supplied by the caller; the
// visible caller passes the fractional offset, its square and its cube.
template <typename T>
static inline T CubicConvolution(T distance1, T distance2, T distance3, T f0,
                                 T f1, T f2, T f3)
{
    const T linearTerm = distance1 * (f2 - f0);
    const T quadraticTerm = distance2 * (2 * f0 - 5 * f1 + 4 * f2 - f3);
    const T cubicTerm = distance3 * (3 * (f1 - f2) + f3 - f0);

    return f1 + T(0.5) * (linearTerm + quadraticTerm + cubicTerm);
}
2860 :
2861 : /************************************************************************/
2862 : /* GWKCubicComputeWeights() */
2863 : /************************************************************************/
2864 :
2865 : // adfCoeffs[2] = 1.0 - (adfCoeffs[0] + adfCoeffs[1] - adfCoeffs[3]);
2866 :
// Fills coeffs[0..3] with the cubic convolution weights to apply to four
// consecutive samples, given the offset x passed by the caller (the visible
// callers pass the fractional distance from the second sample).
// For x = 0 the weights are (0, 1, 0, 0); for x = 1 they are (0, 0, 1, 0).
template <typename T>
static inline void GWKCubicComputeWeights(T x, T coeffs[4])
{
    const T xHalf = T(0.5) * x;
    const T xTimes3 = T(3.0) * x;
    const T xSquaredHalf = xHalf * x;

    coeffs[0] = xHalf * (-1 + x * (2 - x));
    coeffs[1] = 1 + xSquaredHalf * (-5 + xTimes3);
    coeffs[2] = xHalf * (1 + x * (4 - xTimes3));
    coeffs[3] = xSquaredHalf * (-1 + x);
}
2879 :
// Dot product of the first four elements of v1 and v2.
// NOTE: being a macro, each argument expression is expanded four times, so
// arguments must be free of side effects.
// TODO(schwehr): Use an inline function.
#define CONVOL4(v1, v2) \
    ((v1)[0] * (v2)[0] + (v1)[1] * (v2)[1] + (v1)[2] * (v2)[2] + \
     (v1)[3] * (v2)[3])

// The macros below are compiled out (#if 0). They factor the common 0.5 * x
// out of the four cubic weights so that it is applied once per convolution
// instead of once per coefficient.
#if 0
// Optimal (in theory...) for max 2 convolutions: 14 multiplications
// instead of 17.
// TODO(schwehr): Use an inline function.
#define GWKCubicComputeWeights_Optim2MAX(dfX_, adfCoeffs, dfHalfX) \
    { \
        const double dfX = dfX_; \
        dfHalfX = 0.5 * dfX; \
        const double dfThreeX = 3.0 * dfX; \
        const double dfXMinus1 = dfX - 1; \
        \
        adfCoeffs[0] = -1 + dfX * (2 - dfX); \
        adfCoeffs[1] = dfX * (-5 + dfThreeX); \
        /*adfCoeffs[2] = 1 + dfX * (4 - dfThreeX);*/ \
        adfCoeffs[2] = -dfXMinus1 - adfCoeffs[1]; \
        /*adfCoeffs[3] = dfX * (-1 + dfX); */ \
        adfCoeffs[3] = dfXMinus1 - adfCoeffs[0]; \
    }

// TODO(schwehr): Use an inline function.
#define CONVOL4_Optim2MAX(adfCoeffs, v, dfHalfX) \
    ((v)[1] + (dfHalfX) * ((adfCoeffs)[0] * (v)[0] + (adfCoeffs)[1] * (v)[1] + \
                           (adfCoeffs)[2] * (v)[2] + (adfCoeffs)[3] * (v)[3]))
#endif
2909 :
2910 299879 : static bool GWKCubicResample4Sample(const GDALWarpKernel *poWK, int iBand,
2911 : double dfSrcX, double dfSrcY,
2912 : double *pdfDensity, double *pdfReal,
2913 : double *pdfImag)
2914 :
2915 : {
2916 299879 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
2917 299879 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
2918 299879 : GPtrDiff_t iSrcOffset =
2919 299879 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
2920 299879 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
2921 299879 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
2922 299879 : double adfDensity[4] = {};
2923 299879 : double adfReal[4] = {};
2924 299879 : double adfImag[4] = {};
2925 :
2926 : // Get the bilinear interpolation at the image borders.
2927 299879 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
2928 284412 : iSrcY + 2 >= poWK->nSrcYSize)
2929 24136 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
2930 24136 : pdfDensity, pdfReal, pdfImag);
2931 :
2932 275743 : double adfValueDens[4] = {};
2933 275743 : double adfValueReal[4] = {};
2934 275743 : double adfValueImag[4] = {};
2935 :
2936 275743 : double adfCoeffsX[4] = {};
2937 275743 : GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
2938 :
2939 1232410 : for (GPtrDiff_t i = -1; i < 3; i++)
2940 : {
2941 1003120 : if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
2942 991507 : 2, adfDensity, adfReal, adfImag) ||
2943 991507 : adfDensity[0] < SRC_DENSITY_THRESHOLD ||
2944 973867 : adfDensity[1] < SRC_DENSITY_THRESHOLD ||
2945 2960190 : adfDensity[2] < SRC_DENSITY_THRESHOLD ||
2946 965566 : adfDensity[3] < SRC_DENSITY_THRESHOLD)
2947 : {
2948 46449 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
2949 46449 : pdfDensity, pdfReal, pdfImag);
2950 : }
2951 :
2952 956668 : adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
2953 956668 : adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
2954 956668 : adfValueImag[i + 1] = CONVOL4(adfCoeffsX, adfImag);
2955 : }
2956 :
2957 : /* -------------------------------------------------------------------- */
2958 : /* For now, if we have any pixels missing in the kernel area, */
2959 : /* we fallback on using bilinear interpolation. Ideally we */
2960 : /* should do "weight adjustment" of our results similarly to */
2961 : /* what is done for the cubic spline and lanc. interpolators. */
2962 : /* -------------------------------------------------------------------- */
2963 :
2964 229294 : double adfCoeffsY[4] = {};
2965 229294 : GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
2966 :
2967 229294 : *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
2968 229294 : *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
2969 229294 : *pdfImag = CONVOL4(adfCoeffsY, adfValueImag);
2970 :
2971 229294 : return true;
2972 : }
2973 :
2974 : #if defined(__x86_64) || defined(_M_X64)
2975 :
2976 : /************************************************************************/
2977 : /* XMMLoad4Values() */
2978 : /* */
2979 : /* Load 4 packed byte or uint16, cast them to float and put them in a */
2980 : /* m128 register. */
2981 : /************************************************************************/
2982 :
2983 949092 : static CPL_INLINE __m128 XMMLoad4Values(const GByte *ptr)
2984 : {
2985 : unsigned int i;
2986 949092 : memcpy(&i, ptr, 4);
2987 1898180 : __m128i xmm_i = _mm_cvtsi32_si128(i);
2988 : // Zero extend 4 packed unsigned 8-bit integers in a to packed
2989 : // 32-bit integers.
2990 : #if __SSE4_1__
2991 : xmm_i = _mm_cvtepu8_epi32(xmm_i);
2992 : #else
2993 1898180 : xmm_i = _mm_unpacklo_epi8(xmm_i, _mm_setzero_si128());
2994 1898180 : xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
2995 : #endif
2996 1898180 : return _mm_cvtepi32_ps(xmm_i);
2997 : }
2998 :
2999 5292 : static CPL_INLINE __m128 XMMLoad4Values(const GUInt16 *ptr)
3000 : {
3001 : GUInt64 i;
3002 5292 : memcpy(&i, ptr, 8);
3003 10584 : __m128i xmm_i = _mm_cvtsi64_si128(i);
3004 : // Zero extend 4 packed unsigned 16-bit integers in a to packed
3005 : // 32-bit integers.
3006 : #if __SSE4_1__
3007 : xmm_i = _mm_cvtepu16_epi32(xmm_i);
3008 : #else
3009 10584 : xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
3010 : #endif
3011 10584 : return _mm_cvtepi32_ps(xmm_i);
3012 : }
3013 :
3014 : /************************************************************************/
3015 : /* XMMHorizontalAdd() */
3016 : /* */
3017 : /* Return the sum of the 4 floating points of the register. */
3018 : /************************************************************************/
3019 :
3020 : #if __SSE3__
3021 : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
3022 : {
3023 : __m128 shuf = _mm_movehdup_ps(v); // (v3 , v3 , v1 , v1)
3024 : __m128 sums = _mm_add_ps(v, shuf); // (v3+v3, v3+v2, v1+v1, v1+v0)
3025 : shuf = _mm_movehl_ps(shuf, sums); // (v3 , v3 , v3+v3, v3+v2)
3026 : sums = _mm_add_ss(sums, shuf); // (v1+v0)+(v3+v2)
3027 : return _mm_cvtss_f32(sums);
3028 : }
3029 : #else
3030 238596 : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
3031 : {
3032 238596 : __m128 shuf = _mm_movehl_ps(v, v); // (v3 , v2 , v3 , v2)
3033 238596 : __m128 sums = _mm_add_ps(v, shuf); // (v3+v3, v2+v2, v3+v1, v2+v0)
3034 238596 : shuf = _mm_shuffle_ps(sums, sums, 1); // (v2+v0, v2+v0, v2+v0, v3+v1)
3035 238596 : sums = _mm_add_ss(sums, shuf); // (v2+v0)+(v3+v1)
3036 238596 : return _mm_cvtss_f32(sums);
3037 : }
3038 : #endif
3039 :
3040 : #endif // (defined(__x86_64) || defined(_M_X64))
3041 :
3042 : /************************************************************************/
3043 : /* GWKCubicResampleSrcMaskIsDensity4SampleRealT() */
3044 : /************************************************************************/
3045 :
3046 : // Note: if USE_SSE_CUBIC_IMPL, only instantiate that for Byte and UInt16,
3047 : // because there are a few assumptions about those types.
3048 : // We do not define USE_SSE_CUBIC_IMPL since in practice, it gives zero
3049 : // perf benefit.
3050 :
3051 : template <class T>
3052 361 : static CPL_INLINE bool GWKCubicResampleSrcMaskIsDensity4SampleRealT(
3053 : const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
3054 : double *pdfDensity, double *pdfReal)
3055 : {
3056 361 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3057 361 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3058 361 : const GPtrDiff_t iSrcOffset =
3059 361 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3060 :
3061 : // Get the bilinear interpolation at the image borders.
3062 361 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3063 361 : iSrcY + 2 >= poWK->nSrcYSize)
3064 : {
3065 0 : double adfImagIgnored[4] = {};
3066 0 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3067 0 : pdfDensity, pdfReal, adfImagIgnored);
3068 : }
3069 :
3070 : #if defined(USE_SSE_CUBIC_IMPL) && (defined(__x86_64) || defined(_M_X64))
3071 : const float fDeltaX = static_cast<float>(dfSrcX) - 0.5f - iSrcX;
3072 : const float fDeltaY = static_cast<float>(dfSrcY) - 0.5f - iSrcY;
3073 :
3074 : // TODO(schwehr): Explain the magic numbers.
3075 : float afTemp[4 + 4 + 4 + 1];
3076 : float *pafAligned =
3077 : reinterpret_cast<float *>(afTemp + ((size_t)afTemp & 0xf));
3078 : float *pafCoeffs = pafAligned;
3079 : float *pafDensity = pafAligned + 4;
3080 : float *pafValue = pafAligned + 8;
3081 :
3082 : const float fHalfDeltaX = 0.5f * fDeltaX;
3083 : const float fThreeDeltaX = 3.0f * fDeltaX;
3084 : const float fHalfDeltaX2 = fHalfDeltaX * fDeltaX;
3085 :
3086 : pafCoeffs[0] = fHalfDeltaX * (-1 + fDeltaX * (2 - fDeltaX));
3087 : pafCoeffs[1] = 1 + fHalfDeltaX2 * (-5 + fThreeDeltaX);
3088 : pafCoeffs[2] = fHalfDeltaX * (1 + fDeltaX * (4 - fThreeDeltaX));
3089 : pafCoeffs[3] = fHalfDeltaX2 * (-1 + fDeltaX);
3090 : __m128 xmmCoeffs = _mm_load_ps(pafCoeffs);
3091 : const __m128 xmmThreshold = _mm_load1_ps(&SRC_DENSITY_THRESHOLD);
3092 :
3093 : __m128 xmmMaskLowDensity = _mm_setzero_ps();
3094 : for (GPtrDiff_t i = -1, iOffset = iSrcOffset - poWK->nSrcXSize - 1; i < 3;
3095 : i++, iOffset += poWK->nSrcXSize)
3096 : {
3097 : const __m128 xmmDensity =
3098 : _mm_loadu_ps(poWK->pafUnifiedSrcDensity + iOffset);
3099 : xmmMaskLowDensity = _mm_or_ps(xmmMaskLowDensity,
3100 : _mm_cmplt_ps(xmmDensity, xmmThreshold));
3101 : pafDensity[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
3102 :
3103 : const __m128 xmmValues =
3104 : XMMLoad4Values(((T *)poWK->papabySrcImage[iBand]) + iOffset);
3105 : pafValue[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValues));
3106 : }
3107 : if (_mm_movemask_ps(xmmMaskLowDensity))
3108 : {
3109 : double adfImagIgnored[4] = {};
3110 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3111 : pdfDensity, pdfReal, adfImagIgnored);
3112 : }
3113 :
3114 : const float fHalfDeltaY = 0.5f * fDeltaY;
3115 : const float fThreeDeltaY = 3.0f * fDeltaY;
3116 : const float fHalfDeltaY2 = fHalfDeltaY * fDeltaY;
3117 :
3118 : pafCoeffs[0] = fHalfDeltaY * (-1 + fDeltaY * (2 - fDeltaY));
3119 : pafCoeffs[1] = 1 + fHalfDeltaY2 * (-5 + fThreeDeltaY);
3120 : pafCoeffs[2] = fHalfDeltaY * (1 + fDeltaY * (4 - fThreeDeltaY));
3121 : pafCoeffs[3] = fHalfDeltaY2 * (-1 + fDeltaY);
3122 :
3123 : xmmCoeffs = _mm_load_ps(pafCoeffs);
3124 :
3125 : const __m128 xmmDensity = _mm_load_ps(pafDensity);
3126 : const __m128 xmmValue = _mm_load_ps(pafValue);
3127 : *pdfDensity = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
3128 : *pdfReal = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValue));
3129 :
3130 : // We did all above computations on float32 whereas the general case is
3131 : // float64. Not sure if one is fundamentally more correct than the other
3132 : // one, but we want our optimization to give the same result as the
3133 : // general case as much as possible, so if the resulting value is
3134 : // close to some_int_value + 0.5, redo the computation with the general
3135 : // case.
3136 : // Note: If other types than Byte or UInt16, will need changes.
3137 : if (fabs(*pdfReal - static_cast<int>(*pdfReal) - 0.5) > .007)
3138 : return true;
3139 :
3140 : #endif // defined(USE_SSE_CUBIC_IMPL) && (defined(__x86_64) || defined(_M_X64))
3141 :
3142 361 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3143 361 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3144 :
3145 361 : double adfValueDens[4] = {};
3146 361 : double adfValueReal[4] = {};
3147 :
3148 361 : double adfCoeffsX[4] = {};
3149 361 : GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3150 :
3151 361 : double adfCoeffsY[4] = {};
3152 361 : GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3153 :
3154 1433 : for (GPtrDiff_t i = -1; i < 3; i++)
3155 : {
3156 1177 : const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
3157 : #if !(defined(USE_SSE_CUBIC_IMPL) && (defined(__x86_64) || defined(_M_X64)))
3158 1177 : if (poWK->pafUnifiedSrcDensity[iOffset + 0] < SRC_DENSITY_THRESHOLD ||
3159 1089 : poWK->pafUnifiedSrcDensity[iOffset + 1] < SRC_DENSITY_THRESHOLD ||
3160 1089 : poWK->pafUnifiedSrcDensity[iOffset + 2] < SRC_DENSITY_THRESHOLD ||
3161 1089 : poWK->pafUnifiedSrcDensity[iOffset + 3] < SRC_DENSITY_THRESHOLD)
3162 : {
3163 105 : double adfImagIgnored[4] = {};
3164 105 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3165 : pdfDensity, pdfReal,
3166 105 : adfImagIgnored);
3167 : }
3168 : #endif
3169 :
3170 1072 : adfValueDens[i + 1] =
3171 1072 : CONVOL4(adfCoeffsX, poWK->pafUnifiedSrcDensity + iOffset);
3172 :
3173 1072 : adfValueReal[i + 1] = CONVOL4(
3174 : adfCoeffsX,
3175 : reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
3176 : }
3177 :
3178 256 : *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3179 256 : *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3180 :
3181 256 : return true;
3182 : }
3183 :
3184 : /************************************************************************/
3185 : /* GWKCubicResampleSrcMaskIsDensity4SampleReal() */
3186 : /* Bi-cubic when source has and only has pafUnifiedSrcDensity. */
3187 : /************************************************************************/
3188 :
3189 0 : static bool GWKCubicResampleSrcMaskIsDensity4SampleReal(
3190 : const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
3191 : double *pdfDensity, double *pdfReal)
3192 :
3193 : {
3194 0 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3195 0 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3196 0 : const GPtrDiff_t iSrcOffset =
3197 0 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3198 0 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3199 0 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3200 :
3201 : // Get the bilinear interpolation at the image borders.
3202 0 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3203 0 : iSrcY + 2 >= poWK->nSrcYSize)
3204 : {
3205 0 : double adfImagIgnored[4] = {};
3206 0 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3207 0 : pdfDensity, pdfReal, adfImagIgnored);
3208 : }
3209 :
3210 0 : double adfCoeffsX[4] = {};
3211 0 : GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3212 :
3213 0 : double adfCoeffsY[4] = {};
3214 0 : GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3215 :
3216 0 : double adfValueDens[4] = {};
3217 0 : double adfValueReal[4] = {};
3218 0 : double adfDensity[4] = {};
3219 0 : double adfReal[4] = {};
3220 0 : double adfImagIgnored[4] = {};
3221 :
3222 0 : for (GPtrDiff_t i = -1; i < 3; i++)
3223 : {
3224 0 : if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
3225 0 : 2, adfDensity, adfReal, adfImagIgnored) ||
3226 0 : adfDensity[0] < SRC_DENSITY_THRESHOLD ||
3227 0 : adfDensity[1] < SRC_DENSITY_THRESHOLD ||
3228 0 : adfDensity[2] < SRC_DENSITY_THRESHOLD ||
3229 0 : adfDensity[3] < SRC_DENSITY_THRESHOLD)
3230 : {
3231 0 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3232 : pdfDensity, pdfReal,
3233 0 : adfImagIgnored);
3234 : }
3235 :
3236 0 : adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
3237 0 : adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
3238 : }
3239 :
3240 0 : *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3241 0 : *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3242 :
3243 0 : return true;
3244 : }
3245 :
3246 : template <class T>
3247 1906603 : static bool GWKCubicResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
3248 : int iBand, double dfSrcX,
3249 : double dfSrcY, T *pValue)
3250 :
3251 : {
3252 1906603 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3253 1906603 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3254 1906603 : const GPtrDiff_t iSrcOffset =
3255 1906603 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3256 1906603 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3257 1906603 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3258 1906603 : const double dfDeltaY2 = dfDeltaY * dfDeltaY;
3259 1906603 : const double dfDeltaY3 = dfDeltaY2 * dfDeltaY;
3260 :
3261 : // Get the bilinear interpolation at the image borders.
3262 1906603 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3263 1662527 : iSrcY + 2 >= poWK->nSrcYSize)
3264 303751 : return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
3265 303751 : pValue);
3266 :
3267 1602852 : double adfCoeffs[4] = {};
3268 1602852 : GWKCubicComputeWeights(dfDeltaX, adfCoeffs);
3269 :
3270 1602852 : double adfValue[4] = {};
3271 :
3272 8014250 : for (GPtrDiff_t i = -1; i < 3; i++)
3273 : {
3274 6411406 : const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
3275 :
3276 6411406 : adfValue[i + 1] = CONVOL4(
3277 : adfCoeffs,
3278 : reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
3279 : }
3280 :
3281 : const double dfValue =
3282 1602852 : CubicConvolution(dfDeltaY, dfDeltaY2, dfDeltaY3, adfValue[0],
3283 : adfValue[1], adfValue[2], adfValue[3]);
3284 :
3285 1602852 : *pValue = GWKClampValueT<T>(dfValue);
3286 :
3287 1602852 : return true;
3288 : }
3289 :
3290 : /************************************************************************/
3291 : /* GWKLanczosSinc() */
3292 : /************************************************************************/
3293 :
3294 : /*
3295 : * Lanczos windowed sinc interpolation kernel with radius r.
3296 : * /
3297 : * | sinc(x) * sinc(x/r), if |x| < r
3298 : * L(x) = | 1, if x = 0 ,
3299 : * | 0, otherwise
3300 : * \
3301 : *
3302 : * where sinc(x) = sin(PI * x) / (PI * x).
3303 : */
3304 :
// Evaluates sinc(x) * sinc(x / 3), i.e. the Lanczos kernel of radius 3, with
// a single call to sin(). Returns 1 for x == 0. The |x| < 3 support is not
// enforced here.
static double GWKLanczosSinc(double dfX)
{
    if (dfX == 0.0)
        return 1.0;

    const double dfPiX = M_PI * dfX;
    const double dfPiXOver3 = dfPiX / 3;
    // Denominator of the product of the two sinc terms.
    const double dfDenominator = dfPiX * dfPiXOver3;

    // Given that sin(3a) = 3 sin(a) - 4 sin^3(a), sin(PI * x) can be derived
    // from s = sin(PI * x / 3), so that
    // sin(PI * x) * sin(PI * x / 3) = (3 - 4 s^2) * s^2.
    const double dfSin = sin(dfPiXOver3);
    const double dfSinSquared = dfSin * dfSin;
    const double dfNumerator = (3 - 4 * dfSinSquared) * dfSinSquared;

    return dfNumerator / dfDenominator;
}
3321 :
// Replaces each of the four values in padfValues, in place, by
// sinc(x) * sinc(x / 3) (1 for x == 0), and returns the sum of the four
// results.
static double GWKLanczosSinc4Values(double *padfValues)
{
    for (int i = 0; i < 4; i++)
    {
        const double dfX = padfValues[i];
        if (dfX == 0.0)
        {
            padfValues[i] = 1.0;
            continue;
        }

        const double dfPiX = M_PI * dfX;
        const double dfPiXOver3 = dfPiX / 3;
        const double dfDenominator = dfPiX * dfPiXOver3;

        // Given that sin(3a) = 3 sin(a) - 4 sin^3(a), sin(PI * x) can be
        // derived from s = sin(PI * x / 3), so that
        // sin(PI * x) * sin(PI * x / 3) = (3 - 4 s^2) * s^2.
        const double dfSin = sin(dfPiXOver3);
        const double dfSinSquared = dfSin * dfSin;
        const double dfNumerator = (3 - 4 * dfSinSquared) * dfSinSquared;

        padfValues[i] = dfNumerator / dfDenominator;
    }
    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3346 :
3347 : /************************************************************************/
3348 : /* GWKBilinear() */
3349 : /************************************************************************/
3350 :
// Triangle (bilinear) kernel: 1 - |x| for |x| <= 1, and 0 otherwise.
static double GWKBilinear(double dfX)
{
    const double dfDistance = fabs(dfX);
    return (dfDistance <= 1.0) ? 1 - dfDistance : 0.0;
}
3359 :
// Replaces each of the four values in padfValues, in place, by the triangle
// kernel weight (1 - |x| for |x| <= 1, else 0), and returns the sum of the
// four weights.
static double GWKBilinear4Values(double *padfValues)
{
    for (int i = 0; i < 4; ++i)
    {
        const double dfDistance = fabs(padfValues[i]);
        padfValues[i] = (dfDistance <= 1.0) ? 1 - dfDistance : 0.0;
    }
    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3384 :
3385 : /************************************************************************/
3386 : /* GWKCubic() */
3387 : /************************************************************************/
3388 :
3389 4339010 : static double GWKCubic(double dfX)
3390 : {
3391 4339010 : return CubicKernel(dfX);
3392 : }
3393 :
// Replaces each of the four distances in padfValues, in place, by its cubic
// kernel weight and returns the sum of the four weights.
//
// The weight is:
//   1.5 |x|^3 - 2.5 |x|^2 + 1              for |x| <= 1
//   -0.5 |x|^3 + 2.5 |x|^2 - 4 |x| + 2     for 1 < |x| <= 2
//   0                                      otherwise
static double GWKCubic4Values(double *padfValues)
{
    double adfWeight[4] = {0.0, 0.0, 0.0, 0.0};

    for (int i = 0; i < 4; ++i)
    {
        const double dfAbsX = fabs(padfValues[i]);
        const double dfX2 = padfValues[i] * padfValues[i];

        if (dfAbsX <= 1.0)
            adfWeight[i] = dfX2 * (1.5 * dfAbsX - 2.5) + 1.0;
        else if (dfAbsX <= 2.0)
            adfWeight[i] = dfX2 * (-0.5 * dfAbsX + 2.5) - 4.0 * dfAbsX + 2.0;
    }

    for (int i = 0; i < 4; ++i)
        padfValues[i] = adfWeight[i];

    return adfWeight[0] + adfWeight[1] + adfWeight[2] + adfWeight[3];
}
3435 :
3436 : /************************************************************************/
3437 : /* GWKBSpline() */
3438 : /************************************************************************/
3439 :
3440 : // https://www.cs.utexas.edu/~fussell/courses/cs384g-fall2013/lectures/mitchell/Mitchell.pdf
3441 : // Equation 8 with (B,C)=(1,0)
3442 : // 1/6 * ( 3 * |x|^3 - 6 * |x|^2 + 4) |x| < 1
3443 : // 1/6 * ( -|x|^3 + 6 |x|^2 - 12|x| + 8) |x| >= 1 and |x| < 2
3444 :
// Cubic B-spline kernel evaluated as a sum of truncated cubes:
//   (x+2)^3 - 4 (x+1)^3 + 6 x^3 - 4 (x-1)^3
// where each term only contributes when its base is strictly positive.
// The 1/6 factor of the formula is not applied here, so the value at x = 0
// is 4. Returns 0 when x + 2 is not strictly positive.
static double GWKBSpline(double x)
{
    const double xp2 = x + 2.0;
    if (!(xp2 > 0.0))
        return 0.0;

    const double xp1 = x + 1.0;
    const double xm1 = x - 1.0;

    // Sum of the terms other than (x+2)^3, accumulated innermost first.
    double dfInnerTerms = 0.0;
    if (xp1 > 0.0)
    {
        if (x > 0.0)
        {
            const double dfXm1Term =
                (xm1 > 0.0) ? -4.0 * xm1 * xm1 * xm1 : 0.0;
            dfInnerTerms = dfXm1Term + 6.0 * x * x * x;
        }
        dfInnerTerms = dfInnerTerms + -4.0 * xp1 * xp1 * xp1;
    }

    return dfInnerTerms + xp2 * xp2 * xp2;  // * 0.166666666666666666666
}
3468 :
// Replaces each of the four values in padfValues, in place, by the cubic
// B-spline kernel written as a sum of truncated cubes,
//   (x+2)^3 - 4 (x+1)^3 + 6 x^3 - 4 (x-1)^3
// (each term only contributing when its base is strictly positive), and
// returns the sum of the four results. The 1/6 factor of the formula is not
// applied here.
static double GWKBSpline4Values(double *padfValues)
{
    for (int i = 0; i < 4; i++)
    {
        const double x = padfValues[i];
        const double xp2 = x + 2.0;

        if (!(xp2 > 0.0))
        {
            padfValues[i] = 0.0;
            continue;
        }

        const double xp1 = x + 1.0;
        const double xm1 = x - 1.0;

        // Sum of the terms other than (x+2)^3, accumulated innermost first.
        double dfInnerTerms = 0.0;
        if (xp1 > 0.0)
        {
            if (x > 0.0)
            {
                const double dfXm1Term =
                    (xm1 > 0.0) ? -4.0 * xm1 * xm1 * xm1 : 0.0;
                dfInnerTerms = dfXm1Term + 6.0 * x * x * x;
            }
            dfInnerTerms = dfInnerTerms + -4.0 * xp1 * xp1 * xp1;
        }

        padfValues[i] =
            dfInnerTerms + xp2 * xp2 * xp2;  // * 0.166666666666666666666
    }
    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3498 : /************************************************************************/
3499 : /* GWKResampleWrkStruct */
3500 : /************************************************************************/
3501 :
3502 : typedef struct _GWKResampleWrkStruct GWKResampleWrkStruct;
3503 :
3504 : typedef bool (*pfnGWKResampleType)(const GDALWarpKernel *poWK, int iBand,
3505 : double dfSrcX, double dfSrcY,
3506 : double *pdfDensity, double *pdfReal,
3507 : double *pdfImag,
3508 : GWKResampleWrkStruct *psWrkStruct);
3509 :
3510 : struct _GWKResampleWrkStruct
3511 : {
3512 : pfnGWKResampleType pfnGWKResample;
3513 :
3514 : // Space for saved X weights.
3515 : double *padfWeightsX;
3516 : bool *pabCalcX;
3517 :
3518 : double *padfWeightsY; // Only used by GWKResampleOptimizedLanczos.
3519 : int iLastSrcX; // Only used by GWKResampleOptimizedLanczos.
3520 : int iLastSrcY; // Only used by GWKResampleOptimizedLanczos.
3521 : double dfLastDeltaX; // Only used by GWKResampleOptimizedLanczos.
3522 : double dfLastDeltaY; // Only used by GWKResampleOptimizedLanczos.
3523 : double dfCosPiXScale; // Only used by GWKResampleOptimizedLanczos.
3524 : double dfSinPiXScale; // Only used by GWKResampleOptimizedLanczos.
3525 : double dfCosPiXScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3526 : double dfSinPiXScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3527 : double dfCosPiYScale; // Only used by GWKResampleOptimizedLanczos.
3528 : double dfSinPiYScale; // Only used by GWKResampleOptimizedLanczos.
3529 : double dfCosPiYScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3530 : double dfSinPiYScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3531 :
3532 : // Space for saving a row of pixels.
3533 : double *padfRowDensity;
3534 : double *padfRowReal;
3535 : double *padfRowImag;
3536 : };
3537 :
3538 : /************************************************************************/
3539 : /* GWKResampleCreateWrkStruct() */
3540 : /************************************************************************/
3541 :
3542 : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
3543 : double dfSrcY, double *pdfDensity, double *pdfReal,
3544 : double *pdfImag, GWKResampleWrkStruct *psWrkStruct);
3545 :
3546 : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
3547 : double dfSrcX, double dfSrcY,
3548 : double *pdfDensity, double *pdfReal,
3549 : double *pdfImag,
3550 : GWKResampleWrkStruct *psWrkStruct);
3551 :
3552 341 : static GWKResampleWrkStruct *GWKResampleCreateWrkStruct(GDALWarpKernel *poWK)
3553 : {
3554 341 : const int nXDist = (poWK->nXRadius + 1) * 2;
3555 341 : const int nYDist = (poWK->nYRadius + 1) * 2;
3556 :
3557 : GWKResampleWrkStruct *psWrkStruct = static_cast<GWKResampleWrkStruct *>(
3558 341 : CPLCalloc(1, sizeof(GWKResampleWrkStruct)));
3559 :
3560 : // Alloc space for saved X weights.
3561 341 : psWrkStruct->padfWeightsX =
3562 341 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3563 341 : psWrkStruct->pabCalcX =
3564 341 : static_cast<bool *>(CPLMalloc(nXDist * sizeof(bool)));
3565 :
3566 341 : psWrkStruct->padfWeightsY =
3567 341 : static_cast<double *>(CPLCalloc(nYDist, sizeof(double)));
3568 341 : psWrkStruct->iLastSrcX = -10;
3569 341 : psWrkStruct->iLastSrcY = -10;
3570 341 : psWrkStruct->dfLastDeltaX = -10;
3571 341 : psWrkStruct->dfLastDeltaY = -10;
3572 :
3573 : // Alloc space for saving a row of pixels.
3574 341 : if (poWK->pafUnifiedSrcDensity == nullptr &&
3575 314 : poWK->panUnifiedSrcValid == nullptr &&
3576 302 : poWK->papanBandSrcValid == nullptr)
3577 : {
3578 302 : psWrkStruct->padfRowDensity = nullptr;
3579 : }
3580 : else
3581 : {
3582 39 : psWrkStruct->padfRowDensity =
3583 39 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3584 : }
3585 341 : psWrkStruct->padfRowReal =
3586 341 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3587 341 : psWrkStruct->padfRowImag =
3588 341 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3589 :
3590 341 : if (poWK->eResample == GRA_Lanczos)
3591 : {
3592 63 : psWrkStruct->pfnGWKResample = GWKResampleOptimizedLanczos;
3593 :
3594 63 : if (poWK->dfXScale < 1)
3595 : {
3596 4 : psWrkStruct->dfCosPiXScaleOver3 = cos(M_PI / 3 * poWK->dfXScale);
3597 4 : psWrkStruct->dfSinPiXScaleOver3 =
3598 4 : sqrt(1 - psWrkStruct->dfCosPiXScaleOver3 *
3599 4 : psWrkStruct->dfCosPiXScaleOver3);
3600 : // "Naive":
3601 : // const double dfCosPiXScale = cos( M_PI * dfXScale );
3602 : // const double dfSinPiXScale = sin( M_PI * dfXScale );
3603 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
3604 4 : psWrkStruct->dfCosPiXScale = (4 * psWrkStruct->dfCosPiXScaleOver3 *
3605 4 : psWrkStruct->dfCosPiXScaleOver3 -
3606 4 : 3) *
3607 4 : psWrkStruct->dfCosPiXScaleOver3;
3608 4 : psWrkStruct->dfSinPiXScale = sqrt(
3609 4 : 1 - psWrkStruct->dfCosPiXScale * psWrkStruct->dfCosPiXScale);
3610 : }
3611 :
3612 63 : if (poWK->dfYScale < 1)
3613 : {
3614 11 : psWrkStruct->dfCosPiYScaleOver3 = cos(M_PI / 3 * poWK->dfYScale);
3615 11 : psWrkStruct->dfSinPiYScaleOver3 =
3616 11 : sqrt(1 - psWrkStruct->dfCosPiYScaleOver3 *
3617 11 : psWrkStruct->dfCosPiYScaleOver3);
3618 : // "Naive":
3619 : // const double dfCosPiYScale = cos( M_PI * dfYScale );
3620 : // const double dfSinPiYScale = sin( M_PI * dfYScale );
3621 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
3622 11 : psWrkStruct->dfCosPiYScale = (4 * psWrkStruct->dfCosPiYScaleOver3 *
3623 11 : psWrkStruct->dfCosPiYScaleOver3 -
3624 11 : 3) *
3625 11 : psWrkStruct->dfCosPiYScaleOver3;
3626 11 : psWrkStruct->dfSinPiYScale = sqrt(
3627 11 : 1 - psWrkStruct->dfCosPiYScale * psWrkStruct->dfCosPiYScale);
3628 : }
3629 : }
3630 : else
3631 278 : psWrkStruct->pfnGWKResample = GWKResample;
3632 :
3633 341 : return psWrkStruct;
3634 : }
3635 :
3636 : /************************************************************************/
3637 : /* GWKResampleDeleteWrkStruct() */
3638 : /************************************************************************/
3639 :
3640 341 : static void GWKResampleDeleteWrkStruct(GWKResampleWrkStruct *psWrkStruct)
3641 : {
3642 341 : CPLFree(psWrkStruct->padfWeightsX);
3643 341 : CPLFree(psWrkStruct->padfWeightsY);
3644 341 : CPLFree(psWrkStruct->pabCalcX);
3645 341 : CPLFree(psWrkStruct->padfRowDensity);
3646 341 : CPLFree(psWrkStruct->padfRowReal);
3647 341 : CPLFree(psWrkStruct->padfRowImag);
3648 341 : CPLFree(psWrkStruct);
3649 341 : }
3650 :
3651 : /************************************************************************/
3652 : /* GWKResample() */
3653 : /************************************************************************/
3654 :
3655 239383 : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
3656 : double dfSrcY, double *pdfDensity, double *pdfReal,
3657 : double *pdfImag, GWKResampleWrkStruct *psWrkStruct)
3658 :
3659 : {
3660 : // Save as local variables to avoid following pointers in loops.
3661 239383 : const int nSrcXSize = poWK->nSrcXSize;
3662 239383 : const int nSrcYSize = poWK->nSrcYSize;
3663 :
3664 239383 : double dfAccumulatorReal = 0.0;
3665 239383 : double dfAccumulatorImag = 0.0;
3666 239383 : double dfAccumulatorDensity = 0.0;
3667 239383 : double dfAccumulatorWeight = 0.0;
3668 239383 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
3669 239383 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
3670 239383 : const GPtrDiff_t iSrcOffset =
3671 239383 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
3672 239383 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3673 239383 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3674 :
3675 239383 : const double dfXScale = poWK->dfXScale;
3676 239383 : const double dfYScale = poWK->dfYScale;
3677 :
3678 239383 : const int nXDist = (poWK->nXRadius + 1) * 2;
3679 :
3680 : // Space for saved X weights.
3681 239383 : double *padfWeightsX = psWrkStruct->padfWeightsX;
3682 239383 : bool *pabCalcX = psWrkStruct->pabCalcX;
3683 :
3684 : // Space for saving a row of pixels.
3685 239383 : double *padfRowDensity = psWrkStruct->padfRowDensity;
3686 239383 : double *padfRowReal = psWrkStruct->padfRowReal;
3687 239383 : double *padfRowImag = psWrkStruct->padfRowImag;
3688 :
3689 : // Mark as needing calculation (don't calculate the weights yet,
3690 : // because a mask may render it unnecessary).
3691 239383 : memset(pabCalcX, false, nXDist * sizeof(bool));
3692 :
3693 239383 : FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample];
3694 239383 : CPLAssert(pfnGetWeight);
3695 :
3696 : // Skip sampling over edge of image.
3697 239383 : int j = poWK->nFiltInitY;
3698 239383 : int jMax = poWK->nYRadius;
3699 239383 : if (iSrcY + j < 0)
3700 566 : j = -iSrcY;
3701 239383 : if (iSrcY + jMax >= nSrcYSize)
3702 662 : jMax = nSrcYSize - iSrcY - 1;
3703 :
3704 239383 : int iMin = poWK->nFiltInitX;
3705 239383 : int iMax = poWK->nXRadius;
3706 239383 : if (iSrcX + iMin < 0)
3707 566 : iMin = -iSrcX;
3708 239383 : if (iSrcX + iMax >= nSrcXSize)
3709 659 : iMax = nSrcXSize - iSrcX - 1;
3710 :
3711 239383 : const int bXScaleBelow1 = (dfXScale < 1.0);
3712 239383 : const int bYScaleBelow1 = (dfYScale < 1.0);
3713 :
3714 239383 : GPtrDiff_t iRowOffset =
3715 239383 : iSrcOffset + static_cast<GPtrDiff_t>(j - 1) * nSrcXSize + iMin;
3716 :
3717 : // Loop over pixel rows in the kernel.
3718 1445930 : for (; j <= jMax; ++j)
3719 : {
3720 1206540 : iRowOffset += nSrcXSize;
3721 :
3722 : // Get pixel values.
3723 : // We can potentially read extra elements after the "normal" end of the
3724 : // source arrays, but the contract of papabySrcImage[iBand],
3725 : // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
3726 : // is to have WARP_EXTRA_ELTS reserved at their end.
3727 1206540 : if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
3728 : padfRowDensity, padfRowReal, padfRowImag))
3729 72 : continue;
3730 :
3731 : // Calculate the Y weight.
3732 : double dfWeight1 = (bYScaleBelow1)
3733 1206470 : ? pfnGetWeight((j - dfDeltaY) * dfYScale)
3734 1600 : : pfnGetWeight(j - dfDeltaY);
3735 :
3736 : // Iterate over pixels in row.
3737 1206470 : double dfAccumulatorRealLocal = 0.0;
3738 1206470 : double dfAccumulatorImagLocal = 0.0;
3739 1206470 : double dfAccumulatorDensityLocal = 0.0;
3740 1206470 : double dfAccumulatorWeightLocal = 0.0;
3741 :
3742 7317420 : for (int i = iMin; i <= iMax; ++i)
3743 : {
3744 : // Skip sampling if pixel has zero density.
3745 6110940 : if (padfRowDensity != nullptr &&
3746 77277 : padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD)
3747 546 : continue;
3748 :
3749 6110400 : double dfWeight2 = 0.0;
3750 :
3751 : // Make or use a cached set of weights for this row.
3752 6110400 : if (pabCalcX[i - iMin])
3753 : {
3754 : // Use saved weight value instead of recomputing it.
3755 4903920 : dfWeight2 = padfWeightsX[i - iMin];
3756 : }
3757 : else
3758 : {
3759 : // Calculate & save the X weight.
3760 1206480 : padfWeightsX[i - iMin] = dfWeight2 =
3761 1206480 : (bXScaleBelow1) ? pfnGetWeight((i - dfDeltaX) * dfXScale)
3762 1600 : : pfnGetWeight(i - dfDeltaX);
3763 :
3764 1206480 : pabCalcX[i - iMin] = true;
3765 : }
3766 :
3767 : // Accumulate!
3768 6110400 : dfAccumulatorRealLocal += padfRowReal[i - iMin] * dfWeight2;
3769 6110400 : dfAccumulatorImagLocal += padfRowImag[i - iMin] * dfWeight2;
3770 6110400 : if (padfRowDensity != nullptr)
3771 76731 : dfAccumulatorDensityLocal +=
3772 76731 : padfRowDensity[i - iMin] * dfWeight2;
3773 6110400 : dfAccumulatorWeightLocal += dfWeight2;
3774 : }
3775 :
3776 1206470 : dfAccumulatorReal += dfAccumulatorRealLocal * dfWeight1;
3777 1206470 : dfAccumulatorImag += dfAccumulatorImagLocal * dfWeight1;
3778 1206470 : dfAccumulatorDensity += dfAccumulatorDensityLocal * dfWeight1;
3779 1206470 : dfAccumulatorWeight += dfAccumulatorWeightLocal * dfWeight1;
3780 : }
3781 :
3782 239383 : if (dfAccumulatorWeight < 0.000001 ||
3783 1887 : (padfRowDensity != nullptr && dfAccumulatorDensity < 0.000001))
3784 : {
3785 0 : *pdfDensity = 0.0;
3786 0 : return false;
3787 : }
3788 :
3789 : // Calculate the output taking into account weighting.
3790 239383 : if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
3791 : {
3792 239380 : *pdfReal = dfAccumulatorReal / dfAccumulatorWeight;
3793 239380 : *pdfImag = dfAccumulatorImag / dfAccumulatorWeight;
3794 239380 : if (padfRowDensity != nullptr)
3795 1884 : *pdfDensity = dfAccumulatorDensity / dfAccumulatorWeight;
3796 : else
3797 237496 : *pdfDensity = 1.0;
3798 : }
3799 : else
3800 : {
3801 3 : *pdfReal = dfAccumulatorReal;
3802 3 : *pdfImag = dfAccumulatorImag;
3803 3 : if (padfRowDensity != nullptr)
3804 3 : *pdfDensity = dfAccumulatorDensity;
3805 : else
3806 0 : *pdfDensity = 1.0;
3807 : }
3808 :
3809 239383 : return true;
3810 : }
3811 :
3812 : /************************************************************************/
3813 : /* GWKResampleOptimizedLanczos() */
3814 : /************************************************************************/
3815 :
3816 617144 : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
3817 : double dfSrcX, double dfSrcY,
3818 : double *pdfDensity, double *pdfReal,
3819 : double *pdfImag,
3820 : GWKResampleWrkStruct *psWrkStruct)
3821 :
3822 : {
3823 : // Save as local variables to avoid following pointers in loops.
3824 617144 : const int nSrcXSize = poWK->nSrcXSize;
3825 617144 : const int nSrcYSize = poWK->nSrcYSize;
3826 :
3827 617144 : double dfAccumulatorReal = 0.0;
3828 617144 : double dfAccumulatorImag = 0.0;
3829 617144 : double dfAccumulatorDensity = 0.0;
3830 617144 : double dfAccumulatorWeight = 0.0;
3831 617144 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
3832 617144 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
3833 617144 : const GPtrDiff_t iSrcOffset =
3834 617144 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
3835 617144 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3836 617144 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3837 :
3838 617144 : const double dfXScale = poWK->dfXScale;
3839 617144 : const double dfYScale = poWK->dfYScale;
3840 :
3841 : // Space for saved X weights.
3842 617144 : double *const padfWeightsXShifted =
3843 617144 : psWrkStruct->padfWeightsX - poWK->nFiltInitX;
3844 617144 : double *const padfWeightsYShifted =
3845 617144 : psWrkStruct->padfWeightsY - poWK->nFiltInitY;
3846 :
3847 : // Space for saving a row of pixels.
3848 617144 : double *const padfRowDensity = psWrkStruct->padfRowDensity;
3849 617144 : double *const padfRowReal = psWrkStruct->padfRowReal;
3850 617144 : double *const padfRowImag = psWrkStruct->padfRowImag;
3851 :
3852 : // Skip sampling over edge of image.
3853 617144 : int jMin = poWK->nFiltInitY;
3854 617144 : int jMax = poWK->nYRadius;
3855 617144 : if (iSrcY + jMin < 0)
3856 16522 : jMin = -iSrcY;
3857 617144 : if (iSrcY + jMax >= nSrcYSize)
3858 5782 : jMax = nSrcYSize - iSrcY - 1;
3859 :
3860 617144 : int iMin = poWK->nFiltInitX;
3861 617144 : int iMax = poWK->nXRadius;
3862 617144 : if (iSrcX + iMin < 0)
3863 15797 : iMin = -iSrcX;
3864 617144 : if (iSrcX + iMax >= nSrcXSize)
3865 4657 : iMax = nSrcXSize - iSrcX - 1;
3866 :
3867 617144 : if (dfXScale < 1.0)
3868 : {
3869 403041 : while ((iMin - dfDeltaX) * dfXScale < -3.0)
3870 200179 : iMin++;
3871 202862 : while ((iMax - dfDeltaX) * dfXScale > 3.0)
3872 0 : iMax--;
3873 :
3874 : // clang-format off
3875 : /*
3876 : Naive version:
3877 : for (int i = iMin; i <= iMax; ++i)
3878 : {
3879 : psWrkStruct->padfWeightsXShifted[i] =
3880 : GWKLanczosSinc((i - dfDeltaX) * dfXScale);
3881 : }
3882 :
3883 : but given that:
3884 :
3885 : GWKLanczosSinc(x):
3886 : if (dfX == 0.0)
3887 : return 1.0;
3888 :
3889 : const double dfPIX = M_PI * dfX;
3890 : const double dfPIXoverR = dfPIX / 3;
3891 : const double dfPIX2overR = dfPIX * dfPIXoverR;
3892 : return sin(dfPIX) * sin(dfPIXoverR) / dfPIX2overR;
3893 :
3894 : and
3895 : sin (a + b) = sin a cos b + cos a sin b.
3896 : cos (a + b) = cos a cos b - sin a sin b.
3897 :
3898 : we can skip any sin() computation within the loop
3899 : */
3900 : // clang-format on
3901 :
3902 202862 : if (iSrcX != psWrkStruct->iLastSrcX ||
3903 131072 : dfDeltaX != psWrkStruct->dfLastDeltaX)
3904 : {
3905 71790 : double dfX = (iMin - dfDeltaX) * dfXScale;
3906 :
3907 71790 : double dfPIXover3 = M_PI / 3 * dfX;
3908 71790 : double dfCosOver3 = cos(dfPIXover3);
3909 71790 : double dfSinOver3 = sin(dfPIXover3);
3910 :
3911 : // "Naive":
3912 : // double dfSin = sin( M_PI * dfX );
3913 : // double dfCos = cos( M_PI * dfX );
3914 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
3915 71790 : double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
3916 71790 : double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
3917 :
3918 71790 : const double dfCosPiXScaleOver3 = psWrkStruct->dfCosPiXScaleOver3;
3919 71790 : const double dfSinPiXScaleOver3 = psWrkStruct->dfSinPiXScaleOver3;
3920 71790 : const double dfCosPiXScale = psWrkStruct->dfCosPiXScale;
3921 71790 : const double dfSinPiXScale = psWrkStruct->dfSinPiXScale;
3922 71790 : constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
3923 71790 : padfWeightsXShifted[iMin] =
3924 71790 : dfX == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfX * dfX);
3925 1636480 : for (int i = iMin + 1; i <= iMax; ++i)
3926 : {
3927 1564690 : dfX += dfXScale;
3928 1564690 : const double dfNewSin =
3929 1564690 : dfSin * dfCosPiXScale + dfCos * dfSinPiXScale;
3930 1564690 : const double dfNewSinOver3 = dfSinOver3 * dfCosPiXScaleOver3 +
3931 1564690 : dfCosOver3 * dfSinPiXScaleOver3;
3932 1564690 : padfWeightsXShifted[i] =
3933 : dfX == 0
3934 1564690 : ? 1.0
3935 1564690 : : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfX * dfX);
3936 1564690 : const double dfNewCos =
3937 1564690 : dfCos * dfCosPiXScale - dfSin * dfSinPiXScale;
3938 1564690 : const double dfNewCosOver3 = dfCosOver3 * dfCosPiXScaleOver3 -
3939 1564690 : dfSinOver3 * dfSinPiXScaleOver3;
3940 1564690 : dfSin = dfNewSin;
3941 1564690 : dfCos = dfNewCos;
3942 1564690 : dfSinOver3 = dfNewSinOver3;
3943 1564690 : dfCosOver3 = dfNewCosOver3;
3944 : }
3945 :
3946 71790 : psWrkStruct->iLastSrcX = iSrcX;
3947 71790 : psWrkStruct->dfLastDeltaX = dfDeltaX;
3948 : }
3949 : }
3950 : else
3951 : {
3952 757542 : while (iMin - dfDeltaX < -3.0)
3953 343260 : iMin++;
3954 414282 : while (iMax - dfDeltaX > 3.0)
3955 0 : iMax--;
3956 :
3957 414282 : if (iSrcX != psWrkStruct->iLastSrcX ||
3958 209580 : dfDeltaX != psWrkStruct->dfLastDeltaX)
3959 : {
3960 : // Optimisation of GWKLanczosSinc(i - dfDeltaX) based on the
3961 : // following trigonometric formulas.
3962 :
3963 : // TODO(schwehr): Move this somewhere where it can be rendered at
3964 : // LaTeX.
3965 : // clang-format off
3966 : // sin(M_PI * (dfBase + k)) = sin(M_PI * dfBase) * cos(M_PI * k) +
3967 : // cos(M_PI * dfBase) * sin(M_PI * k)
3968 : // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k) + dfCosPIBase * sin(M_PI * k)
3969 : // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k)
3970 : // sin(M_PI * (dfBase + k)) = dfSinPIBase * (((k % 2) == 0) ? 1 : -1)
3971 :
3972 : // sin(M_PI / dfR * (dfBase + k)) = sin(M_PI / dfR * dfBase) * cos(M_PI / dfR * k) +
3973 : // cos(M_PI / dfR * dfBase) * sin(M_PI / dfR * k)
3974 : // sin(M_PI / dfR * (dfBase + k)) = dfSinPIBaseOverR * cos(M_PI / dfR * k) + dfCosPIBaseOverR * sin(M_PI / dfR * k)
3975 : // clang-format on
3976 :
3977 414282 : const double dfSinPIDeltaXOver3 = sin((-M_PI / 3.0) * dfDeltaX);
3978 414282 : const double dfSin2PIDeltaXOver3 =
3979 : dfSinPIDeltaXOver3 * dfSinPIDeltaXOver3;
3980 : // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaX < PI/2.
3981 414282 : const double dfCosPIDeltaXOver3 = sqrt(1.0 - dfSin2PIDeltaXOver3);
3982 414282 : const double dfSinPIDeltaX =
3983 414282 : (3.0 - 4 * dfSin2PIDeltaXOver3) * dfSinPIDeltaXOver3;
3984 414282 : const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
3985 414282 : const double dfInvPI2Over3xSinPIDeltaX =
3986 : dfInvPI2Over3 * dfSinPIDeltaX;
3987 414282 : const double dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 =
3988 414282 : -0.5 * dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3;
3989 414282 : const double dfSinPIOver3 = 0.8660254037844386;
3990 414282 : const double dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3 =
3991 414282 : dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaX * dfCosPIDeltaXOver3;
3992 : const double padfCst[] = {
3993 414282 : dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3,
3994 414282 : dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 -
3995 : dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3,
3996 414282 : dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 +
3997 414282 : dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3};
3998 :
3999 2936860 : for (int i = iMin; i <= iMax; ++i)
4000 : {
4001 2522570 : const double dfX = i - dfDeltaX;
4002 2522570 : if (dfX == 0.0)
4003 58282 : padfWeightsXShifted[i] = 1.0;
4004 : else
4005 2464290 : padfWeightsXShifted[i] = padfCst[(i + 3) % 3] / (dfX * dfX);
4006 : #if DEBUG_VERBOSE
4007 : // TODO(schwehr): AlmostEqual.
4008 : // CPLAssert(fabs(padfWeightsX[i-poWK->nFiltInitX] -
4009 : // GWKLanczosSinc(dfX, 3.0)) < 1e-10);
4010 : #endif
4011 : }
4012 :
4013 414282 : psWrkStruct->iLastSrcX = iSrcX;
4014 414282 : psWrkStruct->dfLastDeltaX = dfDeltaX;
4015 : }
4016 : }
4017 :
4018 617144 : if (dfYScale < 1.0)
4019 : {
4020 403116 : while ((jMin - dfDeltaY) * dfYScale < -3.0)
4021 200254 : jMin++;
4022 202862 : while ((jMax - dfDeltaY) * dfYScale > 3.0)
4023 0 : jMax--;
4024 :
4025 : // clang-format off
4026 : /*
4027 : Naive version:
4028 : for (int j = jMin; j <= jMax; ++j)
4029 : {
4030 : padfWeightsYShifted[j] =
4031 : GWKLanczosSinc((j - dfDeltaY) * dfYScale);
4032 : }
4033 : */
4034 : // clang-format on
4035 :
4036 202862 : if (iSrcY != psWrkStruct->iLastSrcY ||
4037 202479 : dfDeltaY != psWrkStruct->dfLastDeltaY)
4038 : {
4039 383 : double dfY = (jMin - dfDeltaY) * dfYScale;
4040 :
4041 383 : double dfPIYover3 = M_PI / 3 * dfY;
4042 383 : double dfCosOver3 = cos(dfPIYover3);
4043 383 : double dfSinOver3 = sin(dfPIYover3);
4044 :
4045 : // "Naive":
4046 : // double dfSin = sin( M_PI * dfY );
4047 : // double dfCos = cos( M_PI * dfY );
4048 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
4049 383 : double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
4050 383 : double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
4051 :
4052 383 : const double dfCosPiYScaleOver3 = psWrkStruct->dfCosPiYScaleOver3;
4053 383 : const double dfSinPiYScaleOver3 = psWrkStruct->dfSinPiYScaleOver3;
4054 383 : const double dfCosPiYScale = psWrkStruct->dfCosPiYScale;
4055 383 : const double dfSinPiYScale = psWrkStruct->dfSinPiYScale;
4056 383 : constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
4057 383 : padfWeightsYShifted[jMin] =
4058 383 : dfY == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfY * dfY);
4059 7318 : for (int j = jMin + 1; j <= jMax; ++j)
4060 : {
4061 6935 : dfY += dfYScale;
4062 6935 : const double dfNewSin =
4063 6935 : dfSin * dfCosPiYScale + dfCos * dfSinPiYScale;
4064 6935 : const double dfNewSinOver3 = dfSinOver3 * dfCosPiYScaleOver3 +
4065 6935 : dfCosOver3 * dfSinPiYScaleOver3;
4066 6935 : padfWeightsYShifted[j] =
4067 : dfY == 0
4068 6935 : ? 1.0
4069 6935 : : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfY * dfY);
4070 6935 : const double dfNewCos =
4071 6935 : dfCos * dfCosPiYScale - dfSin * dfSinPiYScale;
4072 6935 : const double dfNewCosOver3 = dfCosOver3 * dfCosPiYScaleOver3 -
4073 6935 : dfSinOver3 * dfSinPiYScaleOver3;
4074 6935 : dfSin = dfNewSin;
4075 6935 : dfCos = dfNewCos;
4076 6935 : dfSinOver3 = dfNewSinOver3;
4077 6935 : dfCosOver3 = dfNewCosOver3;
4078 : }
4079 :
4080 383 : psWrkStruct->iLastSrcY = iSrcY;
4081 383 : psWrkStruct->dfLastDeltaY = dfDeltaY;
4082 : }
4083 : }
4084 : else
4085 : {
4086 684742 : while (jMin - dfDeltaY < -3.0)
4087 270460 : jMin++;
4088 414282 : while (jMax - dfDeltaY > 3.0)
4089 0 : jMax--;
4090 :
4091 414282 : if (iSrcY != psWrkStruct->iLastSrcY ||
4092 413663 : dfDeltaY != psWrkStruct->dfLastDeltaY)
4093 : {
4094 1132 : const double dfSinPIDeltaYOver3 = sin((-M_PI / 3.0) * dfDeltaY);
4095 1132 : const double dfSin2PIDeltaYOver3 =
4096 : dfSinPIDeltaYOver3 * dfSinPIDeltaYOver3;
4097 : // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaY < PI/2.
4098 1132 : const double dfCosPIDeltaYOver3 = sqrt(1.0 - dfSin2PIDeltaYOver3);
4099 1132 : const double dfSinPIDeltaY =
4100 1132 : (3.0 - 4.0 * dfSin2PIDeltaYOver3) * dfSinPIDeltaYOver3;
4101 1132 : const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
4102 1132 : const double dfInvPI2Over3xSinPIDeltaY =
4103 : dfInvPI2Over3 * dfSinPIDeltaY;
4104 1132 : const double dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 =
4105 1132 : -0.5 * dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3;
4106 1132 : const double dfSinPIOver3 = 0.8660254037844386;
4107 1132 : const double dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3 =
4108 1132 : dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaY * dfCosPIDeltaYOver3;
4109 : const double padfCst[] = {
4110 1132 : dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3,
4111 1132 : dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 -
4112 : dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3,
4113 1132 : dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 +
4114 1132 : dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3};
4115 :
4116 7917 : for (int j = jMin; j <= jMax; ++j)
4117 : {
4118 6785 : const double dfY = j - dfDeltaY;
4119 6785 : if (dfY == 0.0)
4120 460 : padfWeightsYShifted[j] = 1.0;
4121 : else
4122 6325 : padfWeightsYShifted[j] = padfCst[(j + 3) % 3] / (dfY * dfY);
4123 : #if DEBUG_VERBOSE
4124 : // TODO(schwehr): AlmostEqual.
4125 : // CPLAssert(fabs(padfWeightsYShifted[j] -
4126 : // GWKLanczosSinc(dfY, 3.0)) < 1e-10);
4127 : #endif
4128 : }
4129 :
4130 1132 : psWrkStruct->iLastSrcY = iSrcY;
4131 1132 : psWrkStruct->dfLastDeltaY = dfDeltaY;
4132 : }
4133 : }
4134 :
4135 : // If we have no density information, we can simply compute the
4136 : // accumulated weight.
4137 617144 : if (padfRowDensity == nullptr)
4138 : {
4139 617144 : double dfRowAccWeight = 0.0;
4140 7903490 : for (int i = iMin; i <= iMax; ++i)
4141 : {
4142 7286350 : dfRowAccWeight += padfWeightsXShifted[i];
4143 : }
4144 617144 : double dfColAccWeight = 0.0;
4145 7958040 : for (int j = jMin; j <= jMax; ++j)
4146 : {
4147 7340900 : dfColAccWeight += padfWeightsYShifted[j];
4148 : }
4149 617144 : dfAccumulatorWeight = dfRowAccWeight * dfColAccWeight;
4150 : }
4151 :
4152 : // Loop over pixel rows in the kernel.
4153 :
4154 617144 : if (poWK->eWorkingDataType == GDT_Byte && !poWK->panUnifiedSrcValid &&
4155 616524 : !poWK->papanBandSrcValid && !poWK->pafUnifiedSrcDensity &&
4156 : !padfRowDensity)
4157 : {
4158 : // Optimization for Byte case without any masking/alpha
4159 :
4160 616524 : if (dfAccumulatorWeight < 0.000001)
4161 : {
4162 0 : *pdfDensity = 0.0;
4163 0 : return false;
4164 : }
4165 :
4166 616524 : const GByte *pSrc =
4167 616524 : reinterpret_cast<const GByte *>(poWK->papabySrcImage[iBand]);
4168 616524 : pSrc += iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
4169 :
4170 : #if defined(__x86_64) || defined(_M_X64)
4171 616524 : if (iMax - iMin + 1 == 6)
4172 : {
4173 : // This is just an optimized version of the general case in
4174 : // the else clause.
4175 :
4176 346854 : pSrc += iMin;
4177 346854 : int j = jMin;
4178 : const auto fourXWeights =
4179 346854 : XMMReg4Double::Load4Val(padfWeightsXShifted + iMin);
4180 :
4181 : // Process 2 lines at the same time.
4182 1375860 : for (; j < jMax; j += 2)
4183 : {
4184 : const XMMReg4Double v_acc =
4185 1029000 : XMMReg4Double::Load4Val(pSrc) * fourXWeights;
4186 : const XMMReg4Double v_acc2 =
4187 1029000 : XMMReg4Double::Load4Val(pSrc + nSrcXSize) * fourXWeights;
4188 1029000 : const double dfRowAcc = v_acc.GetHorizSum();
4189 1029000 : const double dfRowAccEnd =
4190 1029000 : pSrc[4] * padfWeightsXShifted[iMin + 4] +
4191 1029000 : pSrc[5] * padfWeightsXShifted[iMin + 5];
4192 1029000 : dfAccumulatorReal +=
4193 1029000 : (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
4194 1029000 : const double dfRowAcc2 = v_acc2.GetHorizSum();
4195 1029000 : const double dfRowAcc2End =
4196 1029000 : pSrc[nSrcXSize + 4] * padfWeightsXShifted[iMin + 4] +
4197 1029000 : pSrc[nSrcXSize + 5] * padfWeightsXShifted[iMin + 5];
4198 1029000 : dfAccumulatorReal +=
4199 1029000 : (dfRowAcc2 + dfRowAcc2End) * padfWeightsYShifted[j + 1];
4200 1029000 : pSrc += 2 * nSrcXSize;
4201 : }
4202 346854 : if (j == jMax)
4203 : {
4204 : // Process last line if there's an odd number of them.
4205 :
4206 : const XMMReg4Double v_acc =
4207 86045 : XMMReg4Double::Load4Val(pSrc) * fourXWeights;
4208 86045 : const double dfRowAcc = v_acc.GetHorizSum();
4209 86045 : const double dfRowAccEnd =
4210 86045 : pSrc[4] * padfWeightsXShifted[iMin + 4] +
4211 86045 : pSrc[5] * padfWeightsXShifted[iMin + 5];
4212 86045 : dfAccumulatorReal +=
4213 86045 : (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
4214 : }
4215 : }
4216 : else
4217 : #endif
4218 : {
4219 5463580 : for (int j = jMin; j <= jMax; ++j)
4220 : {
4221 5193900 : int i = iMin;
4222 5193900 : double dfRowAcc1 = 0.0;
4223 5193900 : double dfRowAcc2 = 0.0;
4224 : // A bit of loop unrolling
4225 62750600 : for (; i < iMax; i += 2)
4226 : {
4227 57556700 : dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
4228 57556700 : dfRowAcc2 += pSrc[i + 1] * padfWeightsXShifted[i + 1];
4229 : }
4230 5193900 : if (i == iMax)
4231 : {
4232 : // Process last column if there's an odd number of them.
4233 426183 : dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
4234 : }
4235 :
4236 5193900 : dfAccumulatorReal +=
4237 5193900 : (dfRowAcc1 + dfRowAcc2) * padfWeightsYShifted[j];
4238 5193900 : pSrc += nSrcXSize;
4239 : }
4240 : }
4241 :
4242 : // Calculate the output taking into account weighting.
4243 616524 : if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
4244 : {
4245 569230 : const double dfInvAcc = 1.0 / dfAccumulatorWeight;
4246 569230 : *pdfReal = dfAccumulatorReal * dfInvAcc;
4247 569230 : *pdfDensity = 1.0;
4248 : }
4249 : else
4250 : {
4251 47294 : *pdfReal = dfAccumulatorReal;
4252 47294 : *pdfDensity = 1.0;
4253 : }
4254 :
4255 616524 : return true;
4256 : }
4257 :
4258 620 : GPtrDiff_t iRowOffset =
4259 620 : iSrcOffset + static_cast<GPtrDiff_t>(jMin - 1) * nSrcXSize + iMin;
4260 :
4261 620 : int nCountValid = 0;
4262 620 : const bool bIsNonComplex = !GDALDataTypeIsComplex(poWK->eWorkingDataType);
4263 :
4264 3560 : for (int j = jMin; j <= jMax; ++j)
4265 : {
4266 2940 : iRowOffset += nSrcXSize;
4267 :
4268 : // Get pixel values.
4269 : // We can potentially read extra elements after the "normal" end of the
4270 : // source arrays, but the contract of papabySrcImage[iBand],
4271 : // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
4272 : // is to have WARP_EXTRA_ELTS reserved at their end.
4273 2940 : if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
4274 : padfRowDensity, padfRowReal, padfRowImag))
4275 0 : continue;
4276 :
4277 2940 : const double dfWeight1 = padfWeightsYShifted[j];
4278 :
4279 : // Iterate over pixels in row.
4280 2940 : if (padfRowDensity != nullptr)
4281 : {
4282 0 : for (int i = iMin; i <= iMax; ++i)
4283 : {
4284 : // Skip sampling if pixel has zero density.
4285 0 : if (padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD)
4286 0 : continue;
4287 :
4288 0 : nCountValid++;
4289 :
4290 : // Use a cached set of weights for this row.
4291 0 : const double dfWeight2 = dfWeight1 * padfWeightsXShifted[i];
4292 :
4293 : // Accumulate!
4294 0 : dfAccumulatorReal += padfRowReal[i - iMin] * dfWeight2;
4295 0 : dfAccumulatorImag += padfRowImag[i - iMin] * dfWeight2;
4296 0 : dfAccumulatorDensity += padfRowDensity[i - iMin] * dfWeight2;
4297 0 : dfAccumulatorWeight += dfWeight2;
4298 : }
4299 : }
4300 2940 : else if (bIsNonComplex)
4301 : {
4302 1764 : double dfRowAccReal = 0.0;
4303 10560 : for (int i = iMin; i <= iMax; ++i)
4304 : {
4305 8796 : const double dfWeight2 = padfWeightsXShifted[i];
4306 :
4307 : // Accumulate!
4308 8796 : dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
4309 : }
4310 :
4311 1764 : dfAccumulatorReal += dfRowAccReal * dfWeight1;
4312 : }
4313 : else
4314 : {
4315 1176 : double dfRowAccReal = 0.0;
4316 1176 : double dfRowAccImag = 0.0;
4317 7040 : for (int i = iMin; i <= iMax; ++i)
4318 : {
4319 5864 : const double dfWeight2 = padfWeightsXShifted[i];
4320 :
4321 : // Accumulate!
4322 5864 : dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
4323 5864 : dfRowAccImag += padfRowImag[i - iMin] * dfWeight2;
4324 : }
4325 :
4326 1176 : dfAccumulatorReal += dfRowAccReal * dfWeight1;
4327 1176 : dfAccumulatorImag += dfRowAccImag * dfWeight1;
4328 : }
4329 : }
4330 :
4331 620 : if (dfAccumulatorWeight < 0.000001 ||
4332 0 : (padfRowDensity != nullptr &&
4333 0 : (dfAccumulatorDensity < 0.000001 ||
4334 0 : nCountValid < (jMax - jMin + 1) * (iMax - iMin + 1) / 2)))
4335 : {
4336 0 : *pdfDensity = 0.0;
4337 0 : return false;
4338 : }
4339 :
4340 : // Calculate the output taking into account weighting.
4341 620 : if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
4342 : {
4343 0 : const double dfInvAcc = 1.0 / dfAccumulatorWeight;
4344 0 : *pdfReal = dfAccumulatorReal * dfInvAcc;
4345 0 : *pdfImag = dfAccumulatorImag * dfInvAcc;
4346 0 : if (padfRowDensity != nullptr)
4347 0 : *pdfDensity = dfAccumulatorDensity * dfInvAcc;
4348 : else
4349 0 : *pdfDensity = 1.0;
4350 : }
4351 : else
4352 : {
4353 620 : *pdfReal = dfAccumulatorReal;
4354 620 : *pdfImag = dfAccumulatorImag;
4355 620 : if (padfRowDensity != nullptr)
4356 0 : *pdfDensity = dfAccumulatorDensity;
4357 : else
4358 620 : *pdfDensity = 1.0;
4359 : }
4360 :
4361 620 : return true;
4362 : }
4363 :
4364 : /************************************************************************/
4365 : /* GWKComputeWeights() */
4366 : /************************************************************************/
4367 :
4368 3747920 : static void GWKComputeWeights(GDALResampleAlg eResample, int iMin, int iMax,
4369 : double dfDeltaX, double dfXScale, int jMin,
4370 : int jMax, double dfDeltaY, double dfYScale,
4371 : double *padfWeightsHorizontal,
4372 : double *padfWeightsVertical, double &dfInvWeights)
4373 : {
4374 :
4375 3747920 : const FilterFuncType pfnGetWeight = apfGWKFilter[eResample];
4376 3747920 : CPLAssert(pfnGetWeight);
4377 3747920 : const FilterFunc4ValuesType pfnGetWeight4Values =
4378 3747920 : apfGWKFilter4Values[eResample];
4379 3747920 : CPLAssert(pfnGetWeight4Values);
4380 :
4381 3747920 : int i = iMin; // Used after for.
4382 3747920 : int iC = 0; // Used after for.
4383 3747920 : double dfAccumulatorWeightHorizontal = 0.0;
4384 8316580 : for (; i + 2 < iMax; i += 4, iC += 4)
4385 : {
4386 4568320 : padfWeightsHorizontal[iC] = (i - dfDeltaX) * dfXScale;
4387 4568320 : padfWeightsHorizontal[iC + 1] = padfWeightsHorizontal[iC] + dfXScale;
4388 4568320 : padfWeightsHorizontal[iC + 2] =
4389 4568320 : padfWeightsHorizontal[iC + 1] + dfXScale;
4390 4568320 : padfWeightsHorizontal[iC + 3] =
4391 4568320 : padfWeightsHorizontal[iC + 2] + dfXScale;
4392 4568660 : dfAccumulatorWeightHorizontal +=
4393 4568320 : pfnGetWeight4Values(padfWeightsHorizontal + iC);
4394 : }
4395 3962520 : for (; i <= iMax; ++i, ++iC)
4396 : {
4397 220112 : const double dfWeight = pfnGetWeight((i - dfDeltaX) * dfXScale);
4398 214256 : padfWeightsHorizontal[iC] = dfWeight;
4399 214256 : dfAccumulatorWeightHorizontal += dfWeight;
4400 : }
4401 :
4402 3742410 : int j = jMin; // Used after for.
4403 3742410 : int jC = 0; // Used after for.
4404 3742410 : double dfAccumulatorWeightVertical = 0.0;
4405 7890240 : for (; j + 2 < jMax; j += 4, jC += 4)
4406 : {
4407 4146000 : padfWeightsVertical[jC] = (j - dfDeltaY) * dfYScale;
4408 4146000 : padfWeightsVertical[jC + 1] = padfWeightsVertical[jC] + dfYScale;
4409 4146000 : padfWeightsVertical[jC + 2] = padfWeightsVertical[jC + 1] + dfYScale;
4410 4146000 : padfWeightsVertical[jC + 3] = padfWeightsVertical[jC + 2] + dfYScale;
4411 4147840 : dfAccumulatorWeightVertical +=
4412 4146000 : pfnGetWeight4Values(padfWeightsVertical + jC);
4413 : }
4414 8253170 : for (; j <= jMax; ++j, ++jC)
4415 : {
4416 4510710 : const double dfWeight = pfnGetWeight((j - dfDeltaY) * dfYScale);
4417 4508930 : padfWeightsVertical[jC] = dfWeight;
4418 4508930 : dfAccumulatorWeightVertical += dfWeight;
4419 : }
4420 :
4421 3742460 : dfInvWeights =
4422 3742460 : 1. / (dfAccumulatorWeightHorizontal * dfAccumulatorWeightVertical);
4423 3742460 : }
4424 :
4425 : /************************************************************************/
4426 : /* GWKResampleNoMasksT() */
4427 : /************************************************************************/
4428 :
4429 : template <class T>
4430 : static bool
4431 : GWKResampleNoMasksT(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
4432 : double dfSrcY, T *pValue, double *padfWeightsHorizontal,
4433 : double *padfWeightsVertical, double &dfInvWeights)
4434 :
4435 : {
4436 : // Commonly used; save locally.
4437 : const int nSrcXSize = poWK->nSrcXSize;
4438 : const int nSrcYSize = poWK->nSrcYSize;
4439 :
4440 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4441 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4442 : const GPtrDiff_t iSrcOffset =
4443 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4444 :
4445 : const int nXRadius = poWK->nXRadius;
4446 : const int nYRadius = poWK->nYRadius;
4447 :
4448 : // Politely refuse to process invalid coordinates or obscenely small image.
4449 : if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
4450 : nYRadius > nSrcYSize)
4451 : return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
4452 : pValue);
4453 :
4454 : T *pSrcBand = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
4455 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4456 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4457 :
4458 : const double dfXScale = std::min(poWK->dfXScale, 1.0);
4459 : const double dfYScale = std::min(poWK->dfYScale, 1.0);
4460 :
4461 : int iMin = 1 - nXRadius;
4462 : if (iSrcX + iMin < 0)
4463 : iMin = -iSrcX;
4464 : int iMax = nXRadius;
4465 : if (iSrcX + iMax >= nSrcXSize - 1)
4466 : iMax = nSrcXSize - 1 - iSrcX;
4467 :
4468 : int jMin = 1 - nYRadius;
4469 : if (iSrcY + jMin < 0)
4470 : jMin = -iSrcY;
4471 : int jMax = nYRadius;
4472 : if (iSrcY + jMax >= nSrcYSize - 1)
4473 : jMax = nSrcYSize - 1 - iSrcY;
4474 :
4475 : if (iBand == 0)
4476 : {
4477 : GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
4478 : jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
4479 : padfWeightsVertical, dfInvWeights);
4480 : }
4481 :
4482 : // Loop over all rows in the kernel.
4483 : double dfAccumulator = 0.0;
4484 : for (int jC = 0, j = jMin; j <= jMax; ++j, ++jC)
4485 : {
4486 : const GPtrDiff_t iSampJ =
4487 : iSrcOffset + static_cast<GPtrDiff_t>(j) * nSrcXSize;
4488 :
4489 : // Loop over all pixels in the row.
4490 : double dfAccumulatorLocal = 0.0;
4491 : double dfAccumulatorLocal2 = 0.0;
4492 : int iC = 0;
4493 : int i = iMin;
4494 : // Process by chunk of 4 cols.
4495 : for (; i + 2 < iMax; i += 4, iC += 4)
4496 : {
4497 : // Retrieve the pixel & accumulate.
4498 : dfAccumulatorLocal +=
4499 : pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
4500 : dfAccumulatorLocal +=
4501 : pSrcBand[i + 1 + iSampJ] * padfWeightsHorizontal[iC + 1];
4502 : dfAccumulatorLocal2 +=
4503 : pSrcBand[i + 2 + iSampJ] * padfWeightsHorizontal[iC + 2];
4504 : dfAccumulatorLocal2 +=
4505 : pSrcBand[i + 3 + iSampJ] * padfWeightsHorizontal[iC + 3];
4506 : }
4507 : dfAccumulatorLocal += dfAccumulatorLocal2;
4508 : if (i < iMax)
4509 : {
4510 : dfAccumulatorLocal +=
4511 : pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
4512 : dfAccumulatorLocal +=
4513 : pSrcBand[i + 1 + iSampJ] * padfWeightsHorizontal[iC + 1];
4514 : i += 2;
4515 : iC += 2;
4516 : }
4517 : if (i == iMax)
4518 : {
4519 : dfAccumulatorLocal +=
4520 : pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
4521 : }
4522 :
4523 : dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
4524 : }
4525 :
4526 : *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
4527 :
4528 : return true;
4529 : }
4530 :
4531 : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
4532 : /* Could possibly be used too on 32bit, but we would need to check at runtime */
4533 : #if defined(__x86_64) || defined(_M_X64)
4534 :
4535 : /************************************************************************/
4536 : /* GWKResampleNoMasks_SSE2_T() */
4537 : /************************************************************************/
4538 :
4539 : template <class T>
4540 9164113 : static bool GWKResampleNoMasks_SSE2_T(const GDALWarpKernel *poWK, int iBand,
4541 : double dfSrcX, double dfSrcY, T *pValue,
4542 : double *padfWeightsHorizontal,
4543 : double *padfWeightsVertical,
4544 : double &dfInvWeights)
4545 : {
4546 : // Commonly used; save locally.
4547 9164113 : const int nSrcXSize = poWK->nSrcXSize;
4548 9164113 : const int nSrcYSize = poWK->nSrcYSize;
4549 :
4550 9164113 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4551 9164113 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4552 9164113 : const GPtrDiff_t iSrcOffset =
4553 9164113 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4554 9164113 : const int nXRadius = poWK->nXRadius;
4555 9164113 : const int nYRadius = poWK->nYRadius;
4556 :
4557 : // Politely refuse to process invalid coordinates or obscenely small image.
4558 9164113 : if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
4559 : nYRadius > nSrcYSize)
4560 2 : return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
4561 3 : pValue);
4562 :
4563 9173431 : const T *pSrcBand =
4564 9173431 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
4565 :
4566 9173431 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4567 9173431 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4568 9173431 : const double dfXScale = std::min(poWK->dfXScale, 1.0);
4569 9170971 : const double dfYScale = std::min(poWK->dfYScale, 1.0);
4570 :
4571 9152431 : int iMin = 1 - nXRadius;
4572 9152431 : if (iSrcX + iMin < 0)
4573 43143 : iMin = -iSrcX;
4574 9152431 : int iMax = nXRadius;
4575 9152431 : if (iSrcX + iMax >= nSrcXSize - 1)
4576 38106 : iMax = nSrcXSize - 1 - iSrcX;
4577 :
4578 9152431 : int jMin = 1 - nYRadius;
4579 9152431 : if (iSrcY + jMin < 0)
4580 49554 : jMin = -iSrcY;
4581 9152431 : int jMax = nYRadius;
4582 9152431 : if (iSrcY + jMax >= nSrcYSize - 1)
4583 36028 : jMax = nSrcYSize - 1 - iSrcY;
4584 :
4585 9152431 : if (iBand == 0)
4586 : {
4587 3744991 : GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
4588 : jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
4589 : padfWeightsVertical, dfInvWeights);
4590 : }
4591 :
4592 9155921 : GPtrDiff_t iSampJ = iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
4593 : // Process by chunk of 4 rows.
4594 9155921 : int jC = 0;
4595 9155921 : int j = jMin;
4596 9155921 : double dfAccumulator = 0.0;
4597 19407493 : for (; j + 2 < jMax; j += 4, iSampJ += 4 * nSrcXSize, jC += 4)
4598 : {
4599 : // Loop over all pixels in the row.
4600 10257992 : int iC = 0;
4601 10257992 : int i = iMin;
4602 : // Process by chunk of 4 cols.
4603 10257992 : XMMReg4Double v_acc_1 = XMMReg4Double::Zero();
4604 10233532 : XMMReg4Double v_acc_2 = XMMReg4Double::Zero();
4605 10239762 : XMMReg4Double v_acc_3 = XMMReg4Double::Zero();
4606 10254642 : XMMReg4Double v_acc_4 = XMMReg4Double::Zero();
4607 26741980 : for (; i + 2 < iMax; i += 4, iC += 4)
4608 : {
4609 : // Retrieve the pixel & accumulate.
4610 16480288 : XMMReg4Double v_pixels_1 =
4611 16480288 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
4612 16505088 : XMMReg4Double v_pixels_2 =
4613 16505088 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + nSrcXSize);
4614 16515588 : XMMReg4Double v_pixels_3 =
4615 16515588 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
4616 16508288 : XMMReg4Double v_pixels_4 =
4617 16508288 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
4618 :
4619 16516088 : XMMReg4Double v_padfWeight =
4620 16516088 : XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
4621 :
4622 16475288 : v_acc_1 += v_pixels_1 * v_padfWeight;
4623 16487688 : v_acc_2 += v_pixels_2 * v_padfWeight;
4624 16511288 : v_acc_3 += v_pixels_3 * v_padfWeight;
4625 16510888 : v_acc_4 += v_pixels_4 * v_padfWeight;
4626 : }
4627 :
4628 10261702 : if (i < iMax)
4629 : {
4630 142910 : XMMReg2Double v_pixels_1 =
4631 142910 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ);
4632 142910 : XMMReg2Double v_pixels_2 =
4633 142910 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + nSrcXSize);
4634 142910 : XMMReg2Double v_pixels_3 =
4635 142910 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
4636 142910 : XMMReg2Double v_pixels_4 =
4637 142910 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
4638 :
4639 142910 : XMMReg2Double v_padfWeight =
4640 142910 : XMMReg2Double::Load2Val(padfWeightsHorizontal + iC);
4641 :
4642 142910 : v_acc_1.AddToLow(v_pixels_1 * v_padfWeight);
4643 142910 : v_acc_2.AddToLow(v_pixels_2 * v_padfWeight);
4644 142910 : v_acc_3.AddToLow(v_pixels_3 * v_padfWeight);
4645 142910 : v_acc_4.AddToLow(v_pixels_4 * v_padfWeight);
4646 :
4647 142910 : i += 2;
4648 142910 : iC += 2;
4649 : }
4650 :
4651 10261702 : double dfAccumulatorLocal_1 = v_acc_1.GetHorizSum();
4652 10242552 : double dfAccumulatorLocal_2 = v_acc_2.GetHorizSum();
4653 10249302 : double dfAccumulatorLocal_3 = v_acc_3.GetHorizSum();
4654 10261302 : double dfAccumulatorLocal_4 = v_acc_4.GetHorizSum();
4655 :
4656 10251552 : if (i == iMax)
4657 : {
4658 49195 : dfAccumulatorLocal_1 += static_cast<double>(pSrcBand[i + iSampJ]) *
4659 49195 : padfWeightsHorizontal[iC];
4660 49195 : dfAccumulatorLocal_2 +=
4661 49195 : static_cast<double>(pSrcBand[i + iSampJ + nSrcXSize]) *
4662 49195 : padfWeightsHorizontal[iC];
4663 49195 : dfAccumulatorLocal_3 +=
4664 49195 : static_cast<double>(pSrcBand[i + iSampJ + 2 * nSrcXSize]) *
4665 49195 : padfWeightsHorizontal[iC];
4666 49195 : dfAccumulatorLocal_4 +=
4667 49195 : static_cast<double>(pSrcBand[i + iSampJ + 3 * nSrcXSize]) *
4668 49195 : padfWeightsHorizontal[iC];
4669 : }
4670 :
4671 10251552 : dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal_1;
4672 10251552 : dfAccumulator += padfWeightsVertical[jC + 1] * dfAccumulatorLocal_2;
4673 10251552 : dfAccumulator += padfWeightsVertical[jC + 2] * dfAccumulatorLocal_3;
4674 10251552 : dfAccumulator += padfWeightsVertical[jC + 3] * dfAccumulatorLocal_4;
4675 : }
4676 22263141 : for (; j <= jMax; ++j, iSampJ += nSrcXSize, ++jC)
4677 : {
4678 : // Loop over all pixels in the row.
4679 13092440 : int iC = 0;
4680 13092440 : int i = iMin;
4681 : // Process by chunk of 4 cols.
4682 13092440 : XMMReg4Double v_acc = XMMReg4Double::Zero();
4683 26163863 : for (; i + 2 < iMax; i += 4, iC += 4)
4684 : {
4685 : // Retrieve the pixel & accumulate.
4686 13074523 : XMMReg4Double v_pixels =
4687 13074523 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
4688 13100623 : XMMReg4Double v_padfWeight =
4689 13100623 : XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
4690 :
4691 13116523 : v_acc += v_pixels * v_padfWeight;
4692 : }
4693 :
4694 13089440 : double dfAccumulatorLocal = v_acc.GetHorizSum();
4695 :
4696 13113640 : if (i < iMax)
4697 : {
4698 173964 : dfAccumulatorLocal +=
4699 173964 : pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
4700 173964 : dfAccumulatorLocal +=
4701 173964 : pSrcBand[i + 1 + iSampJ] * padfWeightsHorizontal[iC + 1];
4702 173964 : i += 2;
4703 173964 : iC += 2;
4704 : }
4705 13113640 : if (i == iMax)
4706 : {
4707 33020 : dfAccumulatorLocal += static_cast<double>(pSrcBand[i + iSampJ]) *
4708 33020 : padfWeightsHorizontal[iC];
4709 : }
4710 :
4711 13113640 : dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
4712 : }
4713 :
4714 9170701 : *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
4715 :
4716 9179581 : return true;
4717 : }
4718 :
4719 : /************************************************************************/
4720 : /* GWKResampleNoMasksT<GByte>() */
4721 : /************************************************************************/
4722 :
4723 : template <>
4724 8586280 : bool GWKResampleNoMasksT<GByte>(const GDALWarpKernel *poWK, int iBand,
4725 : double dfSrcX, double dfSrcY, GByte *pValue,
4726 : double *padfWeightsHorizontal,
4727 : double *padfWeightsVertical,
4728 : double &dfInvWeights)
4729 : {
4730 8586280 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4731 : padfWeightsHorizontal, padfWeightsVertical,
4732 8564370 : dfInvWeights);
4733 : }
4734 :
4735 : /************************************************************************/
4736 : /* GWKResampleNoMasksT<GInt16>() */
4737 : /************************************************************************/
4738 :
4739 : template <>
4740 252563 : bool GWKResampleNoMasksT<GInt16>(const GDALWarpKernel *poWK, int iBand,
4741 : double dfSrcX, double dfSrcY, GInt16 *pValue,
4742 : double *padfWeightsHorizontal,
4743 : double *padfWeightsVertical,
4744 : double &dfInvWeights)
4745 : {
4746 252563 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4747 : padfWeightsHorizontal, padfWeightsVertical,
4748 252563 : dfInvWeights);
4749 : }
4750 :
4751 : /************************************************************************/
4752 : /* GWKResampleNoMasksT<GUInt16>() */
4753 : /************************************************************************/
4754 :
4755 : template <>
4756 343440 : bool GWKResampleNoMasksT<GUInt16>(const GDALWarpKernel *poWK, int iBand,
4757 : double dfSrcX, double dfSrcY, GUInt16 *pValue,
4758 : double *padfWeightsHorizontal,
4759 : double *padfWeightsVertical,
4760 : double &dfInvWeights)
4761 : {
4762 343440 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4763 : padfWeightsHorizontal, padfWeightsVertical,
4764 343440 : dfInvWeights);
4765 : }
4766 :
4767 : /************************************************************************/
4768 : /* GWKResampleNoMasksT<float>() */
4769 : /************************************************************************/
4770 :
4771 : template <>
4772 2500 : bool GWKResampleNoMasksT<float>(const GDALWarpKernel *poWK, int iBand,
4773 : double dfSrcX, double dfSrcY, float *pValue,
4774 : double *padfWeightsHorizontal,
4775 : double *padfWeightsVertical,
4776 : double &dfInvWeights)
4777 : {
4778 2500 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4779 : padfWeightsHorizontal, padfWeightsVertical,
4780 2500 : dfInvWeights);
4781 : }
4782 :
4783 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
4784 :
4785 : /************************************************************************/
4786 : /* GWKResampleNoMasksT<double>() */
4787 : /************************************************************************/
4788 :
4789 : template <>
4790 : bool GWKResampleNoMasksT<double>(const GDALWarpKernel *poWK, int iBand,
4791 : double dfSrcX, double dfSrcY, double *pValue,
4792 : double *padfWeightsHorizontal,
4793 : double *padfWeightsVertical,
4794 : double &dfInvWeights)
4795 : {
4796 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4797 : padfWeightsHorizontal, padfWeightsVertical,
4798 : dfInvWeights);
4799 : }
4800 :
4801 : #endif /* INSTANTIATE_FLOAT64_SSE2_IMPL */
4802 :
4803 : #endif /* defined(__x86_64) || defined(_M_X64) */
4804 :
4805 : /************************************************************************/
4806 : /* GWKRoundSourceCoordinates() */
4807 : /************************************************************************/
4808 :
4809 1000 : static void GWKRoundSourceCoordinates(
4810 : int nDstXSize, double *padfX, double *padfY, double *padfZ, int *pabSuccess,
4811 : double dfSrcCoordPrecision, double dfErrorThreshold,
4812 : GDALTransformerFunc pfnTransformer, void *pTransformerArg, double dfDstXOff,
4813 : double dfDstY)
4814 : {
4815 1000 : double dfPct = 0.8;
4816 1000 : if (dfErrorThreshold > 0 && dfSrcCoordPrecision / dfErrorThreshold >= 10.0)
4817 : {
4818 1000 : dfPct = 1.0 - 2 * 1.0 / (dfSrcCoordPrecision / dfErrorThreshold);
4819 : }
4820 1000 : const double dfExactTransformThreshold = 0.5 * dfPct * dfSrcCoordPrecision;
4821 :
4822 501000 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
4823 : {
4824 500000 : const double dfXBefore = padfX[iDstX];
4825 500000 : const double dfYBefore = padfY[iDstX];
4826 500000 : padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
4827 : dfSrcCoordPrecision;
4828 500000 : padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
4829 : dfSrcCoordPrecision;
4830 :
4831 : // If we are in an uncertainty zone, go to non-approximated
4832 : // transformation.
4833 : // Due to the 80% of half-precision threshold, dfSrcCoordPrecision must
4834 : // be at least 10 times greater than the approximation error.
4835 500000 : if (fabs(dfXBefore - padfX[iDstX]) > dfExactTransformThreshold ||
4836 399914 : fabs(dfYBefore - padfY[iDstX]) > dfExactTransformThreshold)
4837 : {
4838 180090 : padfX[iDstX] = iDstX + dfDstXOff;
4839 180090 : padfY[iDstX] = dfDstY;
4840 180090 : padfZ[iDstX] = 0.0;
4841 180090 : pfnTransformer(pTransformerArg, TRUE, 1, padfX + iDstX,
4842 180090 : padfY + iDstX, padfZ + iDstX, pabSuccess + iDstX);
4843 180090 : padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
4844 : dfSrcCoordPrecision;
4845 180090 : padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
4846 : dfSrcCoordPrecision;
4847 : }
4848 : }
4849 1000 : }
4850 :
4851 : /************************************************************************/
4852 : /* GWKOpenCLCase() */
4853 : /* */
4854 : /* This is identical to GWKGeneralCase(), but functions via */
4855 : /* OpenCL. This means we have vector optimization (SSE) and/or */
4856 : /* GPU optimization depending on our prefs. The code itself is */
4857 : /* general and not optimized, but by defining constants we can */
4858 : /* make some pretty darn good code on the fly. */
4859 : /************************************************************************/
4860 :
4861 : #if defined(HAVE_OPENCL)
4862 0 : static CPLErr GWKOpenCLCase(GDALWarpKernel *poWK)
4863 : {
4864 0 : const int nDstXSize = poWK->nDstXSize;
4865 0 : const int nDstYSize = poWK->nDstYSize;
4866 0 : const int nSrcXSize = poWK->nSrcXSize;
4867 0 : const int nSrcYSize = poWK->nSrcYSize;
4868 0 : const int nDstXOff = poWK->nDstXOff;
4869 0 : const int nDstYOff = poWK->nDstYOff;
4870 0 : const int nSrcXOff = poWK->nSrcXOff;
4871 0 : const int nSrcYOff = poWK->nSrcYOff;
4872 0 : bool bUseImag = false;
4873 :
4874 : cl_channel_type imageFormat;
4875 0 : switch (poWK->eWorkingDataType)
4876 : {
4877 0 : case GDT_Byte:
4878 0 : imageFormat = CL_UNORM_INT8;
4879 0 : break;
4880 0 : case GDT_UInt16:
4881 0 : imageFormat = CL_UNORM_INT16;
4882 0 : break;
4883 0 : case GDT_CInt16:
4884 0 : bUseImag = true;
4885 : [[fallthrough]];
4886 0 : case GDT_Int16:
4887 0 : imageFormat = CL_SNORM_INT16;
4888 0 : break;
4889 0 : case GDT_CFloat32:
4890 0 : bUseImag = true;
4891 : [[fallthrough]];
4892 0 : case GDT_Float32:
4893 0 : imageFormat = CL_FLOAT;
4894 0 : break;
4895 0 : default:
4896 : // No support for higher precision formats.
4897 0 : CPLDebug("OpenCL", "Unsupported resampling OpenCL data type %d.",
4898 0 : static_cast<int>(poWK->eWorkingDataType));
4899 0 : return CE_Warning;
4900 : }
4901 :
4902 : OCLResampAlg resampAlg;
4903 0 : switch (poWK->eResample)
4904 : {
4905 0 : case GRA_Bilinear:
4906 0 : resampAlg = OCL_Bilinear;
4907 0 : break;
4908 0 : case GRA_Cubic:
4909 0 : resampAlg = OCL_Cubic;
4910 0 : break;
4911 0 : case GRA_CubicSpline:
4912 0 : resampAlg = OCL_CubicSpline;
4913 0 : break;
4914 0 : case GRA_Lanczos:
4915 0 : resampAlg = OCL_Lanczos;
4916 0 : break;
4917 0 : default:
4918 : // No support for higher precision formats.
4919 0 : CPLDebug("OpenCL",
4920 : "Unsupported resampling OpenCL resampling alg %d.",
4921 0 : static_cast<int>(poWK->eResample));
4922 0 : return CE_Warning;
4923 : }
4924 :
4925 0 : struct oclWarper *warper = nullptr;
4926 : cl_int err;
4927 0 : CPLErr eErr = CE_None;
4928 :
4929 : // TODO(schwehr): Fix indenting.
4930 : try
4931 : {
4932 :
4933 : // Using a factor of 2 or 4 seems to have much less rounding error
4934 : // than 3 on the GPU.
4935 : // Then the rounding error can cause strange artifacts under the
4936 : // right conditions.
4937 0 : warper = GDALWarpKernelOpenCL_createEnv(
4938 : nSrcXSize, nSrcYSize, nDstXSize, nDstYSize, imageFormat,
4939 0 : poWK->nBands, 4, bUseImag, poWK->papanBandSrcValid != nullptr,
4940 : poWK->pafDstDensity, poWK->padfDstNoDataReal, resampAlg, &err);
4941 :
4942 0 : if (err != CL_SUCCESS || warper == nullptr)
4943 : {
4944 0 : eErr = CE_Warning;
4945 0 : if (warper != nullptr)
4946 0 : throw eErr;
4947 0 : return eErr;
4948 : }
4949 :
4950 0 : CPLDebug("GDAL",
4951 : "GDALWarpKernel()::GWKOpenCLCase() "
4952 : "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d",
4953 : nSrcXOff, nSrcYOff, nSrcXSize, nSrcYSize, nDstXOff, nDstYOff,
4954 : nDstXSize, nDstYSize);
4955 :
4956 0 : if (!poWK->pfnProgress(poWK->dfProgressBase, "", poWK->pProgress))
4957 : {
4958 0 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
4959 0 : eErr = CE_Failure;
4960 0 : throw eErr;
4961 : }
4962 :
4963 : /* ====================================================================
4964 : */
4965 : /* Loop over bands. */
4966 : /* ====================================================================
4967 : */
4968 0 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
4969 : {
4970 0 : if (poWK->papanBandSrcValid != nullptr &&
4971 0 : poWK->papanBandSrcValid[iBand] != nullptr)
4972 : {
4973 0 : GDALWarpKernelOpenCL_setSrcValid(
4974 : warper,
4975 0 : reinterpret_cast<int *>(poWK->papanBandSrcValid[iBand]),
4976 : iBand);
4977 0 : if (err != CL_SUCCESS)
4978 : {
4979 0 : CPLError(
4980 : CE_Failure, CPLE_AppDefined,
4981 : "OpenCL routines reported failure (%d) on line %d.",
4982 : static_cast<int>(err), __LINE__);
4983 0 : eErr = CE_Failure;
4984 0 : throw eErr;
4985 : }
4986 : }
4987 :
4988 0 : err = GDALWarpKernelOpenCL_setSrcImg(
4989 0 : warper, poWK->papabySrcImage[iBand], iBand);
4990 0 : if (err != CL_SUCCESS)
4991 : {
4992 0 : CPLError(CE_Failure, CPLE_AppDefined,
4993 : "OpenCL routines reported failure (%d) on line %d.",
4994 : static_cast<int>(err), __LINE__);
4995 0 : eErr = CE_Failure;
4996 0 : throw eErr;
4997 : }
4998 :
4999 0 : err = GDALWarpKernelOpenCL_setDstImg(
5000 0 : warper, poWK->papabyDstImage[iBand], iBand);
5001 0 : if (err != CL_SUCCESS)
5002 : {
5003 0 : CPLError(CE_Failure, CPLE_AppDefined,
5004 : "OpenCL routines reported failure (%d) on line %d.",
5005 : static_cast<int>(err), __LINE__);
5006 0 : eErr = CE_Failure;
5007 0 : throw eErr;
5008 : }
5009 : }
5010 :
5011 : /* --------------------------------------------------------------------
5012 : */
5013 : /* Allocate x,y,z coordinate arrays for transformation ... one */
5014 : /* scanlines worth of positions. */
5015 : /* --------------------------------------------------------------------
5016 : */
5017 :
5018 : // For x, 2 *, because we cache the precomputed values at the end.
5019 : double *padfX =
5020 0 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
5021 : double *padfY =
5022 0 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5023 : double *padfZ =
5024 0 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5025 : int *pabSuccess =
5026 0 : static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5027 0 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
5028 0 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
5029 0 : const double dfErrorThreshold = CPLAtof(CSLFetchNameValueDef(
5030 0 : poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
5031 :
5032 : // Precompute values.
5033 0 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5034 0 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
5035 :
5036 : /* ====================================================================
5037 : */
5038 : /* Loop over output lines. */
5039 : /* ====================================================================
5040 : */
5041 0 : for (int iDstY = 0; iDstY < nDstYSize && eErr == CE_None; ++iDstY)
5042 : {
5043 : /* ----------------------------------------------------------------
5044 : */
5045 : /* Setup points to transform to source image space. */
5046 : /* ----------------------------------------------------------------
5047 : */
5048 0 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
5049 0 : const double dfYConst = iDstY + 0.5 + poWK->nDstYOff;
5050 0 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5051 0 : padfY[iDstX] = dfYConst;
5052 0 : memset(padfZ, 0, sizeof(double) * nDstXSize);
5053 :
5054 : /* ----------------------------------------------------------------
5055 : */
5056 : /* Transform the points from destination pixel/line
5057 : * coordinates*/
5058 : /* to source pixel/line coordinates. */
5059 : /* ----------------------------------------------------------------
5060 : */
5061 0 : poWK->pfnTransformer(poWK->pTransformerArg, TRUE, nDstXSize, padfX,
5062 : padfY, padfZ, pabSuccess);
5063 0 : if (dfSrcCoordPrecision > 0.0)
5064 : {
5065 0 : GWKRoundSourceCoordinates(
5066 : nDstXSize, padfX, padfY, padfZ, pabSuccess,
5067 : dfSrcCoordPrecision, dfErrorThreshold, poWK->pfnTransformer,
5068 : poWK->pTransformerArg, 0.5 + nDstXOff,
5069 0 : iDstY + 0.5 + nDstYOff);
5070 : }
5071 :
5072 0 : err = GDALWarpKernelOpenCL_setCoordRow(
5073 : warper, padfX, padfY, nSrcXOff, nSrcYOff, pabSuccess, iDstY);
5074 0 : if (err != CL_SUCCESS)
5075 : {
5076 0 : CPLError(CE_Failure, CPLE_AppDefined,
5077 : "OpenCL routines reported failure (%d) on line %d.",
5078 : static_cast<int>(err), __LINE__);
5079 0 : eErr = CE_Failure;
5080 0 : break;
5081 : }
5082 :
5083 : // Update the valid & density masks because we don't do so in the
5084 : // kernel.
5085 0 : for (int iDstX = 0; iDstX < nDstXSize && eErr == CE_None; iDstX++)
5086 : {
5087 0 : const double dfX = padfX[iDstX];
5088 0 : const double dfY = padfY[iDstX];
5089 0 : const GPtrDiff_t iDstOffset =
5090 0 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
5091 :
5092 : // See GWKGeneralCase() for appropriate commenting.
5093 0 : if (!pabSuccess[iDstX] || dfX < nSrcXOff || dfY < nSrcYOff)
5094 0 : continue;
5095 :
5096 0 : int iSrcX = static_cast<int>(dfX) - nSrcXOff;
5097 0 : int iSrcY = static_cast<int>(dfY) - nSrcYOff;
5098 :
5099 0 : if (iSrcX < 0 || iSrcX >= nSrcXSize || iSrcY < 0 ||
5100 : iSrcY >= nSrcYSize)
5101 0 : continue;
5102 :
5103 0 : GPtrDiff_t iSrcOffset =
5104 0 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
5105 0 : double dfDensity = 1.0;
5106 :
5107 0 : if (poWK->pafUnifiedSrcDensity != nullptr && iSrcX >= 0 &&
5108 0 : iSrcY >= 0 && iSrcX < nSrcXSize && iSrcY < nSrcYSize)
5109 0 : dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
5110 :
5111 0 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
5112 :
5113 : // Because this is on the bit-wise level, it can't be done well
5114 : // in OpenCL.
5115 0 : if (poWK->panDstValid != nullptr)
5116 0 : poWK->panDstValid[iDstOffset >> 5] |=
5117 0 : 0x01 << (iDstOffset & 0x1f);
5118 : }
5119 : }
5120 :
5121 0 : CPLFree(padfX);
5122 0 : CPLFree(padfY);
5123 0 : CPLFree(padfZ);
5124 0 : CPLFree(pabSuccess);
5125 :
5126 0 : if (eErr != CE_None)
5127 0 : throw eErr;
5128 :
5129 0 : err = GDALWarpKernelOpenCL_runResamp(
5130 : warper, poWK->pafUnifiedSrcDensity, poWK->panUnifiedSrcValid,
5131 : poWK->pafDstDensity, poWK->panDstValid, poWK->dfXScale,
5132 : poWK->dfYScale, poWK->dfXFilter, poWK->dfYFilter, poWK->nXRadius,
5133 : poWK->nYRadius, poWK->nFiltInitX, poWK->nFiltInitY);
5134 :
5135 0 : if (err != CL_SUCCESS)
5136 : {
5137 0 : CPLError(CE_Failure, CPLE_AppDefined,
5138 : "OpenCL routines reported failure (%d) on line %d.",
5139 : static_cast<int>(err), __LINE__);
5140 0 : eErr = CE_Failure;
5141 0 : throw eErr;
5142 : }
5143 :
5144 : /* ====================================================================
5145 : */
5146 : /* Loop over output lines. */
5147 : /* ====================================================================
5148 : */
5149 0 : for (int iDstY = 0; iDstY < nDstYSize && eErr == CE_None; iDstY++)
5150 : {
5151 0 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
5152 : {
5153 0 : void *rowReal = nullptr;
5154 0 : void *rowImag = nullptr;
5155 0 : GByte *pabyDst = poWK->papabyDstImage[iBand];
5156 :
5157 0 : err = GDALWarpKernelOpenCL_getRow(warper, &rowReal, &rowImag,
5158 : iDstY, iBand);
5159 0 : if (err != CL_SUCCESS)
5160 : {
5161 0 : CPLError(
5162 : CE_Failure, CPLE_AppDefined,
5163 : "OpenCL routines reported failure (%d) on line %d.",
5164 : static_cast<int>(err), __LINE__);
5165 0 : eErr = CE_Failure;
5166 0 : throw eErr;
5167 : }
5168 :
5169 : // Copy the data from the warper to GDAL's memory.
5170 0 : switch (poWK->eWorkingDataType)
5171 : {
5172 0 : case GDT_Byte:
5173 0 : memcpy(&(pabyDst[iDstY * nDstXSize]), rowReal,
5174 : sizeof(GByte) * nDstXSize);
5175 0 : break;
5176 0 : case GDT_Int16:
5177 0 : memcpy(&(reinterpret_cast<GInt16 *>(
5178 0 : pabyDst)[iDstY * nDstXSize]),
5179 0 : rowReal, sizeof(GInt16) * nDstXSize);
5180 0 : break;
5181 0 : case GDT_UInt16:
5182 0 : memcpy(&(reinterpret_cast<GUInt16 *>(
5183 0 : pabyDst)[iDstY * nDstXSize]),
5184 0 : rowReal, sizeof(GUInt16) * nDstXSize);
5185 0 : break;
5186 0 : case GDT_Float32:
5187 0 : memcpy(&(reinterpret_cast<float *>(
5188 0 : pabyDst)[iDstY * nDstXSize]),
5189 0 : rowReal, sizeof(float) * nDstXSize);
5190 0 : break;
5191 0 : case GDT_CInt16:
5192 : {
5193 0 : GInt16 *pabyDstI16 = &(reinterpret_cast<GInt16 *>(
5194 0 : pabyDst)[iDstY * nDstXSize]);
5195 0 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5196 : {
5197 0 : pabyDstI16[iDstX * 2] =
5198 0 : static_cast<GInt16 *>(rowReal)[iDstX];
5199 0 : pabyDstI16[iDstX * 2 + 1] =
5200 0 : static_cast<GInt16 *>(rowImag)[iDstX];
5201 : }
5202 : }
5203 0 : break;
5204 0 : case GDT_CFloat32:
5205 : {
5206 0 : float *pabyDstF32 = &(reinterpret_cast<float *>(
5207 0 : pabyDst)[iDstY * nDstXSize]);
5208 0 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5209 : {
5210 0 : pabyDstF32[iDstX * 2] =
5211 0 : static_cast<float *>(rowReal)[iDstX];
5212 0 : pabyDstF32[iDstX * 2 + 1] =
5213 0 : static_cast<float *>(rowImag)[iDstX];
5214 : }
5215 : }
5216 0 : break;
5217 0 : default:
5218 : // No support for higher precision formats.
5219 0 : CPLError(CE_Failure, CPLE_AppDefined,
5220 : "Unsupported resampling OpenCL data type %d.",
5221 0 : static_cast<int>(poWK->eWorkingDataType));
5222 0 : eErr = CE_Failure;
5223 0 : throw eErr;
5224 : }
5225 : }
5226 : }
5227 : }
5228 0 : catch (const CPLErr &)
5229 : {
5230 : }
5231 :
5232 0 : if ((err = GDALWarpKernelOpenCL_deleteEnv(warper)) != CL_SUCCESS)
5233 : {
5234 0 : CPLError(CE_Failure, CPLE_AppDefined,
5235 : "OpenCL routines reported failure (%d) on line %d.",
5236 : static_cast<int>(err), __LINE__);
5237 0 : return CE_Failure;
5238 : }
5239 :
5240 0 : return eErr;
5241 : }
5242 : #endif /* defined(HAVE_OPENCL) */
5243 :
5244 : /************************************************************************/
5245 : /* GWKCheckAndComputeSrcOffsets() */
5246 : /************************************************************************/
5247 : static CPL_INLINE bool
5248 109623000 : GWKCheckAndComputeSrcOffsets(GWKJobStruct *psJob, int *_pabSuccess, int _iDstX,
5249 : int _iDstY, double *_padfX, double *_padfY,
5250 : int _nSrcXSize, int _nSrcYSize,
5251 : GPtrDiff_t &iSrcOffset)
5252 : {
5253 109623000 : const GDALWarpKernel *_poWK = psJob->poWK;
5254 109749000 : for (int iTry = 0; iTry < 2; ++iTry)
5255 : {
5256 109757000 : if (iTry == 1)
5257 : {
5258 : // If the source coordinate is slightly outside of the source raster
5259 : // retry to transform it alone, so that the exact coordinate
5260 : // transformer is used.
5261 :
5262 125624 : _padfX[_iDstX] = _iDstX + 0.5 + _poWK->nDstXOff;
5263 125624 : _padfY[_iDstX] = _iDstY + 0.5 + _poWK->nDstYOff;
5264 125624 : double dfZ = 0;
5265 125624 : _poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1,
5266 125624 : _padfX + _iDstX, _padfY + _iDstX, &dfZ,
5267 125624 : _pabSuccess + _iDstX);
5268 : }
5269 109757000 : if (!_pabSuccess[_iDstX])
5270 3593220 : return false;
5271 :
5272 : // If this happens this is likely the symptom of a bug somewhere.
5273 106164000 : if (std::isnan(_padfX[_iDstX]) || std::isnan(_padfY[_iDstX]))
5274 : {
5275 : static bool bNanCoordFound = false;
5276 0 : if (!bNanCoordFound)
5277 : {
5278 0 : CPLDebug("WARP",
5279 : "GWKCheckAndComputeSrcOffsets(): "
5280 : "NaN coordinate found on point %d.",
5281 : _iDstX);
5282 0 : bNanCoordFound = true;
5283 : }
5284 0 : return false;
5285 : }
5286 :
5287 : /* --------------------------------------------------------------------
5288 : */
5289 : /* Figure out what pixel we want in our source raster, and skip */
5290 : /* further processing if it is well off the source image. */
5291 : /* --------------------------------------------------------------------
5292 : */
5293 : /* We test against the value before casting to avoid the */
5294 : /* problem of asymmetric truncation effects around zero. That is */
5295 : /* -0.5 will be 0 when cast to an int. */
5296 106158000 : if (_padfX[_iDstX] < _poWK->nSrcXOff)
5297 : {
5298 : // If the source coordinate is slightly outside of the source raster
5299 : // retry to transform it alone, so that the exact coordinate
5300 : // transformer is used.
5301 4137250 : if (iTry == 0 && _padfX[_iDstX] > _poWK->nSrcXOff - 1)
5302 21433 : continue;
5303 4115820 : return false;
5304 : }
5305 :
5306 102021000 : if (_padfY[_iDstX] < _poWK->nSrcYOff)
5307 : {
5308 : // If the source coordinate is slightly outside of the source raster
5309 : // retry to transform it alone, so that the exact coordinate
5310 : // transformer is used.
5311 4792200 : if (iTry == 0 && _padfY[_iDstX] > _poWK->nSrcYOff - 1)
5312 38435 : continue;
5313 4753770 : return false;
5314 : }
5315 :
5316 : // Check for potential overflow when casting from float to int, (if
5317 : // operating outside natural projection area, padfX/Y can be a very huge
5318 : // positive number before doing the actual conversion), as such cast is
5319 : // undefined behavior that can trigger exception with some compilers
5320 : // (see #6753)
5321 97229000 : if (_padfX[_iDstX] + 1e-10 > _nSrcXSize + _poWK->nSrcXOff)
5322 : {
5323 : // If the source coordinate is slightly outside of the source raster
5324 : // retry to transform it alone, so that the exact coordinate
5325 : // transformer is used.
5326 3496360 : if (iTry == 0 && _padfX[_iDstX] < _nSrcXSize + _poWK->nSrcXOff + 1)
5327 33239 : continue;
5328 3463130 : return false;
5329 : }
5330 93732600 : if (_padfY[_iDstX] + 1e-10 > _nSrcYSize + _poWK->nSrcYOff)
5331 : {
5332 : // If the source coordinate is slightly outside of the source raster
5333 : // retry to transform it alone, so that the exact coordinate
5334 : // transformer is used.
5335 3731400 : if (iTry == 0 && _padfY[_iDstX] < _nSrcYSize + _poWK->nSrcYOff + 1)
5336 32517 : continue;
5337 3698880 : return false;
5338 : }
5339 :
5340 90001200 : break;
5341 : }
5342 :
5343 89993100 : int iSrcX = static_cast<int>(_padfX[_iDstX] + 1.0e-10) - _poWK->nSrcXOff;
5344 89993100 : int iSrcY = static_cast<int>(_padfY[_iDstX] + 1.0e-10) - _poWK->nSrcYOff;
5345 89993100 : if (iSrcX == _nSrcXSize)
5346 0 : iSrcX--;
5347 89993100 : if (iSrcY == _nSrcYSize)
5348 0 : iSrcY--;
5349 :
5350 : // Those checks should normally be OK given the previous ones.
5351 89993100 : CPLAssert(iSrcX >= 0);
5352 89993100 : CPLAssert(iSrcY >= 0);
5353 89993100 : CPLAssert(iSrcX < _nSrcXSize);
5354 89993100 : CPLAssert(iSrcY < _nSrcYSize);
5355 :
5356 89993100 : iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * _nSrcXSize;
5357 :
5358 89993100 : return true;
5359 : }
5360 :
5361 : /************************************************************************/
5362 : /* GWKOneSourceCornerFailsToReproject() */
5363 : /************************************************************************/
5364 :
5365 719 : static bool GWKOneSourceCornerFailsToReproject(GWKJobStruct *psJob)
5366 : {
5367 719 : GDALWarpKernel *poWK = psJob->poWK;
5368 2147 : for (int iY = 0; iY <= 1; ++iY)
5369 : {
5370 4290 : for (int iX = 0; iX <= 1; ++iX)
5371 : {
5372 2862 : double dfXTmp = poWK->nSrcXOff + iX * poWK->nSrcXSize;
5373 2862 : double dfYTmp = poWK->nSrcYOff + iY * poWK->nSrcYSize;
5374 2862 : double dfZTmp = 0;
5375 2862 : int nSuccess = FALSE;
5376 2862 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp,
5377 : &dfYTmp, &dfZTmp, &nSuccess);
5378 2862 : if (!nSuccess)
5379 6 : return true;
5380 : }
5381 : }
5382 713 : return false;
5383 : }
5384 :
5385 : /************************************************************************/
5386 : /* GWKAdjustSrcOffsetOnEdge() */
5387 : /************************************************************************/
5388 :
5389 9714 : static bool GWKAdjustSrcOffsetOnEdge(GWKJobStruct *psJob,
5390 : GPtrDiff_t &iSrcOffset)
5391 : {
5392 9714 : GDALWarpKernel *poWK = psJob->poWK;
5393 9714 : const int nSrcXSize = poWK->nSrcXSize;
5394 9714 : const int nSrcYSize = poWK->nSrcYSize;
5395 :
5396 : // Check if the computed source position slightly altered
5397 : // fails to reproject. If so, then we are at the edge of
5398 : // the validity area, and it is worth checking neighbour
5399 : // source pixels for validity.
5400 9714 : int nSuccess = FALSE;
5401 : {
5402 9714 : double dfXTmp =
5403 9714 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5404 9714 : double dfYTmp =
5405 9714 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5406 9714 : double dfZTmp = 0;
5407 9714 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5408 : &dfZTmp, &nSuccess);
5409 : }
5410 9714 : if (nSuccess)
5411 : {
5412 6996 : double dfXTmp =
5413 6996 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5414 6996 : double dfYTmp =
5415 6996 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
5416 6996 : double dfZTmp = 0;
5417 6996 : nSuccess = FALSE;
5418 6996 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5419 : &dfZTmp, &nSuccess);
5420 : }
5421 9714 : if (nSuccess)
5422 : {
5423 5624 : double dfXTmp =
5424 5624 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
5425 5624 : double dfYTmp =
5426 5624 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5427 5624 : double dfZTmp = 0;
5428 5624 : nSuccess = FALSE;
5429 5624 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5430 : &dfZTmp, &nSuccess);
5431 : }
5432 :
5433 14166 : if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
5434 4452 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + 1))
5435 : {
5436 1860 : iSrcOffset++;
5437 1860 : return true;
5438 : }
5439 10290 : else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
5440 2436 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + nSrcXSize))
5441 : {
5442 1334 : iSrcOffset += nSrcXSize;
5443 1334 : return true;
5444 : }
5445 7838 : else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
5446 1318 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - 1))
5447 : {
5448 956 : iSrcOffset--;
5449 956 : return true;
5450 : }
5451 5924 : else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
5452 360 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - nSrcXSize))
5453 : {
5454 340 : iSrcOffset -= nSrcXSize;
5455 340 : return true;
5456 : }
5457 :
5458 5224 : return false;
5459 : }
5460 :
5461 : /************************************************************************/
5462 : /* GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity() */
5463 : /************************************************************************/
5464 :
5465 0 : static bool GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(GWKJobStruct *psJob,
5466 : GPtrDiff_t &iSrcOffset)
5467 : {
5468 0 : GDALWarpKernel *poWK = psJob->poWK;
5469 0 : const int nSrcXSize = poWK->nSrcXSize;
5470 0 : const int nSrcYSize = poWK->nSrcYSize;
5471 :
5472 : // Check if the computed source position slightly altered
5473 : // fails to reproject. If so, then we are at the edge of
5474 : // the validity area, and it is worth checking neighbour
5475 : // source pixels for validity.
5476 0 : int nSuccess = FALSE;
5477 : {
5478 0 : double dfXTmp =
5479 0 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5480 0 : double dfYTmp =
5481 0 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5482 0 : double dfZTmp = 0;
5483 0 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5484 : &dfZTmp, &nSuccess);
5485 : }
5486 0 : if (nSuccess)
5487 : {
5488 0 : double dfXTmp =
5489 0 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5490 0 : double dfYTmp =
5491 0 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
5492 0 : double dfZTmp = 0;
5493 0 : nSuccess = FALSE;
5494 0 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5495 : &dfZTmp, &nSuccess);
5496 : }
5497 0 : if (nSuccess)
5498 : {
5499 0 : double dfXTmp =
5500 0 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
5501 0 : double dfYTmp =
5502 0 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5503 0 : double dfZTmp = 0;
5504 0 : nSuccess = FALSE;
5505 0 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5506 : &dfZTmp, &nSuccess);
5507 : }
5508 :
5509 0 : if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
5510 0 : poWK->pafUnifiedSrcDensity[iSrcOffset + 1] >= SRC_DENSITY_THRESHOLD)
5511 : {
5512 0 : iSrcOffset++;
5513 0 : return true;
5514 : }
5515 0 : else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
5516 0 : poWK->pafUnifiedSrcDensity[iSrcOffset + nSrcXSize] >=
5517 : SRC_DENSITY_THRESHOLD)
5518 : {
5519 0 : iSrcOffset += nSrcXSize;
5520 0 : return true;
5521 : }
5522 0 : else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
5523 0 : poWK->pafUnifiedSrcDensity[iSrcOffset - 1] >=
5524 : SRC_DENSITY_THRESHOLD)
5525 : {
5526 0 : iSrcOffset--;
5527 0 : return true;
5528 : }
5529 0 : else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
5530 0 : poWK->pafUnifiedSrcDensity[iSrcOffset - nSrcXSize] >=
5531 : SRC_DENSITY_THRESHOLD)
5532 : {
5533 0 : iSrcOffset -= nSrcXSize;
5534 0 : return true;
5535 : }
5536 :
5537 0 : return false;
5538 : }
5539 :
5540 : /************************************************************************/
5541 : /* GWKGeneralCase() */
5542 : /* */
5543 : /* This is the most general case. It attempts to handle all */
5544 : /* possible features with relatively little concern for */
5545 : /* efficiency. */
5546 : /************************************************************************/
5547 :
5548 243 : static void GWKGeneralCaseThread(void *pData)
5549 : {
5550 243 : GWKJobStruct *psJob = reinterpret_cast<GWKJobStruct *>(pData);
5551 243 : GDALWarpKernel *poWK = psJob->poWK;
5552 243 : const int iYMin = psJob->iYMin;
5553 243 : const int iYMax = psJob->iYMax;
5554 : const double dfMultFactorVerticalShiftPipeline =
5555 243 : poWK->bApplyVerticalShift
5556 243 : ? CPLAtof(CSLFetchNameValueDef(
5557 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
5558 : "1.0"))
5559 243 : : 0.0;
5560 :
5561 243 : int nDstXSize = poWK->nDstXSize;
5562 243 : int nSrcXSize = poWK->nSrcXSize;
5563 243 : int nSrcYSize = poWK->nSrcYSize;
5564 :
5565 : /* -------------------------------------------------------------------- */
5566 : /* Allocate x,y,z coordinate arrays for transformation ... one */
5567 : /* scanlines worth of positions. */
5568 : /* -------------------------------------------------------------------- */
5569 : // For x, 2 *, because we cache the precomputed values at the end.
5570 : double *padfX =
5571 243 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
5572 : double *padfY =
5573 243 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5574 : double *padfZ =
5575 243 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5576 243 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5577 :
5578 243 : const bool bUse4SamplesFormula =
5579 243 : poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
5580 :
5581 243 : GWKResampleWrkStruct *psWrkStruct = nullptr;
5582 243 : if (poWK->eResample != GRA_NearestNeighbour)
5583 : {
5584 224 : psWrkStruct = GWKResampleCreateWrkStruct(poWK);
5585 : }
5586 243 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
5587 243 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
5588 243 : const double dfErrorThreshold = CPLAtof(
5589 243 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
5590 :
5591 : const bool bOneSourceCornerFailsToReproject =
5592 243 : GWKOneSourceCornerFailsToReproject(psJob);
5593 :
5594 : // Precompute values.
5595 6513 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5596 6270 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
5597 :
5598 : /* ==================================================================== */
5599 : /* Loop over output lines. */
5600 : /* ==================================================================== */
5601 6513 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
5602 : {
5603 : /* --------------------------------------------------------------------
5604 : */
5605 : /* Setup points to transform to source image space. */
5606 : /* --------------------------------------------------------------------
5607 : */
5608 6270 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
5609 6270 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
5610 242830 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5611 236560 : padfY[iDstX] = dfY;
5612 6270 : memset(padfZ, 0, sizeof(double) * nDstXSize);
5613 :
5614 : /* --------------------------------------------------------------------
5615 : */
5616 : /* Transform the points from destination pixel/line coordinates */
5617 : /* to source pixel/line coordinates. */
5618 : /* --------------------------------------------------------------------
5619 : */
5620 6270 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
5621 : padfY, padfZ, pabSuccess);
5622 6270 : if (dfSrcCoordPrecision > 0.0)
5623 : {
5624 0 : GWKRoundSourceCoordinates(
5625 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
5626 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
5627 0 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
5628 : }
5629 :
5630 : /* ====================================================================
5631 : */
5632 : /* Loop over pixels in output scanline. */
5633 : /* ====================================================================
5634 : */
5635 242830 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5636 : {
5637 236560 : GPtrDiff_t iSrcOffset = 0;
5638 236560 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
5639 : padfX, padfY, nSrcXSize,
5640 : nSrcYSize, iSrcOffset))
5641 0 : continue;
5642 :
5643 : /* --------------------------------------------------------------------
5644 : */
5645 : /* Do not try to apply transparent/invalid source pixels to the
5646 : */
5647 : /* destination. This currently ignores the multi-pixel input
5648 : */
5649 : /* of bilinear and cubic resamples. */
5650 : /* --------------------------------------------------------------------
5651 : */
5652 236560 : double dfDensity = 1.0;
5653 :
5654 236560 : if (poWK->pafUnifiedSrcDensity != nullptr)
5655 : {
5656 1200 : dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
5657 1200 : if (dfDensity < SRC_DENSITY_THRESHOLD)
5658 : {
5659 0 : if (!bOneSourceCornerFailsToReproject)
5660 : {
5661 0 : continue;
5662 : }
5663 0 : else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
5664 : psJob, iSrcOffset))
5665 : {
5666 0 : dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
5667 : }
5668 : else
5669 : {
5670 0 : continue;
5671 : }
5672 : }
5673 : }
5674 :
5675 236560 : if (poWK->panUnifiedSrcValid != nullptr &&
5676 0 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
5677 : {
5678 0 : if (!bOneSourceCornerFailsToReproject)
5679 : {
5680 0 : continue;
5681 : }
5682 0 : else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
5683 : {
5684 0 : continue;
5685 : }
5686 : }
5687 :
5688 : /* ====================================================================
5689 : */
5690 : /* Loop processing each band. */
5691 : /* ====================================================================
5692 : */
5693 236560 : bool bHasFoundDensity = false;
5694 :
5695 236560 : const GPtrDiff_t iDstOffset =
5696 236560 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
5697 473120 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
5698 : {
5699 236560 : double dfBandDensity = 0.0;
5700 236560 : double dfValueReal = 0.0;
5701 236560 : double dfValueImag = 0.0;
5702 :
5703 : /* --------------------------------------------------------------------
5704 : */
5705 : /* Collect the source value. */
5706 : /* --------------------------------------------------------------------
5707 : */
5708 236560 : if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
5709 : nSrcYSize == 1)
5710 : {
5711 : // FALSE is returned if dfBandDensity == 0, which is
5712 : // checked below.
5713 568 : CPL_IGNORE_RET_VAL(GWKGetPixelValue(
5714 : poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal,
5715 : &dfValueImag));
5716 : }
5717 235992 : else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
5718 : {
5719 648 : GWKBilinearResample4Sample(
5720 648 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5721 648 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5722 : &dfValueReal, &dfValueImag);
5723 : }
5724 235344 : else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
5725 : {
5726 248 : GWKCubicResample4Sample(
5727 248 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5728 248 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5729 : &dfValueReal, &dfValueImag);
5730 : }
5731 : else
5732 : #ifdef DEBUG
5733 : // Only useful for clang static analyzer.
5734 235096 : if (psWrkStruct != nullptr)
5735 : #endif
5736 : {
5737 235096 : psWrkStruct->pfnGWKResample(
5738 235096 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5739 235096 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5740 : &dfValueReal, &dfValueImag, psWrkStruct);
5741 : }
5742 :
5743 : // If we didn't find any valid inputs skip to next band.
5744 236560 : if (dfBandDensity < BAND_DENSITY_THRESHOLD)
5745 0 : continue;
5746 :
5747 236560 : if (poWK->bApplyVerticalShift)
5748 : {
5749 0 : if (!std::isfinite(padfZ[iDstX]))
5750 0 : continue;
5751 : // Subtract padfZ[] since the coordinate transformation is
5752 : // from target to source
5753 0 : dfValueReal =
5754 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
5755 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
5756 : }
5757 :
5758 236560 : bHasFoundDensity = true;
5759 :
5760 : /* --------------------------------------------------------------------
5761 : */
5762 : /* We have a computed value from the source. Now apply it
5763 : * to */
5764 : /* the destination pixel. */
5765 : /* --------------------------------------------------------------------
5766 : */
5767 236560 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
5768 : dfValueReal, dfValueImag);
5769 : }
5770 :
5771 236560 : if (!bHasFoundDensity)
5772 0 : continue;
5773 :
5774 : /* --------------------------------------------------------------------
5775 : */
5776 : /* Update destination density/validity masks. */
5777 : /* --------------------------------------------------------------------
5778 : */
5779 236560 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
5780 :
5781 236560 : if (poWK->panDstValid != nullptr)
5782 : {
5783 0 : CPLMaskSet(poWK->panDstValid, iDstOffset);
5784 : }
5785 : } /* Next iDstX */
5786 :
5787 : /* --------------------------------------------------------------------
5788 : */
5789 : /* Report progress to the user, and optionally cancel out. */
5790 : /* --------------------------------------------------------------------
5791 : */
5792 6270 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
5793 0 : break;
5794 : }
5795 :
5796 : /* -------------------------------------------------------------------- */
5797 : /* Cleanup and return. */
5798 : /* -------------------------------------------------------------------- */
5799 243 : CPLFree(padfX);
5800 243 : CPLFree(padfY);
5801 243 : CPLFree(padfZ);
5802 243 : CPLFree(pabSuccess);
5803 243 : if (psWrkStruct)
5804 224 : GWKResampleDeleteWrkStruct(psWrkStruct);
5805 243 : }
5806 :
5807 243 : static CPLErr GWKGeneralCase(GDALWarpKernel *poWK)
5808 : {
5809 243 : return GWKRun(poWK, "GWKGeneralCase", GWKGeneralCaseThread);
5810 : }
5811 :
5812 : /************************************************************************/
5813 : /* GWKRealCase() */
5814 : /* */
5815 : /* General case for non-complex data types. */
5816 : /************************************************************************/
5817 :
5818 133 : static void GWKRealCaseThread(void *pData)
5819 :
5820 : {
5821 133 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
5822 133 : GDALWarpKernel *poWK = psJob->poWK;
5823 133 : const int iYMin = psJob->iYMin;
5824 133 : const int iYMax = psJob->iYMax;
5825 :
5826 133 : const int nDstXSize = poWK->nDstXSize;
5827 133 : const int nSrcXSize = poWK->nSrcXSize;
5828 133 : const int nSrcYSize = poWK->nSrcYSize;
5829 : const double dfMultFactorVerticalShiftPipeline =
5830 133 : poWK->bApplyVerticalShift
5831 133 : ? CPLAtof(CSLFetchNameValueDef(
5832 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
5833 : "1.0"))
5834 133 : : 0.0;
5835 :
5836 : /* -------------------------------------------------------------------- */
5837 : /* Allocate x,y,z coordinate arrays for transformation ... one */
5838 : /* scanlines worth of positions. */
5839 : /* -------------------------------------------------------------------- */
5840 :
5841 : // For x, 2 *, because we cache the precomputed values at the end.
5842 : double *padfX =
5843 133 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
5844 : double *padfY =
5845 133 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5846 : double *padfZ =
5847 133 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5848 133 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5849 :
5850 133 : const bool bUse4SamplesFormula =
5851 133 : poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
5852 :
5853 133 : GWKResampleWrkStruct *psWrkStruct = nullptr;
5854 133 : if (poWK->eResample != GRA_NearestNeighbour)
5855 : {
5856 117 : psWrkStruct = GWKResampleCreateWrkStruct(poWK);
5857 : }
5858 133 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
5859 133 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
5860 133 : const double dfErrorThreshold = CPLAtof(
5861 133 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
5862 :
5863 384 : const bool bSrcMaskIsDensity = poWK->panUnifiedSrcValid == nullptr &&
5864 251 : poWK->papanBandSrcValid == nullptr &&
5865 118 : poWK->pafUnifiedSrcDensity != nullptr;
5866 :
5867 : const bool bOneSourceCornerFailsToReproject =
5868 133 : GWKOneSourceCornerFailsToReproject(psJob);
5869 :
5870 : // Precompute values.
5871 18764 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5872 18631 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
5873 :
5874 : /* ==================================================================== */
5875 : /* Loop over output lines. */
5876 : /* ==================================================================== */
5877 21515 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
5878 : {
5879 : /* --------------------------------------------------------------------
5880 : */
5881 : /* Setup points to transform to source image space. */
5882 : /* --------------------------------------------------------------------
5883 : */
5884 21382 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
5885 21382 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
5886 43456400 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5887 43435000 : padfY[iDstX] = dfY;
5888 21382 : memset(padfZ, 0, sizeof(double) * nDstXSize);
5889 :
5890 : /* --------------------------------------------------------------------
5891 : */
5892 : /* Transform the points from destination pixel/line coordinates */
5893 : /* to source pixel/line coordinates. */
5894 : /* --------------------------------------------------------------------
5895 : */
5896 21382 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
5897 : padfY, padfZ, pabSuccess);
5898 21382 : if (dfSrcCoordPrecision > 0.0)
5899 : {
5900 0 : GWKRoundSourceCoordinates(
5901 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
5902 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
5903 0 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
5904 : }
5905 :
5906 : /* ====================================================================
5907 : */
5908 : /* Loop over pixels in output scanline. */
5909 : /* ====================================================================
5910 : */
5911 43456400 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5912 : {
5913 43435000 : GPtrDiff_t iSrcOffset = 0;
5914 43435000 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
5915 : padfX, padfY, nSrcXSize,
5916 : nSrcYSize, iSrcOffset))
5917 42842700 : continue;
5918 :
5919 : /* --------------------------------------------------------------------
5920 : */
5921 : /* Do not try to apply transparent/invalid source pixels to the
5922 : */
5923 : /* destination. This currently ignores the multi-pixel input
5924 : */
5925 : /* of bilinear and cubic resamples. */
5926 : /* --------------------------------------------------------------------
5927 : */
5928 31382600 : double dfDensity = 1.0;
5929 :
5930 31382600 : if (poWK->pafUnifiedSrcDensity != nullptr)
5931 : {
5932 1262880 : dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
5933 1262880 : if (dfDensity < SRC_DENSITY_THRESHOLD)
5934 : {
5935 1261590 : if (!bOneSourceCornerFailsToReproject)
5936 : {
5937 1261590 : continue;
5938 : }
5939 0 : else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
5940 : psJob, iSrcOffset))
5941 : {
5942 0 : dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
5943 : }
5944 : else
5945 : {
5946 0 : continue;
5947 : }
5948 : }
5949 : }
5950 :
5951 59749600 : if (poWK->panUnifiedSrcValid != nullptr &&
5952 29628600 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
5953 : {
5954 29531000 : if (!bOneSourceCornerFailsToReproject)
5955 : {
5956 29528700 : continue;
5957 : }
5958 2229 : else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
5959 : {
5960 0 : continue;
5961 : }
5962 : }
5963 :
5964 : /* ====================================================================
5965 : */
5966 : /* Loop processing each band. */
5967 : /* ====================================================================
5968 : */
5969 592300 : bool bHasFoundDensity = false;
5970 :
5971 592300 : const GPtrDiff_t iDstOffset =
5972 592300 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
5973 1516060 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
5974 : {
5975 923761 : double dfBandDensity = 0.0;
5976 923761 : double dfValueReal = 0.0;
5977 :
5978 : /* --------------------------------------------------------------------
5979 : */
5980 : /* Collect the source value. */
5981 : /* --------------------------------------------------------------------
5982 : */
5983 923761 : if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
5984 : nSrcYSize == 1)
5985 : {
5986 : // FALSE is returned if dfBandDensity == 0, which is
5987 : // checked below.
5988 1012 : CPL_IGNORE_RET_VAL(GWKGetPixelValueReal(
5989 : poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal));
5990 : }
5991 922749 : else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
5992 : {
5993 1326 : double dfValueImagIgnored = 0.0;
5994 1326 : GWKBilinearResample4Sample(
5995 1326 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5996 1326 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5997 1326 : &dfValueReal, &dfValueImagIgnored);
5998 : }
5999 921423 : else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
6000 : {
6001 299992 : if (bSrcMaskIsDensity)
6002 : {
6003 361 : if (poWK->eWorkingDataType == GDT_Byte)
6004 : {
6005 361 : GWKCubicResampleSrcMaskIsDensity4SampleRealT<GByte>(
6006 361 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6007 361 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
6008 : &dfValueReal);
6009 : }
6010 0 : else if (poWK->eWorkingDataType == GDT_UInt16)
6011 : {
6012 : GWKCubicResampleSrcMaskIsDensity4SampleRealT<
6013 0 : GUInt16>(poWK, iBand,
6014 0 : padfX[iDstX] - poWK->nSrcXOff,
6015 0 : padfY[iDstX] - poWK->nSrcYOff,
6016 : &dfBandDensity, &dfValueReal);
6017 : }
6018 : else
6019 : {
6020 0 : GWKCubicResampleSrcMaskIsDensity4SampleReal(
6021 0 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6022 0 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
6023 : &dfValueReal);
6024 : }
6025 : }
6026 : else
6027 : {
6028 299631 : double dfValueImagIgnored = 0.0;
6029 299631 : GWKCubicResample4Sample(
6030 299631 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6031 299631 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
6032 : &dfValueReal, &dfValueImagIgnored);
6033 299992 : }
6034 : }
6035 : else
6036 : #ifdef DEBUG
6037 : // Only useful for clang static analyzer.
6038 621431 : if (psWrkStruct != nullptr)
6039 : #endif
6040 : {
6041 621431 : double dfValueImagIgnored = 0.0;
6042 621431 : psWrkStruct->pfnGWKResample(
6043 621431 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6044 621431 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
6045 : &dfValueReal, &dfValueImagIgnored, psWrkStruct);
6046 : }
6047 :
6048 : // If we didn't find any valid inputs skip to next band.
6049 923761 : if (dfBandDensity < BAND_DENSITY_THRESHOLD)
6050 0 : continue;
6051 :
6052 923761 : if (poWK->bApplyVerticalShift)
6053 : {
6054 0 : if (!std::isfinite(padfZ[iDstX]))
6055 0 : continue;
6056 : // Subtract padfZ[] since the coordinate transformation is
6057 : // from target to source
6058 0 : dfValueReal =
6059 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
6060 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
6061 : }
6062 :
6063 923761 : bHasFoundDensity = true;
6064 :
6065 : /* --------------------------------------------------------------------
6066 : */
6067 : /* We have a computed value from the source. Now apply it
6068 : * to */
6069 : /* the destination pixel. */
6070 : /* --------------------------------------------------------------------
6071 : */
6072 923761 : GWKSetPixelValueReal(poWK, iBand, iDstOffset, dfBandDensity,
6073 : dfValueReal);
6074 : }
6075 :
6076 592300 : if (!bHasFoundDensity)
6077 0 : continue;
6078 :
6079 : /* --------------------------------------------------------------------
6080 : */
6081 : /* Update destination density/validity masks. */
6082 : /* --------------------------------------------------------------------
6083 : */
6084 592300 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
6085 :
6086 592300 : if (poWK->panDstValid != nullptr)
6087 : {
6088 101460 : CPLMaskSet(poWK->panDstValid, iDstOffset);
6089 : }
6090 : } // Next iDstX.
6091 :
6092 : /* --------------------------------------------------------------------
6093 : */
6094 : /* Report progress to the user, and optionally cancel out. */
6095 : /* --------------------------------------------------------------------
6096 : */
6097 21382 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
6098 0 : break;
6099 : }
6100 :
6101 : /* -------------------------------------------------------------------- */
6102 : /* Cleanup and return. */
6103 : /* -------------------------------------------------------------------- */
6104 133 : CPLFree(padfX);
6105 133 : CPLFree(padfY);
6106 133 : CPLFree(padfZ);
6107 133 : CPLFree(pabSuccess);
6108 133 : if (psWrkStruct)
6109 117 : GWKResampleDeleteWrkStruct(psWrkStruct);
6110 133 : }
6111 :
// Entry point for the real-valued warping case: passes GWKRealCaseThread to
// GWKRun() under the label "GWKRealCase" and returns the CPLErr that GWKRun()
// produces.
6112 133 : static CPLErr GWKRealCase(GDALWarpKernel *poWK)
6113 : {
6114 133 : return GWKRun(poWK, "GWKRealCase", GWKRealCaseThread);
6115 : }
6116 :
6117 : /************************************************************************/
6118 : /* GWKCubicResampleNoMasks4MultiBandT() */
6119 : /************************************************************************/
6120 :
6121 : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
6122 : /* and enough SSE registers */
6123 : #if defined(__x86_64) || defined(_M_X64)
6124 :
// Weighted sum of a 4x4 neighbourhood.
//
// row0..row3 each hold 4 samples of one source row; weightsXY0..weightsXY3
// hold the 4 matching weights for that row. Each row is multiplied lane-wise
// by its weights, the four products are added lane-wise, and the resulting
// vector is handed to XMMHorizontalAdd() (defined elsewhere; presumably it
// sums the 4 lanes into a single float - TODO confirm).
6125 238596 : static inline float Convolute4x4(const __m128 row0, const __m128 row1,
6126 : const __m128 row2, const __m128 row3,
6127 : const __m128 weightsXY0,
6128 : const __m128 weightsXY1,
6129 : const __m128 weightsXY2,
6130 : const __m128 weightsXY3)
6131 : {
6132 1670170 : return XMMHorizontalAdd(_mm_add_ps(
6133 : _mm_add_ps(_mm_mul_ps(row0, weightsXY0), _mm_mul_ps(row1, weightsXY1)),
6134 : _mm_add_ps(_mm_mul_ps(row2, weightsXY2),
6135 238596 : _mm_mul_ps(row3, weightsXY3))));
6136 : }
6137 :
// Cubic resampling of one destination pixel for all bands at once, without
// any mask handling.
//
// T          - pixel type of the source and destination band buffers.
// poWK       - warp kernel providing the source/destination band buffers
//              (papabySrcImage / papabyDstImage), nBands and source sizes.
// dfSrcX/Y   - source position to sample (the caller visible in this file
//              passes coordinates already offset by nSrcXOff / nSrcYOff).
// iDstOffset - index of the destination pixel inside each destination band.
//
// When the 4x4 window around the position does not fit inside the source
// buffer, every band falls back to GWKBilinearResampleNoMasks4SampleT().
// Otherwise the 4x4 weights are computed once and reused for every band;
// bands are convolved two at a time, with a trailing single band if nBands is
// odd. On return pafDstDensity[iDstOffset] is set to 1.0 when that array
// exists.
6138 : template <class T>
6139 81323 : static void GWKCubicResampleNoMasks4MultiBandT(const GDALWarpKernel *poWK,
6140 : double dfSrcX, double dfSrcY,
6141 : const GPtrDiff_t iDstOffset)
6142 : {
6143 81323 : const double dfSrcXShifted = dfSrcX - 0.5;
6144 81323 : const int iSrcX = static_cast<int>(dfSrcXShifted);
6145 81323 : const double dfSrcYShifted = dfSrcY - 0.5;
6146 81323 : const int iSrcY = static_cast<int>(dfSrcYShifted);
6147 81323 : const GPtrDiff_t iSrcOffset =
6148 81323 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
6149 :
6150 : // Get the bilinear interpolation at the image borders.
6151 81323 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
6152 80326 : iSrcY + 2 >= poWK->nSrcYSize)
6153 : {
6154 7164 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6155 : {
6156 : T value;
6157 5373 : GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
6158 : &value);
6159 5373 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
6160 : value;
6161 1791 : }
6162 : }
6163 : else
6164 : {
// Fractional position inside the pixel, used to derive the cubic weights.
6165 79532 : const float fDeltaX = static_cast<float>(dfSrcXShifted) - iSrcX;
6166 79532 : const float fDeltaY = static_cast<float>(dfSrcYShifted) - iSrcY;
6167 :
6168 : float afCoeffsX[4];
6169 : float afCoeffsY[4];
6170 79532 : GWKCubicComputeWeights(fDeltaX, afCoeffsX);
6171 79532 : GWKCubicComputeWeights(fDeltaY, afCoeffsY);
// weightsXYn = (Y weight of row n) * (the 4 X weights): one weight vector per
// row of the 4x4 window.
6172 79532 : const auto weightsX = _mm_loadu_ps(afCoeffsX);
6173 : const auto weightsXY0 =
6174 159064 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[0]), weightsX);
6175 : const auto weightsXY1 =
6176 159064 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[1]), weightsX);
6177 : const auto weightsXY2 =
6178 159064 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[2]), weightsX);
6179 : const auto weightsXY3 =
6180 79532 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[3]), weightsX);
6181 :
// Top-left sample of the 4x4 window: one row up and one column left of
// iSrcOffset.
6182 79532 : const GPtrDiff_t iOffset = iSrcOffset - poWK->nSrcXSize - 1;
6183 :
6184 79532 : int iBand = 0;
6185 : // Process 2 bands at a time
6186 159064 : for (; iBand + 1 < poWK->nBands; iBand += 2)
6187 : {
6188 79532 : const T *CPL_RESTRICT pBand0 =
6189 79532 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
6190 79532 : const auto row0_0 = XMMLoad4Values(pBand0 + iOffset);
6191 : const auto row1_0 =
6192 79532 : XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
6193 : const auto row2_0 =
6194 79532 : XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
6195 : const auto row3_0 =
6196 79532 : XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
6197 :
6198 79532 : const T *CPL_RESTRICT pBand1 =
6199 79532 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand + 1]);
6200 79532 : const auto row0_1 = XMMLoad4Values(pBand1 + iOffset);
6201 : const auto row1_1 =
6202 79532 : XMMLoad4Values(pBand1 + iOffset + poWK->nSrcXSize);
6203 : const auto row2_1 =
6204 79532 : XMMLoad4Values(pBand1 + iOffset + 2 * poWK->nSrcXSize);
6205 : const auto row3_1 =
6206 79532 : XMMLoad4Values(pBand1 + iOffset + 3 * poWK->nSrcXSize);
6207 :
6208 : const float fValue_0 =
6209 79532 : Convolute4x4(row0_0, row1_0, row2_0, row3_0, weightsXY0,
6210 : weightsXY1, weightsXY2, weightsXY3);
6211 :
6212 : const float fValue_1 =
6213 79532 : Convolute4x4(row0_1, row1_1, row2_1, row3_1, weightsXY0,
6214 : weightsXY1, weightsXY2, weightsXY3);
6215 :
6216 79532 : T *CPL_RESTRICT pDstBand0 =
6217 79532 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
6218 79532 : pDstBand0[iDstOffset] = GWKClampValueT<T>(fValue_0);
6219 :
6220 79532 : T *CPL_RESTRICT pDstBand1 =
6221 79532 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand + 1]);
6222 79532 : pDstBand1[iDstOffset] = GWKClampValueT<T>(fValue_1);
6223 : }
// Odd number of bands: the last band is left over by the pair loop above.
6224 79532 : if (iBand < poWK->nBands)
6225 : {
6226 79532 : const T *CPL_RESTRICT pBand0 =
6227 79532 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
6228 79532 : const auto row0 = XMMLoad4Values(pBand0 + iOffset);
6229 : const auto row1 =
6230 79532 : XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
6231 : const auto row2 =
6232 79532 : XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
6233 : const auto row3 =
6234 79532 : XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
6235 :
6236 : const float fValue =
6237 79532 : Convolute4x4(row0, row1, row2, row3, weightsXY0, weightsXY1,
6238 : weightsXY2, weightsXY3);
6239 :
6240 79532 : T *CPL_RESTRICT pDstBand =
6241 79532 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
6242 79532 : pDstBand[iDstOffset] = GWKClampValueT<T>(fValue);
6243 : }
6244 : }
6245 :
6246 81323 : if (poWK->pafDstDensity)
6247 441 : poWK->pafDstDensity[iDstOffset] = 1.0f;
6248 81323 : }
6249 :
6250 : #endif // defined(__x86_64) || defined(_M_X64)
6251 :
6252 : /************************************************************************/
6253 : /* GWKResampleNoMasksOrDstDensityOnlyThreadInternal() */
6254 : /************************************************************************/
6255 :
// Worker routine shared by the "NoMasksOrDstDensityOnly" warping entry points.
//
// Template parameters:
//   T                   - pixel type of the source/destination band buffers.
//   eResample           - resampling algorithm, resolved at compile time.
//   bUse4SamplesFormula - when non-zero, bilinear/cubic resampling use the
//                         dedicated 4-sample helpers; otherwise
//                         GWKResampleNoMasksT() is used.
//
// pData is a GWKJobStruct*. The routine handles destination lines
// [psJob->iYMin, psJob->iYMax). For each line, the destination pixel
// positions are converted to source coordinates with poWK->pfnTransformer;
// for each pixel accepted by GWKCheckAndComputeSrcOffsets() one value per
// band is written into papabyDstImage, and pafDstDensity (when set) receives
// 1.0. No source validity/density mask is consulted in this routine. The line
// loop stops early when psJob->pfnProgress returns non-zero.
6256 : template <class T, GDALResampleAlg eResample, int bUse4SamplesFormula>
6257 1170 : static void GWKResampleNoMasksOrDstDensityOnlyThreadInternal(void *pData)
6258 :
6259 : {
6260 1170 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6261 1170 : GDALWarpKernel *poWK = psJob->poWK;
6262 1170 : const int iYMin = psJob->iYMin;
6263 1170 : const int iYMax = psJob->iYMax;
// Only read from the warp options when a vertical shift is applied; the
// option defaults to "1.0" when absent.
6264 1152 : const double dfMultFactorVerticalShiftPipeline =
6265 1170 : poWK->bApplyVerticalShift
6266 18 : ? CPLAtof(CSLFetchNameValueDef(
6267 18 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
6268 : "1.0"))
6269 : : 0.0;
6270 :
6271 1170 : const int nDstXSize = poWK->nDstXSize;
6272 1170 : const int nSrcXSize = poWK->nSrcXSize;
6273 1170 : const int nSrcYSize = poWK->nSrcYSize;
6274 :
6275 : /* -------------------------------------------------------------------- */
6276 : /* Allocate x,y,z coordinate arrays for transformation ... one */
6277 : /* scanlines worth of positions. */
6278 : /* -------------------------------------------------------------------- */
6279 :
6280 : // For x, 2 *, because we cache the precomputed values at the end.
6281 : double *padfX =
6282 1170 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
6283 : double *padfY =
6284 1170 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6285 : double *padfZ =
6286 1170 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6287 1170 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
6288 :
// Scratch weight buffers of 1 + 2 * radius entries each, passed to
// GWKResampleNoMasksT() below.
6289 1170 : const int nXRadius = poWK->nXRadius;
6290 : double *padfWeightsX =
6291 1170 : static_cast<double *>(CPLCalloc(1 + nXRadius * 2, sizeof(double)));
6292 : double *padfWeightsY = static_cast<double *>(
6293 1170 : CPLCalloc(1 + poWK->nYRadius * 2, sizeof(double)));
6294 1170 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
6295 1170 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
6296 1170 : const double dfErrorThreshold = CPLAtof(
6297 1170 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
6298 :
6299 : // Precompute values.
// The second half of padfX keeps the untransformed destination X positions;
// they are copied back into the first half at the start of every line.
6300 254594 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6301 253424 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
6302 :
6303 : /* ==================================================================== */
6304 : /* Loop over output lines. */
6305 : /* ==================================================================== */
6306 129808 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
6307 : {
6308 : /* --------------------------------------------------------------------
6309 : */
6310 : /* Setup points to transform to source image space. */
6311 : /* --------------------------------------------------------------------
6312 : */
6313 128639 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
6314 128639 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
6315 58383044 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6316 58254436 : padfY[iDstX] = dfY;
6317 128639 : memset(padfZ, 0, sizeof(double) * nDstXSize);
6318 :
6319 : /* --------------------------------------------------------------------
6320 : */
6321 : /* Transform the points from destination pixel/line coordinates */
6322 : /* to source pixel/line coordinates. */
6323 : /* --------------------------------------------------------------------
6324 : */
6325 128639 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
6326 : padfY, padfZ, pabSuccess);
// Optional rounding of the source coordinates, enabled by a positive
// SRC_COORD_PRECISION warp option.
6327 128639 : if (dfSrcCoordPrecision > 0.0)
6328 : {
6329 1000 : GWKRoundSourceCoordinates(
6330 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
6331 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6332 1000 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
6333 : }
6334 :
6335 : /* ====================================================================
6336 : */
6337 : /* Loop over pixels in output scanline. */
6338 : /* ====================================================================
6339 : */
6340 58292984 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6341 : {
6342 58164366 : GPtrDiff_t iSrcOffset = 0;
6343 58164366 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
6344 : padfX, padfY, nSrcXSize,
6345 : nSrcYSize, iSrcOffset))
6346 6540862 : continue;
6347 :
6348 : /* ====================================================================
6349 : */
6350 : /* Loop processing each band. */
6351 : /* ====================================================================
6352 : */
6353 51491452 : const GPtrDiff_t iDstOffset =
6354 51491452 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
6355 :
// Fast path on x86-64: 4-sample cubic on GByte/GUInt16 with more than one
// band and no vertical shift handles all bands in a single call, then skips
// the generic per-band loop below.
6356 : #if defined(__x86_64) || defined(_M_X64)
6357 : if constexpr (bUse4SamplesFormula && eResample == GRA_Cubic &&
6358 : (std::is_same<T, GByte>::value ||
6359 : std::is_same<T, GUInt16>::value))
6360 : {
6361 752574 : if (poWK->nBands > 1 && !poWK->bApplyVerticalShift)
6362 : {
6363 81323 : GWKCubicResampleNoMasks4MultiBandT<T>(
6364 81323 : poWK, padfX[iDstX] - poWK->nSrcXOff,
6365 81323 : padfY[iDstX] - poWK->nSrcYOff, iDstOffset);
6366 :
6367 81323 : continue;
6368 : }
6369 : }
6370 : #endif // defined(__x86_64) || defined(_M_X64)
6371 :
// dfInvWeights is only consumed by the GWKResampleNoMasksT() branch, hence
// [[maybe_unused]] for the other instantiations.
6372 51410129 : [[maybe_unused]] double dfInvWeights = 0;
6373 144108168 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6374 : {
6375 92484757 : T value = 0;
6376 : if constexpr (eResample == GRA_NearestNeighbour)
6377 : {
6378 76588049 : value = reinterpret_cast<T *>(
6379 76588049 : poWK->papabySrcImage[iBand])[iSrcOffset];
6380 : }
6381 : else if constexpr (bUse4SamplesFormula)
6382 : {
6383 : if constexpr (eResample == GRA_Bilinear)
6384 4806176 : GWKBilinearResampleNoMasks4SampleT(
6385 4806176 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6386 4806176 : padfY[iDstX] - poWK->nSrcYOff, &value);
6387 : else
6388 1906603 : GWKCubicResampleNoMasks4SampleT(
6389 1906603 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6390 1906603 : padfY[iDstX] - poWK->nSrcYOff, &value);
6391 : }
6392 : else
6393 : {
6394 9183929 : GWKResampleNoMasksT(
6395 9183929 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6396 9183929 : padfY[iDstX] - poWK->nSrcYOff, &value, padfWeightsX,
6397 : padfWeightsY, dfInvWeights);
6398 : }
6399 :
6400 92481547 : if (poWK->bApplyVerticalShift)
6401 : {
// A non-finite Z leaves this band of the destination pixel untouched.
6402 818 : if (!std::isfinite(padfZ[iDstX]))
6403 0 : continue;
6404 : // Subtract padfZ[] since the coordinate transformation is
6405 : // from target to source
6406 219426 : value = GWKClampValueT<T>(
6407 818 : value * poWK->dfMultFactorVerticalShift -
6408 818 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
6409 : }
6410 :
6411 92698117 : if (poWK->pafDstDensity)
6412 11712299 : poWK->pafDstDensity[iDstOffset] = 1.0f;
6413 :
6414 92698117 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
6415 : value;
6416 : }
6417 : }
6418 :
6419 : /* --------------------------------------------------------------------
6420 : */
6421 : /* Report progress to the user, and optionally cancel out. */
6422 : /* --------------------------------------------------------------------
6423 : */
6424 128639 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
6425 1 : break;
6426 : }
6427 :
6428 : /* -------------------------------------------------------------------- */
6429 : /* Cleanup and return. */
6430 : /* -------------------------------------------------------------------- */
6431 1170 : CPLFree(padfX);
6432 1170 : CPLFree(padfY);
6433 1170 : CPLFree(padfZ);
6434 1170 : CPLFree(pabSuccess);
6435 1170 : CPLFree(padfWeightsX);
6436 1170 : CPLFree(padfWeightsY);
6437 1170 : }
6438 :
// Thread entry point that always runs the internal worker with
// bUse4SamplesFormula = FALSE, for pixel type T and resampling eResample.
// pData is forwarded unchanged.
6439 : template <class T, GDALResampleAlg eResample>
6440 915 : static void GWKResampleNoMasksOrDstDensityOnlyThread(void *pData)
6441 : {
6442 915 : GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
6443 : pData);
6444 915 : }
6445 :
// Thread entry point for bilinear or cubic resampling (enforced by the
// static_assert) that picks between the two internal worker variants at run
// time: the 4-sample variant is used when both poWK->dfXScale and
// poWK->dfYScale are >= 0.95, the general variant otherwise.
// pData is a GWKJobStruct* and is forwarded unchanged.
6446 : template <class T, GDALResampleAlg eResample>
6447 255 : static void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread(void *pData)
6448 :
6449 : {
6450 255 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6451 255 : GDALWarpKernel *poWK = psJob->poWK;
6452 : static_assert(eResample == GRA_Bilinear || eResample == GRA_Cubic);
6453 255 : const bool bUse4SamplesFormula =
6454 255 : poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
6455 255 : if (bUse4SamplesFormula)
6456 155 : GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, TRUE>(
6457 : pData);
6458 : else
6459 100 : GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
6460 : pData);
6461 255 : }
6462 :
// Runs GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_NearestNeighbour>
// through GWKRun() and returns GWKRun()'s status.
6463 860 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6464 : {
6465 860 : return GWKRun(
6466 : poWK, "GWKNearestNoMasksOrDstDensityOnlyByte",
6467 860 : GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_NearestNeighbour>);
6468 : }
6469 :
// Runs GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte, GRA_Bilinear>
// through GWKRun() and returns GWKRun()'s status.
6470 125 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6471 : {
6472 125 : return GWKRun(
6473 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyByte",
6474 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte,
6475 125 : GRA_Bilinear>);
6476 : }
6477 :
// Runs GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte, GRA_Cubic>
// through GWKRun() and returns GWKRun()'s status.
6478 72 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6479 : {
6480 72 : return GWKRun(
6481 : poWK, "GWKCubicNoMasksOrDstDensityOnlyByte",
6482 72 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte, GRA_Cubic>);
6483 : }
6484 :
// Runs GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, GRA_Cubic>
// through GWKRun() and returns GWKRun()'s status.
6485 9 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
6486 : {
6487 9 : return GWKRun(
6488 : poWK, "GWKCubicNoMasksOrDstDensityOnlyFloat",
6489 9 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, GRA_Cubic>);
6490 : }
6491 :
6492 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
6493 :
// Runs GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double, GRA_Cubic>
// through GWKRun() and returns GWKRun()'s status. Only compiled when
// INSTANTIATE_FLOAT64_SSE2_IMPL is defined.
6494 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
6495 : {
6496 : return GWKRun(
6497 : poWK, "GWKCubicNoMasksOrDstDensityOnlyDouble",
6498 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double, GRA_Cubic>);
6499 : }
6500 : #endif
6501 :
// Runs GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_CubicSpline>
// through GWKRun() and returns GWKRun()'s status.
6502 12 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6503 : {
6504 12 : return GWKRun(
6505 : poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyByte",
6506 12 : GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_CubicSpline>);
6507 : }
6508 :
6509 : /************************************************************************/
6510 : /* GWKNearestThread() */
6511 : /* */
6512 : /* Case for Byte, Int16 or Float32 input data with nearest */
6513 : /* neighbour resampling using valid flags. Should be as fast as */
6514 : /* possible for this particular transformation type. */
6515 : /************************************************************************/
6516 :
// Worker routine for nearest-neighbour warping that honours the source
// validity mask and the source density arrays.
//
// T is the pixel type of the band buffers; pData is a GWKJobStruct*.
// The routine handles destination lines [psJob->iYMin, psJob->iYMax). For
// each destination pixel it:
//   - maps the pixel to a source offset (pfnTransformer followed by
//     GWKCheckAndComputeSrcOffsets()), skipping the pixel on failure;
//   - skips pixels whose source sample is flagged invalid in
//     panUnifiedSrcValid, unless one source corner failed to reproject and
//     GWKAdjustSrcOffsetOnEdge() accepts the pixel (it receives iSrcOffset;
//     presumably it moves it to a nearby valid sample - TODO confirm);
//   - skips pixels whose pafUnifiedSrcDensity is below SRC_DENSITY_THRESHOLD;
//   - copies (or blends, for partial band density) one value per band into
//     papabyDstImage;
//   - updates the destination density via GWKOverlayDensity() and sets the
//     pixel's bit in panDstValid when that mask exists.
// The line loop stops early when psJob->pfnProgress returns non-zero.
6517 343 : template <class T> static void GWKNearestThread(void *pData)
6518 :
6519 : {
6520 343 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6521 343 : GDALWarpKernel *poWK = psJob->poWK;
6522 343 : const int iYMin = psJob->iYMin;
6523 343 : const int iYMax = psJob->iYMax;
// Only read from the warp options when a vertical shift is applied; the
// option defaults to "1.0" when absent.
6524 343 : const double dfMultFactorVerticalShiftPipeline =
6525 343 : poWK->bApplyVerticalShift
6526 0 : ? CPLAtof(CSLFetchNameValueDef(
6527 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
6528 : "1.0"))
6529 : : 0.0;
6530 :
6531 343 : const int nDstXSize = poWK->nDstXSize;
6532 343 : const int nSrcXSize = poWK->nSrcXSize;
6533 343 : const int nSrcYSize = poWK->nSrcYSize;
6534 :
6535 : /* -------------------------------------------------------------------- */
6536 : /* Allocate x,y,z coordinate arrays for transformation ... one */
6537 : /* scanlines worth of positions. */
6538 : /* -------------------------------------------------------------------- */
6539 :
6540 : // For x, 2 *, because we cache the precomputed values at the end.
6541 : double *padfX =
6542 343 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
6543 : double *padfY =
6544 343 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6545 : double *padfZ =
6546 343 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6547 343 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
6548 :
6549 343 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
6550 343 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
6551 343 : const double dfErrorThreshold = CPLAtof(
6552 343 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
6553 :
6554 : const bool bOneSourceCornerFailsToReproject =
6555 343 : GWKOneSourceCornerFailsToReproject(psJob);
6556 :
6557 : // Precompute values.
// The second half of padfX keeps the untransformed destination X positions;
// they are copied back into the first half at the start of every line.
6558 49707 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6559 49364 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
6560 :
6561 : /* ==================================================================== */
6562 : /* Loop over output lines. */
6563 : /* ==================================================================== */
6564 37157 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
6565 : {
6566 :
6567 : /* --------------------------------------------------------------------
6568 : */
6569 : /* Setup points to transform to source image space. */
6570 : /* --------------------------------------------------------------------
6571 : */
6572 36814 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
6573 36814 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
6574 7743095 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6575 7706282 : padfY[iDstX] = dfY;
6576 36814 : memset(padfZ, 0, sizeof(double) * nDstXSize);
6577 :
6578 : /* --------------------------------------------------------------------
6579 : */
6580 : /* Transform the points from destination pixel/line coordinates */
6581 : /* to source pixel/line coordinates. */
6582 : /* --------------------------------------------------------------------
6583 : */
6584 36814 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
6585 : padfY, padfZ, pabSuccess);
// Optional rounding of the source coordinates, enabled by a positive
// SRC_COORD_PRECISION warp option.
6586 36814 : if (dfSrcCoordPrecision > 0.0)
6587 : {
6588 0 : GWKRoundSourceCoordinates(
6589 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
6590 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6591 0 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
6592 : }
6593 : /* ====================================================================
6594 : */
6595 : /* Loop over pixels in output scanline. */
6596 : /* ====================================================================
6597 : */
6598 7743095 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6599 : {
6600 7706282 : GPtrDiff_t iSrcOffset = 0;
6601 7706282 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
6602 : padfX, padfY, nSrcXSize,
6603 : nSrcYSize, iSrcOffset))
6604 2164638 : continue;
6605 :
6606 : /* --------------------------------------------------------------------
6607 : */
6608 : /* Do not try to apply invalid source pixels to the dest. */
6609 : /* --------------------------------------------------------------------
6610 : */
6611 7524668 : if (poWK->panUnifiedSrcValid != nullptr &&
6612 931241 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
6613 : {
6614 49670 : if (!bOneSourceCornerFailsToReproject)
6615 : {
6616 42185 : continue;
6617 : }
6618 7485 : else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
6619 : {
6620 5224 : continue;
6621 : }
6622 : }
6623 :
6624 : /* --------------------------------------------------------------------
6625 : */
6626 : /* Do not try to apply transparent source pixels to the
6627 : * destination.*/
6628 : /* --------------------------------------------------------------------
6629 : */
6630 6546016 : double dfDensity = 1.0;
6631 :
6632 6546016 : if (poWK->pafUnifiedSrcDensity != nullptr)
6633 : {
6634 1162245 : dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
6635 1162245 : if (dfDensity < SRC_DENSITY_THRESHOLD)
6636 1004371 : continue;
6637 : }
6638 :
6639 : /* ====================================================================
6640 : */
6641 : /* Loop processing each band. */
6642 : /* ====================================================================
6643 : */
6644 :
6645 5541654 : const GPtrDiff_t iDstOffset =
6646 5541654 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
6647 :
6648 12873738 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6649 : {
6650 7332114 : T value = 0;
6651 7332114 : double dfBandDensity = 0.0;
6652 :
6653 : /* --------------------------------------------------------------------
6654 : */
6655 : /* Collect the source value. */
6656 : /* --------------------------------------------------------------------
6657 : */
6658 7332114 : if (GWKGetPixelT(poWK, iBand, iSrcOffset, &dfBandDensity,
6659 : &value))
6660 : {
6661 :
6662 7332104 : if (poWK->bApplyVerticalShift)
6663 : {
// A non-finite Z leaves this band of the destination pixel untouched.
6664 0 : if (!std::isfinite(padfZ[iDstX]))
6665 0 : continue;
6666 : // Subtract padfZ[] since the coordinate transformation
6667 : // is from target to source
6668 0 : value = GWKClampValueT<T>(
6669 0 : value * poWK->dfMultFactorVerticalShift -
6670 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
6671 : }
6672 :
// Full band density: plain copy. Partial density: blend through
// GWKSetPixelValueRealT(). Zero density: leave the destination as is.
6673 7332104 : if (dfBandDensity < 1.0)
6674 : {
6675 159076 : if (dfBandDensity == 0.0)
6676 : {
6677 : // Do nothing.
6678 : }
6679 : else
6680 : {
6681 : // Let the general code take care of mixing.
6682 159076 : GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
6683 : dfBandDensity, value);
6684 : }
6685 : }
6686 : else
6687 : {
6688 7173023 : reinterpret_cast<T *>(
6689 7173023 : poWK->papabyDstImage[iBand])[iDstOffset] = value;
6690 : }
6691 : }
6692 : }
6693 :
6694 : /* --------------------------------------------------------------------
6695 : */
6696 : /* Mark this pixel valid/opaque in the output. */
6697 : /* --------------------------------------------------------------------
6698 : */
6699 5541654 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
6700 :
6701 5541654 : if (poWK->panDstValid != nullptr)
6702 : {
6703 4862206 : CPLMaskSet(poWK->panDstValid, iDstOffset);
6704 : }
6705 : } /* Next iDstX */
6706 :
6707 : /* --------------------------------------------------------------------
6708 : */
6709 : /* Report progress to the user, and optionally cancel out. */
6710 : /* --------------------------------------------------------------------
6711 : */
6712 36814 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
6713 0 : break;
6714 : }
6715 :
6716 : /* -------------------------------------------------------------------- */
6717 : /* Cleanup and return. */
6718 : /* -------------------------------------------------------------------- */
6719 343 : CPLFree(padfX);
6720 343 : CPLFree(padfY);
6721 343 : CPLFree(padfZ);
6722 343 : CPLFree(pabSuccess);
6723 343 : }
6724 :
// Runs GWKNearestThread<GByte> through GWKRun() and returns GWKRun()'s status.
6725 276 : static CPLErr GWKNearestByte(GDALWarpKernel *poWK)
6726 : {
6727 276 : return GWKRun(poWK, "GWKNearestByte", GWKNearestThread<GByte>);
6728 : }
6729 :
// Runs GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_NearestNeighbour>
// through GWKRun() and returns GWKRun()'s status.
6730 18 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6731 : {
6732 18 : return GWKRun(
6733 : poWK, "GWKNearestNoMasksOrDstDensityOnlyShort",
6734 18 : GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_NearestNeighbour>);
6735 : }
6736 :
// Runs GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16, GRA_Bilinear>
// through GWKRun() and returns GWKRun()'s status.
6737 18 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6738 : {
6739 18 : return GWKRun(
6740 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyShort",
6741 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16,
6742 18 : GRA_Bilinear>);
6743 : }
6744 :
// Runs GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16, GRA_Bilinear>
// through GWKRun() and returns GWKRun()'s status.
6745 6 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6746 : {
6747 6 : return GWKRun(
6748 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyUShort",
6749 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16,
6750 6 : GRA_Bilinear>);
6751 : }
6752 :
// Runs GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, GRA_Bilinear>
// through GWKRun() and returns GWKRun()'s status.
6753 5 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
6754 : {
6755 5 : return GWKRun(
6756 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyFloat",
6757 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float,
6758 5 : GRA_Bilinear>);
6759 : }
6760 :
6761 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
6762 :
// Runs GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double, GRA_Bilinear>
// through GWKRun() and returns GWKRun()'s status. Only compiled when
// INSTANTIATE_FLOAT64_SSE2_IMPL is defined.
6763 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
6764 : {
6765 : return GWKRun(
6766 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyDouble",
6767 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double,
6768 : GRA_Bilinear>);
6769 : }
6770 : #endif
6771 :
// Runs GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16, GRA_Cubic>
// through GWKRun() and returns GWKRun()'s status.
6772 5 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6773 : {
6774 5 : return GWKRun(
6775 : poWK, "GWKCubicNoMasksOrDstDensityOnlyShort",
6776 5 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16, GRA_Cubic>);
6777 : }
6778 :
// Runs GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16, GRA_Cubic>
// through GWKRun() and returns GWKRun()'s status.
6779 12 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6780 : {
6781 12 : return GWKRun(
6782 : poWK, "GWKCubicNoMasksOrDstDensityOnlyUShort",
6783 12 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16, GRA_Cubic>);
6784 : }
6785 :
// Runs GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_CubicSpline>
// through GWKRun() and returns GWKRun()'s status.
6786 6 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6787 : {
6788 6 : return GWKRun(
6789 : poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyShort",
6790 6 : GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_CubicSpline>);
6791 : }
6792 :
// Runs GWKResampleNoMasksOrDstDensityOnlyThread<GUInt16, GRA_CubicSpline>
// through GWKRun() and returns GWKRun()'s status.
6793 5 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6794 : {
6795 5 : return GWKRun(
6796 : poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyUShort",
6797 5 : GWKResampleNoMasksOrDstDensityOnlyThread<GUInt16, GRA_CubicSpline>);
6798 : }
6799 :
// Runs GWKNearestThread<GInt16> through GWKRun() and returns GWKRun()'s status.
6800 27 : static CPLErr GWKNearestShort(GDALWarpKernel *poWK)
6801 : {
6802 27 : return GWKRun(poWK, "GWKNearestShort", GWKNearestThread<GInt16>);
6803 : }
6804 :
// Runs GWKResampleNoMasksOrDstDensityOnlyThread<float, GRA_NearestNeighbour>
// through GWKRun() and returns GWKRun()'s status.
6805 11 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
6806 : {
6807 11 : return GWKRun(
6808 : poWK, "GWKNearestNoMasksOrDstDensityOnlyFloat",
6809 11 : GWKResampleNoMasksOrDstDensityOnlyThread<float, GRA_NearestNeighbour>);
6810 : }
6811 :
// Runs GWKNearestThread<float> through GWKRun() and returns GWKRun()'s status.
6812 36 : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK)
6813 : {
6814 36 : return GWKRun(poWK, "GWKNearestFloat", GWKNearestThread<float>);
6815 : }
6816 :
6817 : /************************************************************************/
6818 : /* GWKAverageOrMode() */
6819 : /* */
6820 : /************************************************************************/
6821 :
// Forward declaration: the worker is defined after the entry point that
// references it.
6822 : static void GWKAverageOrModeThread(void *pData);
6823 :
// Runs GWKAverageOrModeThread through GWKRun() under the label
// "GWKAverageOrMode" and returns GWKRun()'s status.
6824 118 : static CPLErr GWKAverageOrMode(GDALWarpKernel *poWK)
6825 : {
6826 118 : return GWKRun(poWK, "GWKAverageOrMode", GWKAverageOrModeThread);
6827 : }
6828 :
6829 : // Overall logic based on GWKGeneralCaseThread().
6830 118 : static void GWKAverageOrModeThread(void *pData)
6831 : {
6832 118 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6833 118 : GDALWarpKernel *poWK = psJob->poWK;
6834 118 : const int iYMin = psJob->iYMin;
6835 118 : const int iYMax = psJob->iYMax;
6836 : const double dfMultFactorVerticalShiftPipeline =
6837 118 : poWK->bApplyVerticalShift
6838 118 : ? CPLAtof(CSLFetchNameValueDef(
6839 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
6840 : "1.0"))
6841 118 : : 0.0;
6842 :
6843 118 : const int nDstXSize = poWK->nDstXSize;
6844 118 : const int nSrcXSize = poWK->nSrcXSize;
6845 118 : const int nSrcYSize = poWK->nSrcYSize;
6846 :
6847 : /* -------------------------------------------------------------------- */
6848 : /* Find out which algorithm to use (small optim.) */
6849 : /* -------------------------------------------------------------------- */
6850 118 : int nAlgo = 0;
6851 :
6852 : // These vars are only used with nAlgo == GWKAOM_Imode.
6853 118 : int *panVals = nullptr;
6854 118 : int nBins = 0;
6855 118 : int nBinsOffset = 0;
6856 :
6857 : // Only used with nAlgo == GWKAOM_Fmode.
6858 118 : float *pafRealVals = nullptr;
6859 118 : float *pafImagVals = nullptr;
6860 118 : int *panRealSums = nullptr;
6861 118 : int *panImagSums = nullptr;
6862 :
6863 : // Only used with nAlgo == GWKAOM_Quant.
6864 118 : float quant = 0.5;
6865 :
6866 : // To control array allocation only when data type is complex
6867 118 : const bool bIsComplex = GDALDataTypeIsComplex(poWK->eWorkingDataType) != 0;
6868 :
6869 118 : if (poWK->eResample == GRA_Average)
6870 : {
6871 71 : nAlgo = GWKAOM_Average;
6872 : }
6873 47 : else if (poWK->eResample == GRA_RMS)
6874 : {
6875 9 : nAlgo = GWKAOM_RMS;
6876 : }
6877 38 : else if (poWK->eResample == GRA_Mode)
6878 : {
6879 : // TODO check color table count > 256.
6880 11 : if (poWK->eWorkingDataType == GDT_Byte ||
6881 5 : poWK->eWorkingDataType == GDT_UInt16 ||
6882 5 : poWK->eWorkingDataType == GDT_Int16)
6883 : {
6884 9 : nAlgo = GWKAOM_Imode;
6885 :
6886 : // In the case of a paletted or non-paletted byte band,
6887 : // Input values are between 0 and 255.
6888 9 : if (poWK->eWorkingDataType == GDT_Byte)
6889 : {
6890 6 : nBins = 256;
6891 : }
6892 : // In the case of Int8, input values are between -128 and 127.
6893 3 : else if (poWK->eWorkingDataType == GDT_Int8)
6894 : {
6895 0 : nBins = 256;
6896 0 : nBinsOffset = 128;
6897 : }
6898 : // In the case of Int16, input values are between -32768 and 32767.
6899 3 : else if (poWK->eWorkingDataType == GDT_Int16)
6900 : {
6901 3 : nBins = 65536;
6902 3 : nBinsOffset = 32768;
6903 : }
6904 : // In the case of UInt16, input values are between 0 and 65535.
6905 0 : else if (poWK->eWorkingDataType == GDT_UInt16)
6906 : {
6907 0 : nBins = 65536;
6908 : }
6909 : panVals =
6910 9 : static_cast<int *>(VSI_MALLOC_VERBOSE(nBins * sizeof(int)));
6911 9 : if (panVals == nullptr)
6912 0 : return;
6913 : }
6914 : else
6915 : {
6916 2 : nAlgo = GWKAOM_Fmode;
6917 :
6918 2 : if (nSrcXSize > 0 && nSrcYSize > 0)
6919 : {
6920 : pafRealVals = static_cast<float *>(
6921 2 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
6922 : panRealSums = static_cast<int *>(
6923 2 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(int)));
6924 2 : if (pafRealVals == nullptr || panRealSums == nullptr)
6925 : {
6926 0 : VSIFree(pafRealVals);
6927 0 : VSIFree(panRealSums);
6928 0 : return;
6929 : }
6930 : }
6931 : }
6932 : }
6933 27 : else if (poWK->eResample == GRA_Max)
6934 : {
6935 6 : nAlgo = GWKAOM_Max;
6936 : }
6937 21 : else if (poWK->eResample == GRA_Min)
6938 : {
6939 5 : nAlgo = GWKAOM_Min;
6940 : }
6941 16 : else if (poWK->eResample == GRA_Med)
6942 : {
6943 6 : nAlgo = GWKAOM_Quant;
6944 6 : quant = 0.5;
6945 : }
6946 10 : else if (poWK->eResample == GRA_Q1)
6947 : {
6948 5 : nAlgo = GWKAOM_Quant;
6949 5 : quant = 0.25;
6950 : }
6951 5 : else if (poWK->eResample == GRA_Q3)
6952 : {
6953 5 : nAlgo = GWKAOM_Quant;
6954 5 : quant = 0.75;
6955 : }
6956 : #ifdef disabled
6957 : else if (poWK->eResample == GRA_Sum)
6958 : {
6959 : nAlgo = GWKAOM_Sum;
6960 : }
6961 : #endif
6962 : else
6963 : {
6964 : // Other resample algorithms not permitted here.
6965 0 : CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread() ERROR, "
6966 : "illegal resample");
6967 0 : return;
6968 : }
6969 :
6970 118 : CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread() using algo %d",
6971 : nAlgo);
6972 :
6973 : /* -------------------------------------------------------------------- */
6974 : /* Allocate x,y,z coordinate arrays for transformation ... two */
6975 : /* scanlines worth of positions. */
6976 : /* -------------------------------------------------------------------- */
6977 :
6978 : double *padfX =
6979 118 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6980 : double *padfY =
6981 118 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6982 : double *padfZ =
6983 118 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6984 : double *padfX2 =
6985 118 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6986 : double *padfY2 =
6987 118 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6988 : double *padfZ2 =
6989 118 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6990 118 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
6991 118 : int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
6992 :
6993 118 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
6994 118 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
6995 118 : const double dfErrorThreshold = CPLAtof(
6996 118 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
6997 :
6998 : const double dfExcludedValuesThreshold =
6999 118 : CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
7000 : "EXCLUDED_VALUES_PCT_THRESHOLD", "50")) /
7001 118 : 100.0;
7002 : const double dfNodataValuesThreshold =
7003 118 : CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
7004 : "NODATA_VALUES_PCT_THRESHOLD", "100")) /
7005 118 : 100.0;
7006 :
7007 : const int nXMargin =
7008 118 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
7009 : const int nYMargin =
7010 118 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
7011 :
7012 : /* ==================================================================== */
7013 : /* Loop over output lines. */
7014 : /* ==================================================================== */
7015 6603 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
7016 : {
7017 :
7018 : /* --------------------------------------------------------------------
7019 : */
7020 : /* Setup points to transform to source image space. */
7021 : /* --------------------------------------------------------------------
7022 : */
7023 1669810 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
7024 : {
7025 1663330 : padfX[iDstX] = iDstX + poWK->nDstXOff;
7026 1663330 : padfY[iDstX] = iDstY + poWK->nDstYOff;
7027 1663330 : padfZ[iDstX] = 0.0;
7028 1663330 : padfX2[iDstX] = iDstX + 1.0 + poWK->nDstXOff;
7029 1663330 : padfY2[iDstX] = iDstY + 1.0 + poWK->nDstYOff;
7030 1663330 : padfZ2[iDstX] = 0.0;
7031 : }
7032 :
7033 : /* --------------------------------------------------------------------
7034 : */
7035 : /* Transform the points from destination pixel/line coordinates */
7036 : /* to source pixel/line coordinates. */
7037 : /* --------------------------------------------------------------------
7038 : */
7039 6485 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
7040 : padfY, padfZ, pabSuccess);
7041 6485 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX2,
7042 : padfY2, padfZ2, pabSuccess2);
7043 :
7044 6485 : if (dfSrcCoordPrecision > 0.0)
7045 : {
7046 0 : GWKRoundSourceCoordinates(
7047 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
7048 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
7049 0 : poWK->nDstXOff, iDstY + poWK->nDstYOff);
7050 0 : GWKRoundSourceCoordinates(
7051 : nDstXSize, padfX2, padfY2, padfZ2, pabSuccess2,
7052 : dfSrcCoordPrecision, dfErrorThreshold, poWK->pfnTransformer,
7053 0 : psJob->pTransformerArg, 1.0 + poWK->nDstXOff,
7054 0 : iDstY + 1.0 + poWK->nDstYOff);
7055 : }
7056 :
7057 : /* ====================================================================
7058 : */
7059 : /* Loop over pixels in output scanline. */
7060 : /* ====================================================================
7061 : */
7062 1669810 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
7063 : {
7064 1663330 : GPtrDiff_t iSrcOffset = 0;
7065 1663330 : double dfDensity = 1.0;
7066 1663330 : bool bHasFoundDensity = false;
7067 :
7068 1663330 : if (!pabSuccess[iDstX] || !pabSuccess2[iDstX])
7069 311460 : continue;
7070 :
7071 : // Add some checks so that padfX[iDstX] - poWK->nSrcXOff is in
7072 : // reasonable range (https://github.com/OSGeo/gdal/issues/2365)
7073 1663330 : if (!(padfX[iDstX] - poWK->nSrcXOff >= -nXMargin &&
7074 1663310 : padfX2[iDstX] - poWK->nSrcXOff >= -nXMargin &&
7075 1663310 : padfY[iDstX] - poWK->nSrcYOff >= -nYMargin &&
7076 1663290 : padfY2[iDstX] - poWK->nSrcYOff >= -nYMargin &&
7077 1663290 : padfX[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
7078 1663290 : padfX2[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
7079 1663280 : padfY[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin &&
7080 1663280 : padfY2[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin))
7081 : {
7082 62 : continue;
7083 : }
7084 :
7085 1663260 : const GPtrDiff_t iDstOffset =
7086 1663260 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
7087 :
7088 : // Compute corners in source crs.
7089 :
7090 : // The transformation might not have preserved ordering of
7091 : // coordinates so do the necessary swapping (#5433).
7092 : // NOTE: this is really an approximative fix. To do something
7093 : // more precise we would for example need to compute the
7094 : // transformation of coordinates in the
7095 : // [iDstX,iDstY]x[iDstX+1,iDstY+1] square back to source
7096 : // coordinates, and take the bounding box of the got source
7097 : // coordinates.
7098 :
7099 1663260 : if (padfX[iDstX] > padfX2[iDstX])
7100 268744 : std::swap(padfX[iDstX], padfX2[iDstX]);
7101 :
7102 : // Detect situations where the target pixel is close to the
7103 : // antimeridian and when padfX[iDstX] and padfX2[iDstX] are very
7104 : // close to the left-most and right-most columns of the source
7105 : // raster. The 2 value below was experimentally determined to
7106 : // avoid false-positives and false-negatives.
7107 : // Addresses https://github.com/OSGeo/gdal/issues/6478
7108 1663260 : bool bWrapOverX = false;
7109 1663260 : const int nThresholdWrapOverX = std::min(2, nSrcXSize / 10);
7110 1663260 : if (poWK->nSrcXOff == 0 &&
7111 1663260 : padfX[iDstX] * poWK->dfXScale < nThresholdWrapOverX &&
7112 14495 : (nSrcXSize - padfX2[iDstX]) * poWK->dfXScale <
7113 : nThresholdWrapOverX)
7114 : {
7115 : // Check there is a discontinuity by checking at mid-pixel.
7116 : // NOTE: all this remains fragile. To confidently
7117 : // detect antimeridian warping we should probably try to access
7118 : // georeferenced coordinates, and not rely only on tests on
7119 : // image space coordinates. But accessing georeferenced
7120 : // coordinates from here is not trivial, and we would for example
7121 : // have to handle both geographic, Mercator, etc.
7122 : // Let's hope this heuristic is good enough for now.
7123 1041 : double x = iDstX + 0.5 + poWK->nDstXOff;
7124 1041 : double y = iDstY + poWK->nDstYOff;
7125 1041 : double z = 0;
7126 1041 : int bSuccess = FALSE;
7127 1041 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1, &x, &y,
7128 : &z, &bSuccess);
7129 1041 : if (bSuccess && x < padfX[iDstX])
7130 : {
7131 1008 : bWrapOverX = true;
7132 1008 : std::swap(padfX[iDstX], padfX2[iDstX]);
7133 1008 : padfX2[iDstX] += nSrcXSize;
7134 : }
7135 : }
7136 :
7137 1663260 : const double dfXMin = padfX[iDstX] - poWK->nSrcXOff;
7138 1663260 : const double dfXMax = padfX2[iDstX] - poWK->nSrcXOff;
7139 1663260 : constexpr double EPS = 1e-10;
7140 : // Check that [dfXMin, dfXMax] intersect with [0,nSrcXSize] with a tolerance
7141 1663260 : if (!(dfXMax > -EPS && dfXMin < nSrcXSize + EPS))
7142 72 : continue;
7143 1663190 : int iSrcXMin = static_cast<int>(std::max(floor(dfXMin + EPS), 0.0));
7144 1663190 : int iSrcXMax = static_cast<int>(
7145 1663190 : std::min(ceil(dfXMax - EPS), static_cast<double>(INT_MAX)));
7146 1663190 : if (!bWrapOverX)
7147 1662180 : iSrcXMax = std::min(iSrcXMax, nSrcXSize);
7148 1663190 : if (iSrcXMin == iSrcXMax && iSrcXMax < nSrcXSize)
7149 472 : iSrcXMax++;
7150 :
7151 1663190 : if (padfY[iDstX] > padfY2[iDstX])
7152 270107 : std::swap(padfY[iDstX], padfY2[iDstX]);
7153 1663190 : const double dfYMin = padfY[iDstX] - poWK->nSrcYOff;
7154 1663190 : const double dfYMax = padfY2[iDstX] - poWK->nSrcYOff;
7155 : // Check that [dfYMin, dfYMax] intersect with [0,nSrcYSize] with a tolerance
7156 1663190 : if (!(dfYMax > -EPS && dfYMin < nSrcYSize + EPS))
7157 36 : continue;
7158 1663160 : int iSrcYMin = static_cast<int>(std::max(floor(dfYMin + EPS), 0.0));
7159 : int iSrcYMax =
7160 1663160 : std::min(static_cast<int>(ceil(dfYMax - EPS)), nSrcYSize);
7161 1663160 : if (iSrcYMin == iSrcYMax && iSrcYMax < nSrcYSize)
7162 0 : iSrcYMax++;
7163 :
7164 : #define COMPUTE_WEIGHT_Y(iSrcY) \
7165 : ((iSrcY == iSrcYMin) \
7166 : ? ((iSrcYMin + 1 == iSrcYMax) ? 1.0 : 1 - (dfYMin - iSrcYMin)) \
7167 : : (iSrcY + 1 == iSrcYMax) ? 1 - (iSrcYMax - dfYMax) \
7168 : : 1.0)
7169 :
7170 : #define COMPUTE_WEIGHT(iSrcX, dfWeightY) \
7171 : ((iSrcX == iSrcXMin) ? ((iSrcXMin + 1 == iSrcXMax) \
7172 : ? dfWeightY \
7173 : : dfWeightY * (1 - (dfXMin - iSrcXMin))) \
7174 : : (iSrcX + 1 == iSrcXMax) ? dfWeightY * (1 - (iSrcXMax - dfXMax)) \
7175 : : dfWeightY)
7176 :
7177 1663160 : bool bDone = false;
7178 :
7179 : // Special Average mode where we process all bands together,
7180 : // to avoid averaging tuples that match an entry of m_aadfExcludedValues
7181 2267230 : if (nAlgo == GWKAOM_Average &&
7182 604073 : (!poWK->m_aadfExcludedValues.empty() ||
7183 393224 : dfNodataValuesThreshold < 1 - EPS) &&
7184 2267230 : !poWK->bApplyVerticalShift && !bIsComplex)
7185 : {
7186 393224 : double dfTotalWeightInvalid = 0.0;
7187 393224 : double dfTotalWeightExcluded = 0.0;
7188 393224 : double dfTotalWeightRegular = 0.0;
7189 786448 : std::vector<double> adfValueReal(poWK->nBands, 0);
7190 786448 : std::vector<double> adfValueAveraged(poWK->nBands, 0);
7191 : std::vector<int> anCountExcludedValues(
7192 393224 : poWK->m_aadfExcludedValues.size(), 0);
7193 :
7194 1572890 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7195 : {
7196 1179660 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7197 1179660 : iSrcOffset =
7198 1179660 : iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7199 5111860 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7200 : iSrcX++, iSrcOffset++)
7201 : {
7202 3932190 : if (bWrapOverX)
7203 0 : iSrcOffset =
7204 0 : (iSrcX % nSrcXSize) +
7205 0 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7206 :
7207 3932190 : const double dfWeight =
7208 3932190 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7209 3932190 : if (dfWeight <= 0)
7210 0 : continue;
7211 :
7212 3932200 : if (poWK->panUnifiedSrcValid != nullptr &&
7213 12 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
7214 : {
7215 3 : dfTotalWeightInvalid += dfWeight;
7216 3 : continue;
7217 : }
7218 :
7219 3932190 : bool bAllValid = true;
7220 7274900 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7221 : {
7222 6160660 : double dfBandDensity = 0;
7223 6160660 : double dfValueImagTmp = 0;
7224 9503370 : if (!(GWKGetPixelValue(
7225 : poWK, iBand, iSrcOffset, &dfBandDensity,
7226 6160660 : &adfValueReal[iBand], &dfValueImagTmp) &&
7227 3342710 : dfBandDensity > BAND_DENSITY_THRESHOLD))
7228 : {
7229 2817950 : bAllValid = false;
7230 2817950 : break;
7231 : }
7232 : }
7233 :
7234 3932190 : if (!bAllValid)
7235 : {
7236 2817950 : dfTotalWeightInvalid += dfWeight;
7237 2817950 : continue;
7238 : }
7239 :
7240 1114240 : bool bExcludedValueFound = false;
7241 2228350 : for (size_t i = 0;
7242 2228350 : i < poWK->m_aadfExcludedValues.size(); ++i)
7243 : {
7244 1114130 : if (poWK->m_aadfExcludedValues[i] == adfValueReal)
7245 : {
7246 21 : bExcludedValueFound = true;
7247 21 : ++anCountExcludedValues[i];
7248 21 : dfTotalWeightExcluded += dfWeight;
7249 21 : break;
7250 : }
7251 : }
7252 1114240 : if (!bExcludedValueFound)
7253 : {
7254 : // Weighted incremental algorithm mean
7255 : // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
7256 1114220 : dfTotalWeightRegular += dfWeight;
7257 4456870 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7258 : {
7259 3342650 : adfValueAveraged[iBand] +=
7260 6685300 : (dfWeight / dfTotalWeightRegular) *
7261 6685300 : (adfValueReal[iBand] -
7262 3342650 : adfValueAveraged[iBand]);
7263 : }
7264 : }
7265 : }
7266 : }
7267 :
7268 393224 : const double dfTotalWeight = dfTotalWeightInvalid +
7269 : dfTotalWeightExcluded +
7270 : dfTotalWeightRegular;
7271 393224 : if (dfTotalWeightInvalid > 0 &&
7272 : dfTotalWeightInvalid >=
7273 311293 : dfNodataValuesThreshold * dfTotalWeight)
7274 : {
7275 : // Do nothing. Leave bHasFoundDensity set to false.
7276 : }
7277 81934 : else if (dfTotalWeightExcluded > 0 &&
7278 : dfTotalWeightExcluded >=
7279 6 : dfExcludedValuesThreshold * dfTotalWeight)
7280 : {
7281 : // Find the most represented excluded value tuple
7282 3 : size_t iExcludedValue = 0;
7283 3 : int nExcludedValueCount = 0;
7284 6 : for (size_t i = 0; i < poWK->m_aadfExcludedValues.size();
7285 : ++i)
7286 : {
7287 3 : if (anCountExcludedValues[i] > nExcludedValueCount)
7288 : {
7289 3 : iExcludedValue = i;
7290 3 : nExcludedValueCount = anCountExcludedValues[i];
7291 : }
7292 : }
7293 :
7294 3 : bHasFoundDensity = true;
7295 :
7296 12 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7297 : {
7298 9 : GWKSetPixelValue(
7299 : poWK, iBand, iDstOffset, /* dfBandDensity = */ 1.0,
7300 9 : poWK->m_aadfExcludedValues[iExcludedValue][iBand],
7301 : 0);
7302 3 : }
7303 : }
7304 81931 : else if (dfTotalWeightRegular > 0)
7305 : {
7306 81931 : bHasFoundDensity = true;
7307 :
7308 327720 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7309 : {
7310 245789 : GWKSetPixelValue(poWK, iBand, iDstOffset,
7311 : /* dfBandDensity = */ 1.0,
7312 245789 : adfValueAveraged[iBand], 0);
7313 : }
7314 : }
7315 :
7316 : // Skip below loop on bands
7317 393224 : bDone = true;
7318 : }
7319 :
7320 : /* ====================================================================
7321 : */
7322 : /* Loop processing each band. */
7323 : /* ====================================================================
7324 : */
7325 :
7326 4439520 : for (int iBand = 0; !bDone && iBand < poWK->nBands; iBand++)
7327 : {
7328 2776360 : double dfBandDensity = 0.0;
7329 2776360 : double dfValueReal = 0.0;
7330 2776360 : double dfValueImag = 0.0;
7331 2776360 : double dfValueRealTmp = 0.0;
7332 2776360 : double dfValueImagTmp = 0.0;
7333 :
7334 : /* --------------------------------------------------------------------
7335 : */
7336 : /* Collect the source value. */
7337 : /* --------------------------------------------------------------------
7338 : */
7339 :
7340 : // Loop over source lines and pixels - 3 possible algorithms.
7341 :
7342 : // poWK->eResample == GRA_Average.
7343 2776360 : if (nAlgo == GWKAOM_Average)
7344 : {
7345 300849 : double dfTotalWeight = 0.0;
7346 :
7347 : // This code adapted from GDALDownsampleChunk32R_AverageT()
7348 : // in gcore/overview.cpp.
7349 631308 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7350 : {
7351 330459 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7352 330459 : iSrcOffset = iSrcXMin +
7353 330459 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7354 803200 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7355 : iSrcX++, iSrcOffset++)
7356 : {
7357 472741 : if (bWrapOverX)
7358 630 : iSrcOffset =
7359 630 : (iSrcX % nSrcXSize) +
7360 630 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7361 :
7362 472745 : if (poWK->panUnifiedSrcValid != nullptr &&
7363 4 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7364 : iSrcOffset))
7365 : {
7366 1 : continue;
7367 : }
7368 :
7369 472740 : if (GWKGetPixelValue(
7370 : poWK, iBand, iSrcOffset, &dfBandDensity,
7371 945480 : &dfValueRealTmp, &dfValueImagTmp) &&
7372 472740 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7373 : {
7374 472740 : const double dfWeight =
7375 472740 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7376 472740 : if (dfWeight > 0)
7377 : {
7378 : // Weighted incremental algorithm mean
7379 : // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
7380 472740 : dfTotalWeight += dfWeight;
7381 472740 : dfValueReal +=
7382 472740 : (dfWeight / dfTotalWeight) *
7383 472740 : (dfValueRealTmp - dfValueReal);
7384 472740 : if (bIsComplex)
7385 : {
7386 252 : dfValueImag +=
7387 252 : (dfWeight / dfTotalWeight) *
7388 252 : (dfValueImagTmp - dfValueImag);
7389 : }
7390 : }
7391 : }
7392 : }
7393 : }
7394 :
7395 300849 : if (dfTotalWeight > 0)
7396 : {
7397 300849 : if (poWK->bApplyVerticalShift)
7398 : {
7399 0 : if (!std::isfinite(padfZ[iDstX]))
7400 0 : continue;
7401 : // Subtract padfZ[] since the coordinate
7402 : // transformation is from target to source
7403 0 : dfValueReal =
7404 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
7405 0 : padfZ[iDstX] *
7406 : dfMultFactorVerticalShiftPipeline;
7407 : }
7408 :
7409 300849 : dfBandDensity = 1;
7410 300849 : bHasFoundDensity = true;
7411 : }
7412 : } // GRA_Average.
7413 : // poWK->eResample == GRA_RMS.
7414 2776360 : if (nAlgo == GWKAOM_RMS)
7415 : {
7416 300416 : double dfTotalReal = 0.0;
7417 300416 : double dfTotalImag = 0.0;
7418 300416 : double dfTotalWeight = 0.0;
7419 : // This code adapted from GDALDownsampleChunk32R_AverageT()
7420 : // in gcore/overview.cpp.
7421 630578 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7422 : {
7423 330162 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7424 330162 : iSrcOffset = iSrcXMin +
7425 330162 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7426 802723 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7427 : iSrcX++, iSrcOffset++)
7428 : {
7429 472561 : if (bWrapOverX)
7430 630 : iSrcOffset =
7431 630 : (iSrcX % nSrcXSize) +
7432 630 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7433 :
7434 472561 : if (poWK->panUnifiedSrcValid != nullptr &&
7435 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7436 : iSrcOffset))
7437 : {
7438 0 : continue;
7439 : }
7440 :
7441 472561 : if (GWKGetPixelValue(
7442 : poWK, iBand, iSrcOffset, &dfBandDensity,
7443 945122 : &dfValueRealTmp, &dfValueImagTmp) &&
7444 472561 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7445 : {
7446 472561 : const double dfWeight =
7447 472561 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7448 472561 : dfTotalWeight += dfWeight;
7449 472561 : dfTotalReal +=
7450 472561 : dfValueRealTmp * dfValueRealTmp * dfWeight;
7451 472561 : if (bIsComplex)
7452 48 : dfTotalImag += dfValueImagTmp *
7453 48 : dfValueImagTmp * dfWeight;
7454 : }
7455 : }
7456 : }
7457 :
7458 300416 : if (dfTotalWeight > 0)
7459 : {
7460 300416 : dfValueReal = sqrt(dfTotalReal / dfTotalWeight);
7461 :
7462 300416 : if (poWK->bApplyVerticalShift)
7463 : {
7464 0 : if (!std::isfinite(padfZ[iDstX]))
7465 0 : continue;
7466 : // Subtract padfZ[] since the coordinate
7467 : // transformation is from target to source
7468 0 : dfValueReal =
7469 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
7470 0 : padfZ[iDstX] *
7471 : dfMultFactorVerticalShiftPipeline;
7472 : }
7473 :
7474 300416 : if (bIsComplex)
7475 12 : dfValueImag = sqrt(dfTotalImag / dfTotalWeight);
7476 :
7477 300416 : dfBandDensity = 1;
7478 300416 : bHasFoundDensity = true;
7479 : }
7480 : } // GRA_RMS.
7481 : #ifdef disabled
7482 : else if (nAlgo == GWKAOM_Sum)
7483 : // poWK->eResample == GRA_Sum
7484 : {
7485 : double dfTotalReal = 0.0;
7486 : double dfTotalImag = 0.0;
7487 : bool bFoundValid = false;
7488 :
7489 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7490 : {
7491 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7492 : iSrcOffset = iSrcXMin +
7493 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7494 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7495 : iSrcX++, iSrcOffset++)
7496 : {
7497 : if (bWrapOverX)
7498 : iSrcOffset =
7499 : (iSrcX % nSrcXSize) +
7500 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7501 :
7502 : if (poWK->panUnifiedSrcValid != nullptr &&
7503 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7504 : iSrcOffset))
7505 : {
7506 : continue;
7507 : }
7508 :
7509 : if (GWKGetPixelValue(
7510 : poWK, iBand, iSrcOffset, &dfBandDensity,
7511 : &dfValueRealTmp, &dfValueImagTmp) &&
7512 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7513 : {
7514 : const double dfWeight =
7515 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7516 : bFoundValid = true;
7517 : dfTotalReal += dfValueRealTmp * dfWeight;
7518 : if (bIsComplex)
7519 : {
7520 : dfTotalImag += dfValueImagTmp * dfWeight;
7521 : }
7522 : }
7523 : }
7524 : }
7525 :
7526 : if (bFoundValid)
7527 : {
7528 : dfValueReal = dfTotalReal;
7529 :
7530 : if (poWK->bApplyVerticalShift)
7531 : {
7532 : if (!std::isfinite(padfZ[iDstX]))
7533 : continue;
7534 : // Subtract padfZ[] since the coordinate
7535 : // transformation is from target to source
7536 : dfValueReal =
7537 : dfValueReal * poWK->dfMultFactorVerticalShift -
7538 : padfZ[iDstX] *
7539 : dfMultFactorVerticalShiftPipeline;
7540 : }
7541 :
7542 : if (bIsComplex)
7543 : {
7544 : dfValueImag = dfTotalImag;
7545 : }
7546 : dfBandDensity = 1;
7547 : bHasFoundDensity = true;
7548 : }
7549 : } // GRA_Sum.
7550 : #endif
7551 2475950 : else if (nAlgo == GWKAOM_Imode || nAlgo == GWKAOM_Fmode)
7552 : // poWK->eResample == GRA_Mode
7553 : {
7554 : // This code adapted from GDALDownsampleChunk32R_Mode() in
7555 : // gcore/overview.cpp.
7556 500014 : if (nAlgo == GWKAOM_Fmode) // int32 or float.
7557 : {
7558 : // Does it make sense to run a
7559 : // majority filter on floating point data? But, here it
7560 : // is for the sake of compatibility. It won't look
7561 : // right on RGB images by the nature of the filter.
7562 3400 : int iMaxInd = 0;
7563 3400 : int iMaxVal = -1;
7564 3400 : int i = 0;
7565 :
7566 10200 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7567 : {
7568 6800 : iSrcOffset =
7569 6800 : iSrcXMin +
7570 6800 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7571 20400 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7572 : iSrcX++, iSrcOffset++)
7573 : {
7574 13600 : if (bWrapOverX)
7575 0 : iSrcOffset =
7576 0 : (iSrcX % nSrcXSize) +
7577 0 : static_cast<GPtrDiff_t>(iSrcY) *
7578 0 : nSrcXSize;
7579 :
7580 13600 : if (poWK->panUnifiedSrcValid != nullptr &&
7581 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7582 : iSrcOffset))
7583 0 : continue;
7584 :
7585 13600 : if (GWKGetPixelValue(
7586 : poWK, iBand, iSrcOffset, &dfBandDensity,
7587 27200 : &dfValueRealTmp, &dfValueImagTmp) &&
7588 13600 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7589 : {
7590 13600 : const float fVal =
7591 13600 : static_cast<float>(dfValueRealTmp);
7592 :
7593 : // Check array for existing entry.
7594 32685 : for (i = 0; i < iMaxInd; ++i)
7595 22512 : if (pafRealVals[i] == fVal &&
7596 2626 : ++panRealSums[i] >
7597 2626 : panRealSums[iMaxVal])
7598 : {
7599 801 : iMaxVal = i;
7600 801 : break;
7601 : }
7602 :
7603 : // Add to arr if entry not already there.
7604 13600 : if (i == iMaxInd)
7605 : {
7606 12799 : pafRealVals[iMaxInd] = fVal;
7607 12799 : panRealSums[iMaxInd] = 1;
7608 :
7609 12799 : if (iMaxVal < 0)
7610 3400 : iMaxVal = iMaxInd;
7611 :
7612 12799 : ++iMaxInd;
7613 : }
7614 : }
7615 : }
7616 : }
7617 :
7618 3400 : if (iMaxVal != -1)
7619 : {
7620 3400 : dfValueReal = pafRealVals[iMaxVal];
7621 :
7622 3400 : if (poWK->bApplyVerticalShift)
7623 : {
7624 0 : if (!std::isfinite(padfZ[iDstX]))
7625 0 : continue;
7626 : // Subtract padfZ[] since the coordinate
7627 : // transformation is from target to source
7628 0 : dfValueReal =
7629 0 : dfValueReal *
7630 0 : poWK->dfMultFactorVerticalShift -
7631 0 : padfZ[iDstX] *
7632 : dfMultFactorVerticalShiftPipeline;
7633 : }
7634 :
7635 3400 : dfBandDensity = 1;
7636 3400 : bHasFoundDensity = true;
7637 : }
7638 : }
7639 : else // byte or int16.
7640 : {
7641 496614 : int nMaxVal = 0;
7642 496614 : int iMaxInd = -1;
7643 :
7644 496614 : memset(panVals, 0, nBins * sizeof(int));
7645 :
7646 1612530 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7647 : {
7648 1115920 : iSrcOffset =
7649 1115920 : iSrcXMin +
7650 1115920 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7651 4733090 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7652 : iSrcX++, iSrcOffset++)
7653 : {
7654 3617170 : if (bWrapOverX)
7655 630 : iSrcOffset =
7656 630 : (iSrcX % nSrcXSize) +
7657 630 : static_cast<GPtrDiff_t>(iSrcY) *
7658 630 : nSrcXSize;
7659 :
7660 3617170 : if (poWK->panUnifiedSrcValid != nullptr &&
7661 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7662 : iSrcOffset))
7663 0 : continue;
7664 :
7665 3617170 : if (GWKGetPixelValue(
7666 : poWK, iBand, iSrcOffset, &dfBandDensity,
7667 7234340 : &dfValueRealTmp, &dfValueImagTmp) &&
7668 3617170 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7669 : {
7670 3617170 : const int nVal =
7671 3617170 : static_cast<int>(dfValueRealTmp);
7672 3617170 : if (++panVals[nVal + nBinsOffset] > nMaxVal)
7673 : {
7674 : // Sum the density.
7675 : // Is it the most common value so far?
7676 2812830 : iMaxInd = nVal;
7677 2812830 : nMaxVal = panVals[nVal + nBinsOffset];
7678 : }
7679 : }
7680 : }
7681 : }
7682 :
7683 496614 : if (iMaxInd != -1)
7684 : {
7685 496614 : dfValueReal = iMaxInd;
7686 :
7687 496614 : if (poWK->bApplyVerticalShift)
7688 : {
7689 0 : if (!std::isfinite(padfZ[iDstX]))
7690 0 : continue;
7691 : // Subtract padfZ[] since the coordinate
7692 : // transformation is from target to source
7693 0 : dfValueReal =
7694 0 : dfValueReal *
7695 0 : poWK->dfMultFactorVerticalShift -
7696 0 : padfZ[iDstX] *
7697 : dfMultFactorVerticalShiftPipeline;
7698 : }
7699 :
7700 496614 : dfBandDensity = 1;
7701 496614 : bHasFoundDensity = true;
7702 : }
7703 500014 : }
7704 : } // GRA_Mode.
7705 1975930 : else if (nAlgo == GWKAOM_Max)
7706 : // poWK->eResample == GRA_Max.
7707 : {
7708 335037 : bool bFoundValid = false;
7709 335037 : double dfTotalReal = std::numeric_limits<double>::lowest();
7710 : // This code adapted from nAlgo 1 method, GRA_Average.
7711 1288010 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7712 : {
7713 952975 : iSrcOffset = iSrcXMin +
7714 952975 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7715 4406540 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7716 : iSrcX++, iSrcOffset++)
7717 : {
7718 3453560 : if (bWrapOverX)
7719 630 : iSrcOffset =
7720 630 : (iSrcX % nSrcXSize) +
7721 630 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7722 :
7723 3456370 : if (poWK->panUnifiedSrcValid != nullptr &&
7724 2809 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7725 : iSrcOffset))
7726 : {
7727 2446 : continue;
7728 : }
7729 :
7730 : // Returns pixel value if it is not no data.
7731 3451120 : if (GWKGetPixelValue(
7732 : poWK, iBand, iSrcOffset, &dfBandDensity,
7733 6902230 : &dfValueRealTmp, &dfValueImagTmp) &&
7734 3451120 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7735 : {
7736 3451120 : bFoundValid = true;
7737 3451120 : if (dfTotalReal < dfValueRealTmp)
7738 : {
7739 442642 : dfTotalReal = dfValueRealTmp;
7740 : }
7741 : }
7742 : }
7743 : }
7744 :
7745 335037 : if (bFoundValid)
7746 : {
7747 335037 : dfValueReal = dfTotalReal;
7748 :
7749 335037 : if (poWK->bApplyVerticalShift)
7750 : {
7751 0 : if (!std::isfinite(padfZ[iDstX]))
7752 0 : continue;
7753 : // Subtract padfZ[] since the coordinate
7754 : // transformation is from target to source
7755 0 : dfValueReal =
7756 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
7757 0 : padfZ[iDstX] *
7758 : dfMultFactorVerticalShiftPipeline;
7759 : }
7760 :
7761 335037 : dfBandDensity = 1;
7762 335037 : bHasFoundDensity = true;
7763 : }
7764 : } // GRA_Max.
7765 1640900 : else if (nAlgo == GWKAOM_Min)
7766 : // poWK->eResample == GRA_Min.
7767 : {
7768 335012 : bool bFoundValid = false;
7769 335012 : double dfTotalReal = std::numeric_limits<double>::max();
7770 : // This code adapted from nAlgo 1 method, GRA_Average.
7771 1287720 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7772 : {
7773 952710 : iSrcOffset = iSrcXMin +
7774 952710 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7775 4403460 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7776 : iSrcX++, iSrcOffset++)
7777 : {
7778 3450750 : if (bWrapOverX)
7779 630 : iSrcOffset =
7780 630 : (iSrcX % nSrcXSize) +
7781 630 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7782 :
7783 3450750 : if (poWK->panUnifiedSrcValid != nullptr &&
7784 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7785 : iSrcOffset))
7786 : {
7787 0 : continue;
7788 : }
7789 :
7790 : // Returns pixel value if it is not no data.
7791 3450750 : if (GWKGetPixelValue(
7792 : poWK, iBand, iSrcOffset, &dfBandDensity,
7793 6901500 : &dfValueRealTmp, &dfValueImagTmp) &&
7794 3450750 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7795 : {
7796 3450750 : bFoundValid = true;
7797 3450750 : if (dfTotalReal > dfValueRealTmp)
7798 : {
7799 443069 : dfTotalReal = dfValueRealTmp;
7800 : }
7801 : }
7802 : }
7803 : }
7804 :
7805 335012 : if (bFoundValid)
7806 : {
7807 335012 : dfValueReal = dfTotalReal;
7808 :
7809 335012 : if (poWK->bApplyVerticalShift)
7810 : {
7811 0 : if (!std::isfinite(padfZ[iDstX]))
7812 0 : continue;
7813 : // Subtract padfZ[] since the coordinate
7814 : // transformation is from target to source
7815 0 : dfValueReal =
7816 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
7817 0 : padfZ[iDstX] *
7818 : dfMultFactorVerticalShiftPipeline;
7819 : }
7820 :
7821 335012 : dfBandDensity = 1;
7822 335012 : bHasFoundDensity = true;
7823 : }
7824 : } // GRA_Min.
7825 1305880 : else if (nAlgo == GWKAOM_Quant)
7826 : // poWK->eResample == GRA_Med | GRA_Q1 | GRA_Q3.
7827 : {
7828 1005040 : bool bFoundValid = false;
7829 1005040 : std::vector<double> dfRealValuesTmp;
7830 :
7831 : // This code adapted from nAlgo 1 method, GRA_Average.
7832 3863170 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7833 : {
7834 2858130 : iSrcOffset = iSrcXMin +
7835 2858130 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7836 13210400 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7837 : iSrcX++, iSrcOffset++)
7838 : {
7839 10352300 : if (bWrapOverX)
7840 1890 : iSrcOffset =
7841 1890 : (iSrcX % nSrcXSize) +
7842 1890 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7843 :
7844 10352300 : if (poWK->panUnifiedSrcValid != nullptr &&
7845 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7846 : iSrcOffset))
7847 : {
7848 0 : continue;
7849 : }
7850 :
7851 : // Returns pixel value if it is not no data.
7852 10352300 : if (GWKGetPixelValue(
7853 : poWK, iBand, iSrcOffset, &dfBandDensity,
7854 20704500 : &dfValueRealTmp, &dfValueImagTmp) &&
7855 10352300 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7856 : {
7857 10352300 : bFoundValid = true;
7858 10352300 : dfRealValuesTmp.push_back(dfValueRealTmp);
7859 : }
7860 : }
7861 : }
7862 :
7863 1005040 : if (bFoundValid)
7864 : {
7865 1005040 : std::sort(dfRealValuesTmp.begin(),
7866 : dfRealValuesTmp.end());
7867 : int quantIdx = static_cast<int>(
7868 1005040 : std::ceil(quant * dfRealValuesTmp.size() - 1));
7869 1005040 : dfValueReal = dfRealValuesTmp[quantIdx];
7870 :
7871 1005040 : if (poWK->bApplyVerticalShift)
7872 : {
7873 0 : if (!std::isfinite(padfZ[iDstX]))
7874 0 : continue;
7875 : // Subtract padfZ[] since the coordinate
7876 : // transformation is from target to source
7877 0 : dfValueReal =
7878 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
7879 0 : padfZ[iDstX] *
7880 : dfMultFactorVerticalShiftPipeline;
7881 : }
7882 :
7883 1005040 : dfBandDensity = 1;
7884 1005040 : bHasFoundDensity = true;
7885 1005040 : dfRealValuesTmp.clear();
7886 : }
7887 : } // Quantile.
7888 :
7889 : /* --------------------------------------------------------------------
7890 : */
7891 : /* We have a computed value from the source. Now apply it
7892 : * to */
7893 : /* the destination pixel. */
7894 : /* --------------------------------------------------------------------
7895 : */
7896 2776360 : if (bHasFoundDensity)
7897 : {
7898 : // TODO: Should we compute dfBandDensity in fct of
7899 : // nCount/nCount2, or use as a threshold to set the dest
7900 : // value?
7901 : // dfBandDensity = (float) nCount / nCount2;
7902 : // if( (float) nCount / nCount2 > 0.1 )
7903 : // or fix gdalwarp crop_to_cutline to crop partially
7904 : // overlapping pixels.
7905 2776360 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
7906 : dfValueReal, dfValueImag);
7907 : }
7908 : }
7909 :
7910 1663160 : if (!bHasFoundDensity)
7911 311290 : continue;
7912 :
7913 : /* --------------------------------------------------------------------
7914 : */
7915 : /* Update destination density/validity masks. */
7916 : /* --------------------------------------------------------------------
7917 : */
7918 1351870 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
7919 :
7920 1351870 : if (poWK->panDstValid != nullptr)
7921 : {
7922 74 : CPLMaskSet(poWK->panDstValid, iDstOffset);
7923 : }
7924 : } /* Next iDstX */
7925 :
7926 : /* --------------------------------------------------------------------
7927 : */
7928 : /* Report progress to the user, and optionally cancel out. */
7929 : /* --------------------------------------------------------------------
7930 : */
7931 6485 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
7932 0 : break;
7933 : }
7934 :
7935 : /* -------------------------------------------------------------------- */
7936 : /* Cleanup and return. */
7937 : /* -------------------------------------------------------------------- */
7938 118 : CPLFree(padfX);
7939 118 : CPLFree(padfY);
7940 118 : CPLFree(padfZ);
7941 118 : CPLFree(padfX2);
7942 118 : CPLFree(padfY2);
7943 118 : CPLFree(padfZ2);
7944 118 : CPLFree(pabSuccess);
7945 118 : CPLFree(pabSuccess2);
7946 118 : VSIFree(panVals);
7947 118 : VSIFree(pafRealVals);
7948 118 : VSIFree(panRealSums);
7949 118 : if (bIsComplex)
7950 : {
7951 18 : VSIFree(pafImagVals);
7952 18 : VSIFree(panImagSums);
7953 : }
7954 : }
7955 :
7956 : /************************************************************************/
7957 : /* getOrientation() */
7958 : /************************************************************************/
7959 :
typedef std::pair<double, double> XYPair;

// Classifies the turn made when going from p1 to p2 and then to p3, from the
// sign of the cross product of the two consecutive edges.
// Returns 1 if (p1,p2,p3) is clockwise oriented,
// -1 if it is counter-clockwise oriented,
// or 0 if it is colinear (absolute value of the cross product below 1e-20).
static int getOrientation(const XYPair &p1, const XYPair &p2, const XYPair &p3)
{
    const double p1x = p1.first;
    const double p1y = p1.second;
    const double p2x = p2.first;
    const double p2y = p2.second;
    const double p3x = p3.first;
    const double p3y = p3.second;
    const double val = (p2y - p1y) * (p3x - p2x) - (p2x - p1x) * (p3y - p2y);
    if (std::abs(val) < 1e-20)
        return 0;
    else if (val > 0)
        return 1;
    else
        return -1;
}

/************************************************************************/
/*                             isConvex()                               */
/************************************************************************/

typedef std::vector<XYPair> XYPoly;

// Returns whether the ring turns in the same direction at every one of its
// vertices. Colinear triples are ignored.
// poly must be closed (its last point repeats its first point).
static bool isConvex(const XYPoly &poly)
{
    const size_t n = poly.size();
    // A closed ring with fewer than 4 points is degenerate (at most a
    // segment): there is no turn to examine, and this early return also
    // avoids reading poly[2] out of range.
    if (n < 4)
        return true;

    int last_orientation = 0;
    // i is the first point of each examined triple. The last iteration
    // (i == n - 2) uses the closing point, i.e. poly[0], as the middle point
    // and wraps the third point around to poly[1], so that the turn at the
    // first vertex of the ring is checked as well.
    for (size_t i = 0; i < n - 1; ++i)
    {
        const size_t iThird = (i + 2 < n) ? i + 2 : 1;
        const int orientation =
            getOrientation(poly[i], poly[i + 1], poly[iThird]);
        if (orientation != 0)
        {
            if (last_orientation == 0)
                last_orientation = orientation;
            else if (orientation != last_orientation)
                return false;
        }
    }
    return true;
}
8009 :
8010 : /************************************************************************/
8011 : /* pointIntersectsConvexPoly() */
8012 : /************************************************************************/
8013 :
8014 : // Returns whether xy intersects poly, that must be closed and convex.
8015 6049100 : static bool pointIntersectsConvexPoly(const XYPair &xy, const XYPoly &poly)
8016 : {
8017 6049100 : const size_t n = poly.size();
8018 6049100 : double dx1 = xy.first - poly[0].first;
8019 6049100 : double dy1 = xy.second - poly[0].second;
8020 6049100 : double dx2 = poly[1].first - poly[0].first;
8021 6049100 : double dy2 = poly[1].second - poly[0].second;
8022 6049100 : double prevCrossProduct = dx1 * dy2 - dx2 * dy1;
8023 :
8024 : // Check if the point remains on the same side (left/right) of all edges
8025 14556400 : for (size_t i = 2; i < n; i++)
8026 : {
8027 12793100 : dx1 = xy.first - poly[i - 1].first;
8028 12793100 : dy1 = xy.second - poly[i - 1].second;
8029 :
8030 12793100 : dx2 = poly[i].first - poly[i - 1].first;
8031 12793100 : dy2 = poly[i].second - poly[i - 1].second;
8032 :
8033 12793100 : double crossProduct = dx1 * dy2 - dx2 * dy1;
8034 12793100 : if (std::abs(prevCrossProduct) < 1e-20)
8035 725558 : prevCrossProduct = crossProduct;
8036 12067500 : else if (prevCrossProduct * crossProduct < 0)
8037 4285760 : return false;
8038 : }
8039 :
8040 1763340 : return true;
8041 : }
8042 :
8043 : /************************************************************************/
8044 : /* getIntersection() */
8045 : /************************************************************************/
8046 :
8047 : /* Returns intersection of [p1,p2] with [p3,p4], if
8048 : * it is a single point, and the 2 segments are not colinear.
8049 : */
8050 11811000 : static bool getIntersection(const XYPair &p1, const XYPair &p2,
8051 : const XYPair &p3, const XYPair &p4, XYPair &xy)
8052 : {
8053 11811000 : const double x1 = p1.first;
8054 11811000 : const double y1 = p1.second;
8055 11811000 : const double x2 = p2.first;
8056 11811000 : const double y2 = p2.second;
8057 11811000 : const double x3 = p3.first;
8058 11811000 : const double y3 = p3.second;
8059 11811000 : const double x4 = p4.first;
8060 11811000 : const double y4 = p4.second;
8061 11811000 : const double t_num = (x1 - x3) * (y3 - y4) - (y1 - y3) * (x3 - x4);
8062 11811000 : const double denom = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4);
8063 11811000 : if (t_num * denom < 0 || std::abs(t_num) > std::abs(denom) || denom == 0)
8064 9260780 : return false;
8065 :
8066 2550260 : const double u_num = (x1 - x3) * (y1 - y2) - (y1 - y3) * (x1 - x2);
8067 2550260 : if (u_num * denom < 0 || std::abs(u_num) > std::abs(denom))
8068 973924 : return false;
8069 :
8070 1576340 : const double t = t_num / denom;
8071 1576340 : xy.first = x1 + t * (x2 - x1);
8072 1576340 : xy.second = y1 + t * (y2 - y1);
8073 1576340 : return true;
8074 : }
8075 :
8076 : /************************************************************************/
8077 : /* getConvexPolyIntersection() */
8078 : /************************************************************************/
8079 :
8080 : // poly1 and poly2 must be closed and convex.
8081 : // The returned intersection will not necessary be closed.
8082 785302 : static void getConvexPolyIntersection(const XYPoly &poly1, const XYPoly &poly2,
8083 : XYPoly &intersection)
8084 : {
8085 785302 : intersection.clear();
8086 :
8087 : // Add all points of poly1 inside poly2
8088 3926510 : for (size_t i = 0; i < poly1.size() - 1; ++i)
8089 : {
8090 3141210 : if (pointIntersectsConvexPoly(poly1[i], poly2))
8091 1187430 : intersection.push_back(poly1[i]);
8092 : }
8093 785302 : if (intersection.size() == poly1.size() - 1)
8094 : {
8095 : // poly1 is inside poly2
8096 119100 : return;
8097 : }
8098 :
8099 : // Add all points of poly2 inside poly1
8100 3634860 : for (size_t i = 0; i < poly2.size() - 1; ++i)
8101 : {
8102 2907890 : if (pointIntersectsConvexPoly(poly2[i], poly1))
8103 575904 : intersection.push_back(poly2[i]);
8104 : }
8105 :
8106 : // Compute the intersection of all edges of both polygons
8107 726972 : XYPair xy;
8108 3634860 : for (size_t i1 = 0; i1 < poly1.size() - 1; ++i1)
8109 : {
8110 14539400 : for (size_t i2 = 0; i2 < poly2.size() - 1; ++i2)
8111 : {
8112 11631600 : if (getIntersection(poly1[i1], poly1[i1 + 1], poly2[i2],
8113 11631600 : poly2[i2 + 1], xy))
8114 : {
8115 1576230 : intersection.push_back(xy);
8116 : }
8117 : }
8118 : }
8119 :
8120 726972 : if (intersection.empty())
8121 60770 : return;
8122 :
8123 : // Find lowest-left point in intersection set
8124 666202 : double lowest_x = std::numeric_limits<double>::max();
8125 666202 : double lowest_y = std::numeric_limits<double>::max();
8126 3772450 : for (const auto &pair : intersection)
8127 : {
8128 3106240 : const double x = pair.first;
8129 3106240 : const double y = pair.second;
8130 3106240 : if (y < lowest_y || (y == lowest_y && x < lowest_x))
8131 : {
8132 1096040 : lowest_x = x;
8133 1096040 : lowest_y = y;
8134 : }
8135 : }
8136 :
8137 5737980 : const auto sortFunc = [&](const XYPair &p1, const XYPair &p2)
8138 : {
8139 5737980 : const double p1x_diff = p1.first - lowest_x;
8140 5737980 : const double p1y_diff = p1.second - lowest_y;
8141 5737980 : const double p2x_diff = p2.first - lowest_x;
8142 5737980 : const double p2y_diff = p2.second - lowest_y;
8143 5737980 : if (p2y_diff == 0.0 && p1y_diff == 0.0)
8144 : {
8145 2655420 : if (p1x_diff >= 0)
8146 : {
8147 2655420 : if (p2x_diff >= 0)
8148 2655420 : return p1.first < p2.first;
8149 0 : return true;
8150 : }
8151 : else
8152 : {
8153 0 : if (p2x_diff >= 0)
8154 0 : return false;
8155 0 : return p1.first < p2.first;
8156 : }
8157 : }
8158 :
8159 3082560 : if (p2x_diff == 0.0 && p1x_diff == 0.0)
8160 1046960 : return p1.second < p2.second;
8161 :
8162 : double tan_p1;
8163 2035600 : if (p1x_diff == 0.0)
8164 464622 : tan_p1 = p1y_diff == 0.0 ? 0.0 : std::numeric_limits<double>::max();
8165 : else
8166 1570980 : tan_p1 = p1y_diff / p1x_diff;
8167 :
8168 : double tan_p2;
8169 2035600 : if (p2x_diff == 0.0)
8170 839515 : tan_p2 = p2y_diff == 0.0 ? 0.0 : std::numeric_limits<double>::max();
8171 : else
8172 1196080 : tan_p2 = p2y_diff / p2x_diff;
8173 :
8174 2035600 : if (tan_p1 >= 0)
8175 : {
8176 1904790 : if (tan_p2 >= 0)
8177 1881590 : return tan_p1 < tan_p2;
8178 : else
8179 23199 : return true;
8180 : }
8181 : else
8182 : {
8183 130806 : if (tan_p2 >= 0)
8184 103900 : return false;
8185 : else
8186 26906 : return tan_p1 < tan_p2;
8187 : }
8188 666202 : };
8189 :
8190 : // Sort points by increasing atan2(y-lowest_y, x-lowest_x) to form a convex
8191 : // hull
8192 666202 : std::sort(intersection.begin(), intersection.end(), sortFunc);
8193 :
8194 : // Remove duplicated points
8195 666202 : size_t j = 1;
8196 3106240 : for (size_t i = 1; i < intersection.size(); ++i)
8197 : {
8198 2440040 : if (intersection[i] != intersection[i - 1])
8199 : {
8200 1452560 : if (j < i)
8201 545275 : intersection[j] = intersection[i];
8202 1452560 : ++j;
8203 : }
8204 : }
8205 666202 : intersection.resize(j);
8206 : }
8207 :
8208 : /************************************************************************/
8209 : /* getArea() */
8210 : /************************************************************************/
8211 :
8212 : // poly may or may not be closed.
8213 558521 : static double getArea(const XYPoly &poly)
8214 : {
8215 : // CPLAssert(poly.size() >= 2);
8216 558521 : const size_t nPointCount = poly.size();
8217 : double dfAreaSum =
8218 558521 : poly[0].first * (poly[1].second - poly[nPointCount - 1].second);
8219 :
8220 1765140 : for (size_t i = 1; i < nPointCount - 1; i++)
8221 : {
8222 1206610 : dfAreaSum += poly[i].first * (poly[i + 1].second - poly[i - 1].second);
8223 : }
8224 :
8225 558521 : dfAreaSum += poly[nPointCount - 1].first *
8226 558521 : (poly[0].second - poly[nPointCount - 2].second);
8227 :
8228 558521 : return 0.5 * std::fabs(dfAreaSum);
8229 : }
8230 :
8231 : /************************************************************************/
8232 : /* GWKSumPreserving() */
8233 : /************************************************************************/
8234 :
8235 : static void GWKSumPreservingThread(void *pData);
8236 :
8237 18 : static CPLErr GWKSumPreserving(GDALWarpKernel *poWK)
8238 : {
8239 18 : return GWKRun(poWK, "GWKSumPreserving", GWKSumPreservingThread);
8240 : }
8241 :
8242 18 : static void GWKSumPreservingThread(void *pData)
8243 : {
8244 18 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
8245 18 : GDALWarpKernel *poWK = psJob->poWK;
8246 18 : const int iYMin = psJob->iYMin;
8247 18 : const int iYMax = psJob->iYMax;
8248 : const bool bIsAffineNoRotation =
8249 18 : GDALTransformIsAffineNoRotation(poWK->pfnTransformer,
8250 26 : poWK->pTransformerArg) &&
8251 : // for debug/testing purposes
8252 8 : CPLTestBool(
8253 18 : CPLGetConfigOption("GDAL_WARP_USE_AFFINE_OPTIMIZATION", "YES"));
8254 :
8255 18 : const int nDstXSize = poWK->nDstXSize;
8256 18 : const int nSrcXSize = poWK->nSrcXSize;
8257 18 : const int nSrcYSize = poWK->nSrcYSize;
8258 :
8259 36 : std::vector<double> adfX0(nSrcXSize + 1);
8260 36 : std::vector<double> adfY0(nSrcXSize + 1);
8261 36 : std::vector<double> adfZ0(nSrcXSize + 1);
8262 36 : std::vector<double> adfX1(nSrcXSize + 1);
8263 36 : std::vector<double> adfY1(nSrcXSize + 1);
8264 36 : std::vector<double> adfZ1(nSrcXSize + 1);
8265 36 : std::vector<int> abSuccess0(nSrcXSize + 1);
8266 36 : std::vector<int> abSuccess1(nSrcXSize + 1);
8267 :
8268 : CPLRectObj sGlobalBounds;
8269 18 : sGlobalBounds.minx = -2 * poWK->dfXScale;
8270 18 : sGlobalBounds.miny = iYMin - 2 * poWK->dfYScale;
8271 18 : sGlobalBounds.maxx = nDstXSize + 2 * poWK->dfXScale;
8272 18 : sGlobalBounds.maxy = iYMax + 2 * poWK->dfYScale;
8273 18 : CPLQuadTree *hQuadTree = CPLQuadTreeCreate(&sGlobalBounds, nullptr);
8274 :
8275 : struct SourcePixel
8276 : {
8277 : int iSrcX;
8278 : int iSrcY;
8279 :
8280 : // Coordinates of source pixel in target pixel coordinates
8281 : double dfDstX0;
8282 : double dfDstY0;
8283 : double dfDstX1;
8284 : double dfDstY1;
8285 : double dfDstX2;
8286 : double dfDstY2;
8287 : double dfDstX3;
8288 : double dfDstY3;
8289 :
8290 : // Source pixel total area (might be larger than the one described
8291 : // by above coordinates, if the pixel was crossing the antimeridian
8292 : // and split)
8293 : double dfArea;
8294 : };
8295 :
8296 36 : std::vector<SourcePixel> sourcePixels;
8297 :
8298 36 : XYPoly discontinuityLeft(5);
8299 36 : XYPoly discontinuityRight(5);
8300 :
8301 : /* ==================================================================== */
8302 : /* First pass: transform the 4 corners of each potential */
8303 : /* contributing source pixel to target pixel coordinates. */
8304 : /* ==================================================================== */
8305 :
8306 : // Special case for top line
8307 : {
8308 18 : int iY = 0;
8309 1130 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8310 : {
8311 1112 : adfX1[iX] = iX + poWK->nSrcXOff;
8312 1112 : adfY1[iX] = iY + poWK->nSrcYOff;
8313 1112 : adfZ1[iX] = 0;
8314 : }
8315 :
8316 18 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
8317 : adfX1.data(), adfY1.data(), adfZ1.data(),
8318 : abSuccess1.data());
8319 :
8320 1130 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8321 : {
8322 1112 : if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
8323 0 : abSuccess1[iX] = FALSE;
8324 : else
8325 : {
8326 1112 : adfX1[iX] -= poWK->nDstXOff;
8327 1112 : adfY1[iX] -= poWK->nDstYOff;
8328 : }
8329 : }
8330 : }
8331 :
8332 413412 : const auto getInsideXSign = [poWK, nDstXSize](double dfX)
8333 : {
8334 413412 : return dfX - poWK->nDstXOff >= -2 * poWK->dfXScale &&
8335 205344 : dfX - poWK->nDstXOff <= nDstXSize + 2 * poWK->dfXScale
8336 413412 : ? 1
8337 208068 : : -1;
8338 18 : };
8339 :
8340 : const auto FindDiscontinuity =
8341 80 : [poWK, psJob, getInsideXSign](
8342 : double dfXLeft, double dfXRight, double dfY,
8343 : int XLeftReprojectedInsideSign, double &dfXMidReprojectedLeft,
8344 800 : double &dfXMidReprojectedRight, double &dfYMidReprojected)
8345 : {
8346 880 : for (int i = 0; i < 10 && dfXRight - dfXLeft > 1e-8; ++i)
8347 : {
8348 800 : double dfXMid = (dfXLeft + dfXRight) / 2;
8349 800 : double dfXMidReprojected = dfXMid;
8350 800 : dfYMidReprojected = dfY;
8351 800 : double dfZ = 0;
8352 800 : int nSuccess = 0;
8353 800 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1,
8354 : &dfXMidReprojected, &dfYMidReprojected, &dfZ,
8355 : &nSuccess);
8356 800 : if (XLeftReprojectedInsideSign != getInsideXSign(dfXMidReprojected))
8357 : {
8358 456 : dfXRight = dfXMid;
8359 456 : dfXMidReprojectedRight = dfXMidReprojected;
8360 : }
8361 : else
8362 : {
8363 344 : dfXLeft = dfXMid;
8364 344 : dfXMidReprojectedLeft = dfXMidReprojected;
8365 : }
8366 : }
8367 80 : };
8368 :
8369 566 : for (int iY = 0; iY < nSrcYSize; ++iY)
8370 : {
8371 548 : std::swap(adfX0, adfX1);
8372 548 : std::swap(adfY0, adfY1);
8373 548 : std::swap(adfZ0, adfZ1);
8374 548 : std::swap(abSuccess0, abSuccess1);
8375 :
8376 104512 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8377 : {
8378 103964 : adfX1[iX] = iX + poWK->nSrcXOff;
8379 103964 : adfY1[iX] = iY + 1 + poWK->nSrcYOff;
8380 103964 : adfZ1[iX] = 0;
8381 : }
8382 :
8383 548 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
8384 : adfX1.data(), adfY1.data(), adfZ1.data(),
8385 : abSuccess1.data());
8386 :
8387 104512 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8388 : {
8389 103964 : if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
8390 0 : abSuccess1[iX] = FALSE;
8391 : else
8392 : {
8393 103964 : adfX1[iX] -= poWK->nDstXOff;
8394 103964 : adfY1[iX] -= poWK->nDstYOff;
8395 : }
8396 : }
8397 :
8398 103964 : for (int iX = 0; iX < nSrcXSize; ++iX)
8399 : {
8400 206832 : if (abSuccess0[iX] && abSuccess0[iX + 1] && abSuccess1[iX] &&
8401 103416 : abSuccess1[iX + 1])
8402 : {
8403 : /* --------------------------------------------------------------------
8404 : */
8405 : /* Do not try to apply transparent source pixels to the
8406 : * destination.*/
8407 : /* --------------------------------------------------------------------
8408 : */
8409 103416 : const auto iSrcOffset =
8410 103416 : iX + static_cast<GPtrDiff_t>(iY) * nSrcXSize;
8411 105816 : if (poWK->panUnifiedSrcValid != nullptr &&
8412 2400 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
8413 : {
8414 10971 : continue;
8415 : }
8416 :
8417 103410 : if (poWK->pafUnifiedSrcDensity != nullptr)
8418 : {
8419 0 : if (poWK->pafUnifiedSrcDensity[iSrcOffset] <
8420 : SRC_DENSITY_THRESHOLD)
8421 0 : continue;
8422 : }
8423 :
8424 : SourcePixel sp;
8425 103410 : sp.dfArea = 0;
8426 103410 : sp.dfDstX0 = adfX0[iX];
8427 103410 : sp.dfDstY0 = adfY0[iX];
8428 103410 : sp.dfDstX1 = adfX0[iX + 1];
8429 103410 : sp.dfDstY1 = adfY0[iX + 1];
8430 103410 : sp.dfDstX2 = adfX1[iX + 1];
8431 103410 : sp.dfDstY2 = adfY1[iX + 1];
8432 103410 : sp.dfDstX3 = adfX1[iX];
8433 103410 : sp.dfDstY3 = adfY1[iX];
8434 :
8435 : // Detect pixel that likely cross the anti-meridian and
8436 : // introduce a discontinuity when reprojected.
8437 :
8438 103410 : if (getInsideXSign(adfX0[iX]) !=
8439 103506 : getInsideXSign(adfX0[iX + 1]) &&
8440 164 : getInsideXSign(adfX0[iX]) == getInsideXSign(adfX1[iX]) &&
8441 68 : getInsideXSign(adfX0[iX + 1]) ==
8442 103574 : getInsideXSign(adfX1[iX + 1]) &&
8443 40 : (adfY1[iX] - adfY0[iX]) * (adfY1[iX + 1] - adfY0[iX + 1]) >
8444 : 0)
8445 : {
8446 40 : double dfXMidReprojectedLeftTop = 0;
8447 40 : double dfXMidReprojectedRightTop = 0;
8448 40 : double dfYMidReprojectedTop = 0;
8449 40 : FindDiscontinuity(
8450 40 : iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
8451 80 : iY + poWK->nSrcYOff, getInsideXSign(adfX0[iX]),
8452 : dfXMidReprojectedLeftTop, dfXMidReprojectedRightTop,
8453 : dfYMidReprojectedTop);
8454 40 : double dfXMidReprojectedLeftBottom = 0;
8455 40 : double dfXMidReprojectedRightBottom = 0;
8456 40 : double dfYMidReprojectedBottom = 0;
8457 40 : FindDiscontinuity(
8458 40 : iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
8459 80 : iY + poWK->nSrcYOff + 1, getInsideXSign(adfX1[iX]),
8460 : dfXMidReprojectedLeftBottom,
8461 : dfXMidReprojectedRightBottom, dfYMidReprojectedBottom);
8462 :
8463 40 : discontinuityLeft[0] = XYPair(adfX0[iX], adfY0[iX]);
8464 40 : discontinuityLeft[1] =
8465 80 : XYPair(dfXMidReprojectedLeftTop, dfYMidReprojectedTop);
8466 40 : discontinuityLeft[2] = XYPair(dfXMidReprojectedLeftBottom,
8467 40 : dfYMidReprojectedBottom);
8468 40 : discontinuityLeft[3] = XYPair(adfX1[iX], adfY1[iX]);
8469 40 : discontinuityLeft[4] = XYPair(adfX0[iX], adfY0[iX]);
8470 :
8471 40 : discontinuityRight[0] =
8472 80 : XYPair(adfX0[iX + 1], adfY0[iX + 1]);
8473 40 : discontinuityRight[1] =
8474 80 : XYPair(dfXMidReprojectedRightTop, dfYMidReprojectedTop);
8475 40 : discontinuityRight[2] = XYPair(dfXMidReprojectedRightBottom,
8476 40 : dfYMidReprojectedBottom);
8477 40 : discontinuityRight[3] =
8478 80 : XYPair(adfX1[iX + 1], adfY1[iX + 1]);
8479 40 : discontinuityRight[4] =
8480 80 : XYPair(adfX0[iX + 1], adfY0[iX + 1]);
8481 :
8482 40 : sp.dfArea = getArea(discontinuityLeft) +
8483 40 : getArea(discontinuityRight);
8484 40 : if (getInsideXSign(adfX0[iX]) >= 1)
8485 : {
8486 20 : sp.dfDstX1 = dfXMidReprojectedLeftTop;
8487 20 : sp.dfDstY1 = dfYMidReprojectedTop;
8488 20 : sp.dfDstX2 = dfXMidReprojectedLeftBottom;
8489 20 : sp.dfDstY2 = dfYMidReprojectedBottom;
8490 : }
8491 : else
8492 : {
8493 20 : sp.dfDstX0 = dfXMidReprojectedRightTop;
8494 20 : sp.dfDstY0 = dfYMidReprojectedTop;
8495 20 : sp.dfDstX3 = dfXMidReprojectedRightBottom;
8496 20 : sp.dfDstY3 = dfYMidReprojectedBottom;
8497 : }
8498 : }
8499 :
8500 : // Bounding box of source pixel (expressed in target pixel
8501 : // coordinates)
8502 : CPLRectObj sRect;
8503 103410 : sRect.minx = std::min(std::min(sp.dfDstX0, sp.dfDstX1),
8504 103410 : std::min(sp.dfDstX2, sp.dfDstX3));
8505 103410 : sRect.miny = std::min(std::min(sp.dfDstY0, sp.dfDstY1),
8506 103410 : std::min(sp.dfDstY2, sp.dfDstY3));
8507 103410 : sRect.maxx = std::max(std::max(sp.dfDstX0, sp.dfDstX1),
8508 103410 : std::max(sp.dfDstX2, sp.dfDstX3));
8509 103410 : sRect.maxy = std::max(std::max(sp.dfDstY0, sp.dfDstY1),
8510 103410 : std::max(sp.dfDstY2, sp.dfDstY3));
8511 103410 : if (!(sRect.minx < nDstXSize && sRect.maxx > 0 &&
8512 101350 : sRect.miny < iYMax && sRect.maxy > iYMin))
8513 : {
8514 10852 : continue;
8515 : }
8516 :
8517 92558 : sp.iSrcX = iX;
8518 92558 : sp.iSrcY = iY;
8519 :
8520 92558 : if (!bIsAffineNoRotation)
8521 : {
8522 : // Check polygon validity (no self-crossing)
8523 89745 : XYPair xy;
8524 89745 : if (getIntersection(XYPair(sp.dfDstX0, sp.dfDstY0),
8525 89745 : XYPair(sp.dfDstX1, sp.dfDstY1),
8526 89745 : XYPair(sp.dfDstX2, sp.dfDstY2),
8527 269235 : XYPair(sp.dfDstX3, sp.dfDstY3), xy) ||
8528 89745 : getIntersection(XYPair(sp.dfDstX1, sp.dfDstY1),
8529 89745 : XYPair(sp.dfDstX2, sp.dfDstY2),
8530 89745 : XYPair(sp.dfDstX0, sp.dfDstY0),
8531 179490 : XYPair(sp.dfDstX3, sp.dfDstY3), xy))
8532 : {
8533 113 : continue;
8534 : }
8535 : }
8536 :
8537 92445 : CPLQuadTreeInsertWithBounds(
8538 : hQuadTree,
8539 : reinterpret_cast<void *>(
8540 92445 : static_cast<uintptr_t>(sourcePixels.size())),
8541 : &sRect);
8542 :
8543 92445 : sourcePixels.push_back(sp);
8544 : }
8545 : }
8546 : }
8547 :
8548 36 : std::vector<double> adfRealValue(poWK->nBands);
8549 36 : std::vector<double> adfImagValue(poWK->nBands);
8550 36 : std::vector<double> adfBandDensity(poWK->nBands);
8551 36 : std::vector<double> adfWeight(poWK->nBands);
8552 :
8553 : #ifdef CHECK_SUM_WITH_GEOS
8554 : auto hGEOSContext = OGRGeometry::createGEOSContext();
8555 : auto seq1 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
8556 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 0, 0.0, 0.0);
8557 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 1, 1.0, 0.0);
8558 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 2, 1.0, 1.0);
8559 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 3, 0.0, 1.0);
8560 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 4, 0.0, 0.0);
8561 : auto hLR1 = GEOSGeom_createLinearRing_r(hGEOSContext, seq1);
8562 : auto hP1 = GEOSGeom_createPolygon_r(hGEOSContext, hLR1, nullptr, 0);
8563 :
8564 : auto seq2 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
8565 : auto hLR2 = GEOSGeom_createLinearRing_r(hGEOSContext, seq2);
8566 : auto hP2 = GEOSGeom_createPolygon_r(hGEOSContext, hLR2, nullptr, 0);
8567 : #endif
8568 :
8569 : const XYPoly xy1{
8570 36 : {0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}};
8571 36 : XYPoly xy2(5);
8572 36 : XYPoly xy2_triangle(4);
8573 36 : XYPoly intersection;
8574 :
8575 : /* ==================================================================== */
8576 : /* Loop over output lines. */
8577 : /* ==================================================================== */
8578 891 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
8579 : {
8580 : CPLRectObj sRect;
8581 873 : sRect.miny = iDstY;
8582 873 : sRect.maxy = iDstY + 1;
8583 :
8584 : /* ====================================================================
8585 : */
8586 : /* Loop over pixels in output scanline. */
8587 : /* ====================================================================
8588 : */
8589 221042 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
8590 : {
8591 220169 : sRect.minx = iDstX;
8592 220169 : sRect.maxx = iDstX + 1;
8593 220169 : int nSourcePixels = 0;
8594 : void **pahSourcePixel =
8595 220169 : CPLQuadTreeSearch(hQuadTree, &sRect, &nSourcePixels);
8596 220169 : if (nSourcePixels == 0)
8597 : {
8598 1258 : CPLFree(pahSourcePixel);
8599 1262 : continue;
8600 : }
8601 :
8602 218911 : std::fill(adfRealValue.begin(), adfRealValue.end(), 0);
8603 218911 : std::fill(adfImagValue.begin(), adfImagValue.end(), 0);
8604 218911 : std::fill(adfBandDensity.begin(), adfBandDensity.end(), 0);
8605 218911 : std::fill(adfWeight.begin(), adfWeight.end(), 0);
8606 218911 : double dfDensity = 0;
8607 218911 : double dfTotalWeight = 0;
8608 :
8609 : /* ====================================================================
8610 : */
8611 : /* Iterate over each contributing source pixel to add its
8612 : */
8613 : /* value weighed by the ratio of the area of its
8614 : * intersection */
8615 : /* with the target pixel divided by the area of the source
8616 : */
8617 : /* pixel. */
8618 : /* ====================================================================
8619 : */
8620 1020520 : for (int i = 0; i < nSourcePixels; ++i)
8621 : {
8622 801614 : const int iSourcePixel = static_cast<int>(
8623 801614 : reinterpret_cast<uintptr_t>(pahSourcePixel[i]));
8624 801614 : auto &sp = sourcePixels[iSourcePixel];
8625 :
8626 801614 : double dfWeight = 0.0;
8627 801614 : if (bIsAffineNoRotation)
8628 : {
8629 : // Optimization since the source pixel is a rectangle in
8630 : // target pixel coordinates
8631 16312 : double dfSrcMinX = std::min(sp.dfDstX0, sp.dfDstX2);
8632 16312 : double dfSrcMaxX = std::max(sp.dfDstX0, sp.dfDstX2);
8633 16312 : double dfSrcMinY = std::min(sp.dfDstY0, sp.dfDstY2);
8634 16312 : double dfSrcMaxY = std::max(sp.dfDstY0, sp.dfDstY2);
8635 16312 : double dfIntersMinX = std::max<double>(dfSrcMinX, iDstX);
8636 16312 : double dfIntersMaxX = std::min(dfSrcMaxX, iDstX + 1.0);
8637 16312 : double dfIntersMinY = std::max<double>(dfSrcMinY, iDstY);
8638 16312 : double dfIntersMaxY = std::min(dfSrcMaxY, iDstY + 1.0);
8639 16312 : dfWeight =
8640 16312 : ((dfIntersMaxX - dfIntersMinX) *
8641 16312 : (dfIntersMaxY - dfIntersMinY)) /
8642 16312 : ((dfSrcMaxX - dfSrcMinX) * (dfSrcMaxY - dfSrcMinY));
8643 : }
8644 : else
8645 : {
8646 : // Compute the polygon of the source pixel in target pixel
8647 : // coordinates, and shifted to the target pixel (unit square
8648 : // coordinates)
8649 :
8650 785302 : xy2[0] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
8651 785302 : xy2[1] = {sp.dfDstX1 - iDstX, sp.dfDstY1 - iDstY};
8652 785302 : xy2[2] = {sp.dfDstX2 - iDstX, sp.dfDstY2 - iDstY};
8653 785302 : xy2[3] = {sp.dfDstX3 - iDstX, sp.dfDstY3 - iDstY};
8654 785302 : xy2[4] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
8655 :
8656 785302 : if (isConvex(xy2))
8657 : {
8658 785302 : getConvexPolyIntersection(xy1, xy2, intersection);
8659 785302 : if (intersection.size() >= 3)
8660 : {
8661 468849 : dfWeight = getArea(intersection);
8662 : }
8663 : }
8664 : else
8665 : {
8666 : // Split xy2 into 2 triangles.
8667 0 : xy2_triangle[0] = xy2[0];
8668 0 : xy2_triangle[1] = xy2[1];
8669 0 : xy2_triangle[2] = xy2[2];
8670 0 : xy2_triangle[3] = xy2[0];
8671 0 : getConvexPolyIntersection(xy1, xy2_triangle,
8672 : intersection);
8673 0 : if (intersection.size() >= 3)
8674 : {
8675 0 : dfWeight = getArea(intersection);
8676 : }
8677 :
8678 0 : xy2_triangle[1] = xy2[2];
8679 0 : xy2_triangle[2] = xy2[3];
8680 0 : getConvexPolyIntersection(xy1, xy2_triangle,
8681 : intersection);
8682 0 : if (intersection.size() >= 3)
8683 : {
8684 0 : dfWeight += getArea(intersection);
8685 : }
8686 : }
8687 785302 : if (dfWeight > 0.0)
8688 : {
8689 468828 : if (sp.dfArea == 0)
8690 89592 : sp.dfArea = getArea(xy2);
8691 468828 : dfWeight /= sp.dfArea;
8692 : }
8693 :
8694 : #ifdef CHECK_SUM_WITH_GEOS
8695 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 0,
8696 : sp.dfDstX0 - iDstX,
8697 : sp.dfDstY0 - iDstY);
8698 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 1,
8699 : sp.dfDstX1 - iDstX,
8700 : sp.dfDstY1 - iDstY);
8701 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 2,
8702 : sp.dfDstX2 - iDstX,
8703 : sp.dfDstY2 - iDstY);
8704 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 3,
8705 : sp.dfDstX3 - iDstX,
8706 : sp.dfDstY3 - iDstY);
8707 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 4,
8708 : sp.dfDstX0 - iDstX,
8709 : sp.dfDstY0 - iDstY);
8710 :
8711 : double dfWeightGEOS = 0.0;
8712 : auto hIntersection =
8713 : GEOSIntersection_r(hGEOSContext, hP1, hP2);
8714 : if (hIntersection)
8715 : {
8716 : double dfIntersArea = 0.0;
8717 : if (GEOSArea_r(hGEOSContext, hIntersection,
8718 : &dfIntersArea) &&
8719 : dfIntersArea > 0)
8720 : {
8721 : double dfSourceArea = 0.0;
8722 : if (GEOSArea_r(hGEOSContext, hP2, &dfSourceArea))
8723 : {
8724 : dfWeightGEOS = dfIntersArea / dfSourceArea;
8725 : }
8726 : }
8727 : GEOSGeom_destroy_r(hGEOSContext, hIntersection);
8728 : }
8729 : if (fabs(dfWeight - dfWeightGEOS) > 1e-5 * dfWeightGEOS)
8730 : {
8731 : /* ok */ printf("dfWeight=%f dfWeightGEOS=%f\n",
8732 : dfWeight, dfWeightGEOS);
8733 : printf("xy2: "); // ok
8734 : for (const auto &xy : xy2)
8735 : printf("[%f, %f], ", xy.first, xy.second); // ok
8736 : printf("\n"); // ok
8737 : printf("intersection: "); // ok
8738 : for (const auto &xy : intersection)
8739 : printf("[%f, %f], ", xy.first, xy.second); // ok
8740 : printf("\n"); // ok
8741 : }
8742 : #endif
8743 : }
8744 801614 : if (dfWeight > 0.0)
8745 : {
8746 474099 : const GPtrDiff_t iSrcOffset =
8747 474099 : sp.iSrcX +
8748 474099 : static_cast<GPtrDiff_t>(sp.iSrcY) * nSrcXSize;
8749 474099 : dfTotalWeight += dfWeight;
8750 :
8751 474099 : if (poWK->pafUnifiedSrcDensity != nullptr)
8752 : {
8753 0 : dfDensity +=
8754 0 : dfWeight * poWK->pafUnifiedSrcDensity[iSrcOffset];
8755 : }
8756 : else
8757 : {
8758 474099 : dfDensity += dfWeight;
8759 : }
8760 :
8761 1818720 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
8762 : {
8763 : // Returns pixel value if it is not no data.
8764 : double dfBandDensity;
8765 : double dfRealValue;
8766 : double dfImagValue;
8767 2689240 : if (!(GWKGetPixelValue(poWK, iBand, iSrcOffset,
8768 : &dfBandDensity, &dfRealValue,
8769 : &dfImagValue) &&
8770 1344620 : dfBandDensity > BAND_DENSITY_THRESHOLD))
8771 : {
8772 0 : continue;
8773 : }
8774 :
8775 1344620 : adfRealValue[iBand] += dfRealValue * dfWeight;
8776 1344620 : adfImagValue[iBand] += dfImagValue * dfWeight;
8777 1344620 : adfBandDensity[iBand] += dfBandDensity * dfWeight;
8778 1344620 : adfWeight[iBand] += dfWeight;
8779 : }
8780 : }
8781 : }
8782 :
8783 218911 : CPLFree(pahSourcePixel);
8784 :
8785 : /* --------------------------------------------------------------------
8786 : */
8787 : /* Update destination pixel value. */
8788 : /* --------------------------------------------------------------------
8789 : */
8790 218911 : bool bHasFoundDensity = false;
8791 218911 : const GPtrDiff_t iDstOffset =
8792 218911 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
8793 827822 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
8794 : {
8795 608911 : if (adfWeight[iBand] > 0)
8796 : {
8797 : const double dfBandDensity =
8798 608907 : adfBandDensity[iBand] / adfWeight[iBand];
8799 608907 : if (dfBandDensity > BAND_DENSITY_THRESHOLD)
8800 : {
8801 608907 : bHasFoundDensity = true;
8802 608907 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
8803 608907 : adfRealValue[iBand],
8804 608907 : adfImagValue[iBand]);
8805 : }
8806 : }
8807 : }
8808 :
8809 218911 : if (!bHasFoundDensity)
8810 4 : continue;
8811 :
8812 : /* --------------------------------------------------------------------
8813 : */
8814 : /* Update destination density/validity masks. */
8815 : /* --------------------------------------------------------------------
8816 : */
8817 218907 : GWKOverlayDensity(poWK, iDstOffset, dfDensity / dfTotalWeight);
8818 :
8819 218907 : if (poWK->panDstValid != nullptr)
8820 : {
8821 11750 : CPLMaskSet(poWK->panDstValid, iDstOffset);
8822 : }
8823 : }
8824 :
8825 : /* --------------------------------------------------------------------
8826 : */
8827 : /* Report progress to the user, and optionally cancel out. */
8828 : /* --------------------------------------------------------------------
8829 : */
8830 873 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
8831 0 : break;
8832 : }
8833 :
8834 : #ifdef CHECK_SUM_WITH_GEOS
8835 : GEOSGeom_destroy_r(hGEOSContext, hP1);
8836 : GEOSGeom_destroy_r(hGEOSContext, hP2);
8837 : OGRGeometry::freeGEOSContext(hGEOSContext);
8838 : #endif
8839 18 : CPLQuadTreeDestroy(hQuadTree);
8840 18 : }
|