Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: High Performance Image Reprojector
4 : * Purpose: Implementation of the GDALWarpKernel class. Implements the actual
5 : * image warping for a "chunk" of input and output imagery already
6 : * loaded into memory.
7 : * Author: Frank Warmerdam, warmerdam@pobox.com
8 : *
9 : ******************************************************************************
10 : * Copyright (c) 2003, Frank Warmerdam <warmerdam@pobox.com>
11 : * Copyright (c) 2008-2013, Even Rouault <even dot rouault at spatialys.com>
12 : *
13 : * SPDX-License-Identifier: MIT
14 : ****************************************************************************/
15 :
16 : #include "cpl_port.h"
17 : #include "gdalwarper.h"
18 :
19 : #include <cfloat>
20 : #include <cmath>
21 : #include <cstddef>
22 : #include <cstdlib>
23 : #include <cstring>
24 :
25 : #include <algorithm>
26 : #include <limits>
27 : #include <mutex>
28 : #include <new>
29 : #include <utility>
30 : #include <vector>
31 :
32 : #include "cpl_atomic_ops.h"
33 : #include "cpl_conv.h"
34 : #include "cpl_error.h"
35 : #include "cpl_mask.h"
36 : #include "cpl_multiproc.h"
37 : #include "cpl_progress.h"
38 : #include "cpl_string.h"
39 : #include "cpl_vsi.h"
40 : #include "cpl_worker_thread_pool.h"
41 : #include "cpl_quad_tree.h"
42 : #include "gdal.h"
43 : #include "gdal_alg.h"
44 : #include "gdal_alg_priv.h"
45 : #include "gdal_thread_pool.h"
46 : #include "gdalresamplingkernels.h"
47 : #include "gdalwarpkernel_opencl.h"
48 :
49 : // #define CHECK_SUM_WITH_GEOS
50 : #ifdef CHECK_SUM_WITH_GEOS
51 : #include "ogr_geometry.h"
52 : #include "ogr_geos.h"
53 : #endif
54 :
55 : #ifdef USE_NEON_OPTIMIZATIONS
56 : #include "include_sse2neon.h"
57 : #define USE_SSE2
58 :
59 : #include "gdalsse_priv.h"
60 :
61 : // We restrict to 64bit processors because they are guaranteed to have SSE2.
62 : // Could possibly be used too on 32bit, but we would need to check at runtime.
63 : #elif defined(__x86_64) || defined(_M_X64)
64 : #define USE_SSE2
65 :
66 : #include "gdalsse_priv.h"
67 :
68 : #if __SSE4_1__
69 : #include <smmintrin.h>
70 : #endif
71 :
72 : #if __SSE3__
73 : #include <pmmintrin.h>
74 : #endif
75 :
76 : #endif
77 :
// Small positive thresholds for band and source density values.
// NOTE(review): the use sites are outside this excerpt; presumably densities
// below these values are treated as "no contribution" -- confirm against the
// code that reads them.
78 : constexpr double BAND_DENSITY_THRESHOLD = 0.0000000001;
79 : constexpr float SRC_DENSITY_THRESHOLD = 0.000000001f;
80 :
81 : // #define INSTANTIATE_FLOAT64_SSE2_IMPL
82 :
// Filter radius for each resampling algorithm, indexed by the numeric value
// of GDALResampleAlg. GWKGetFilterRadius() indexes this table directly and
// static_asserts that it holds GRA_LAST_VALUE + 1 entries, so a new enum
// value requires a new row here. Algorithms listed with 0 have no entry in
// the kernel function tables below either (nullptr there).
83 : static const int anGWKFilterRadius[] = {
84 : 0, // Nearest neighbour
85 : 1, // Bilinear
86 : 2, // Cubic Convolution (Catmull-Rom)
87 : 2, // Cubic B-Spline
88 : 3, // Lanczos windowed sinc
89 : 0, // Average
90 : 0, // Mode
91 : 0, // Reserved GRA_Gauss=7
92 : 0, // Max
93 : 0, // Min
94 : 0, // Med
95 : 0, // Q1
96 : 0, // Q3
97 : 0, // Sum
98 : 0, // RMS
99 : };
100 :
// Forward declarations of the file-local kernel functions referenced by the
// table below. Each takes a double and returns a double.
101 : static double GWKBilinear(double dfX);
102 : static double GWKCubic(double dfX);
103 : static double GWKBSpline(double dfX);
104 : static double GWKLanczosSinc(double dfX);
105 :
// Kernel function for each resampling algorithm, indexed by the numeric value
// of GDALResampleAlg (same row order as anGWKFilterRadius). nullptr marks the
// algorithms that have no kernel function; callers of GWKGetFilterFunc() must
// be prepared to receive nullptr.
106 : static const FilterFuncType apfGWKFilter[] = {
107 : nullptr, // Nearest neighbour
108 : GWKBilinear, // Bilinear
109 : GWKCubic, // Cubic Convolution (Catmull-Rom)
110 : GWKBSpline, // Cubic B-Spline
111 : GWKLanczosSinc, // Lanczos windowed sinc
112 : nullptr, // Average
113 : nullptr, // Mode
114 : nullptr, // Reserved GRA_Gauss=7
115 : nullptr, // Max
116 : nullptr, // Min
117 : nullptr, // Med
118 : nullptr, // Q1
119 : nullptr, // Q3
120 : nullptr, // Sum
121 : nullptr, // RMS
122 : };
123 :
// Forward declarations of the "4 values" variants of the kernel functions.
// They take a non-const pointer to double.
// NOTE(review): presumably padfVals points to four values that are processed
// (and possibly overwritten) together -- the bodies are outside this excerpt,
// confirm before relying on it.
124 : // TODO(schwehr): Can we make these functions have a const * const arg?
125 : static double GWKBilinear4Values(double *padfVals);
126 : static double GWKCubic4Values(double *padfVals);
127 : static double GWKBSpline4Values(double *padfVals);
128 : static double GWKLanczosSinc4Values(double *padfVals);
129 :
// "4 values" kernel function for each resampling algorithm, indexed by the
// numeric value of GDALResampleAlg (same row order as apfGWKFilter). nullptr
// marks the algorithms that have no such function.
130 : static const FilterFunc4ValuesType apfGWKFilter4Values[] = {
131 : nullptr, // Nearest neighbour
132 : GWKBilinear4Values, // Bilinear
133 : GWKCubic4Values, // Cubic Convolution (Catmull-Rom)
134 : GWKBSpline4Values, // Cubic B-Spline
135 : GWKLanczosSinc4Values, // Lanczos windowed sinc
136 : nullptr, // Average
137 : nullptr, // Mode
138 : nullptr, // Reserved GRA_Gauss=7
139 : nullptr, // Max
140 : nullptr, // Min
141 : nullptr, // Med
142 : nullptr, // Q1
143 : nullptr, // Q3
144 : nullptr, // Sum
145 : nullptr, // RMS
146 : };
147 :
// Return the filter radius registered for eResampleAlg in anGWKFilterRadius.
// The table size is checked at compile time against GRA_LAST_VALUE + 1, but
// eResampleAlg itself is not range-checked at run time: it must be a valid
// GDALResampleAlg value.
148 9583 : int GWKGetFilterRadius(GDALResampleAlg eResampleAlg)
149 : {
150 : static_assert(CPL_ARRAYSIZE(anGWKFilterRadius) == GRA_LAST_VALUE + 1,
151 : "Bad size of anGWKFilterRadius");
152 9583 : return anGWKFilterRadius[eResampleAlg];
153 : }
154 :
// Return the kernel function registered for eResampleAlg in apfGWKFilter, or
// nullptr for algorithms that have none. The table size is checked at compile
// time; eResampleAlg is not range-checked at run time.
155 3669 : FilterFuncType GWKGetFilterFunc(GDALResampleAlg eResampleAlg)
156 : {
157 : static_assert(CPL_ARRAYSIZE(apfGWKFilter) == GRA_LAST_VALUE + 1,
158 : "Bad size of apfGWKFilter");
159 3669 : return apfGWKFilter[eResampleAlg];
160 : }
161 :
// Return the "4 values" kernel function registered for eResampleAlg in
// apfGWKFilter4Values, or nullptr for algorithms that have none. The table
// size is checked at compile time; eResampleAlg is not range-checked at run
// time.
162 3667 : FilterFunc4ValuesType GWKGetFilterFunc4Values(GDALResampleAlg eResampleAlg)
163 : {
164 : static_assert(CPL_ARRAYSIZE(apfGWKFilter4Values) == GRA_LAST_VALUE + 1,
165 : "Bad size of apfGWKFilter4Values");
166 3667 : return apfGWKFilter4Values[eResampleAlg];
167 : }
168 :
// Forward declarations of the file-local warping implementations. Each one
// takes the GDALWarpKernel to process and returns a CPLErr. The names encode
// the resampling algorithm, the mask handling ("NoMasksOrDstDensityOnly") and
// the pixel type they are specialized for.
// The OpenCL variant is only declared when HAVE_OPENCL is defined, and the
// Double variants only when INSTANTIATE_FLOAT64_SSE2_IMPL is defined.
169 : #ifdef HAVE_OPENCL
170 : static CPLErr GWKOpenCLCase(GDALWarpKernel *);
171 : #endif
172 :
173 : static CPLErr GWKGeneralCase(GDALWarpKernel *);
174 : static CPLErr GWKRealCase(GDALWarpKernel *poWK);
175 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
176 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
177 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
178 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
179 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
180 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
181 : #endif
182 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
183 : static CPLErr GWKNearestByte(GDALWarpKernel *poWK);
184 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
185 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
186 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
187 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
188 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
189 : #endif
190 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
191 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
192 : static CPLErr GWKNearestShort(GDALWarpKernel *poWK);
193 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
194 : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK);
195 : static CPLErr GWKAverageOrMode(GDALWarpKernel *);
196 : static CPLErr GWKSumPreserving(GDALWarpKernel *);
197 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
198 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
199 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
200 :
201 : /************************************************************************/
202 : /* GWKJobStruct */
203 : /************************************************************************/
204 :
// Description of one unit of work passed (as void*) to a warping function.
// A job is either run directly by GWKGenericMonoThread() or submitted to a
// job queue by GWKRun(), in which case ThreadFuncAdapter() is the entry point.
205 : struct GWKJobStruct
206 : {
// Synchronization state shared by every job of a run. Held by reference and
// bound at construction; in this file they always refer to the corresponding
// members of a GWKThreadData.
207 : std::mutex &mutex;
208 : std::condition_variable &cv;
// Shared progress counter, incremented by the pfnProgress callbacks.
209 : int &counter;
// Shared flag set to true when the computation must stop (user interruption
// or failure to set up a worker).
210 : bool &stopFlag;
// Kernel being processed by this job.
211 : GDALWarpKernel *poWK;
// Range of destination lines assigned to this job. GWKRun() partitions
// [0, nDstYSize) so that one job's iYMax is the next job's iYMin;
// GWKGenericMonoThread() uses 0 and nDstYSize.
212 : int iYMin;
213 : int iYMax;
// Progress callback (GWKProgressMonoThread or GWKProgressThread in this
// file); returns non-zero if the computation must be interrupted. GWKRun()
// only assigns it when the kernel's pfnProgress is not GDALDummyProgress, so
// it may be nullptr.
214 : int (*pfnProgress)(GWKJobStruct *psJob);
// Transformer argument to use for this job (the kernel's own one in the
// mono-thread case, a per-thread one assigned by ThreadFuncAdapter()
// otherwise).
215 : void *pTransformerArg;
216 : void (*pfnFunc)(
217 : void *); // used by GWKRun() to assign the proper pTransformerArg
218 :
// Bind the shared synchronization state; all other members start out
// null/zero and are filled in by the code that schedules the job.
219 2032 : GWKJobStruct(std::mutex &mutex_, std::condition_variable &cv_,
220 : int &counter_, bool &stopFlag_)
221 2032 : : mutex(mutex_), cv(cv_), counter(counter_), stopFlag(stopFlag_),
222 : poWK(nullptr), iYMin(0), iYMax(0), pfnProgress(nullptr),
223 2032 : pTransformerArg(nullptr), pfnFunc(nullptr)
224 : {
225 2032 : }
226 : };
227 :
// Threading state created by GWKThreadsCreate(), released by GWKThreadsEnd()
// and consumed by GWKRun() / ThreadFuncAdapter(). A default-constructed
// instance (null poJobQueue) means "no worker threads".
228 : struct GWKThreadData
229 : {
// Queue on which jobs are submitted; left null when multithreading is not
// enabled, in which case GWKRun() falls back to GWKGenericMonoThread().
230 : std::unique_ptr<CPLJobQueue> poJobQueue{};
// One pre-allocated job structure per potential worker thread (nMaxThreads
// entries), all bound to the mutex/cv/counter/stopFlag members below.
231 : std::unique_ptr<std::vector<GWKJobStruct>> threadJobs{};
232 : int nMaxThreads{0};
// Shared progress counter and stop request, referenced by every GWKJobStruct.
233 : int counter{0};
234 : bool stopFlag{false};
// Protects counter, stopFlag and the transformer bookkeeping below; cv is
// signalled by GWKProgressThread() and waited on by GWKRun().
235 : std::mutex mutex{};
236 : std::condition_variable cv{};
// True while one worker thread is temporarily using pTransformerArgInput
// instead of a clone (see ThreadFuncAdapter()).
237 : bool bTransformerArgInputAssignedToThread{false};
238 : void *pTransformerArgInput{
239 : nullptr}; // owned by calling layer. Not to be destroyed
// Transformer argument used by each worker thread, keyed by the value
// returned by CPLGetPID() in that thread. Entries remaining at
// GWKThreadsEnd() time are destroyed with GDALDestroyTransformer().
240 : std::map<GIntBig, void *> mapThreadToTransformerArg{};
// Number of jobs submitted by the current GWKRun() call, and number of them
// that have started executing so far.
241 : int nTotalThreadCountForThisRun = 0;
242 : int nCurThreadCountForThisRun = 0;
243 : };
244 :
245 : /************************************************************************/
246 : /* GWKProgressThread() */
247 : /************************************************************************/
248 :
249 : // Return TRUE if the computation must be interrupted.
// Progress callback installed on jobs run by worker threads. Under the shared
// mutex it increments the shared counter and samples the stop flag, then
// wakes up one waiter on the condition variable (GWKRun() waits on it to
// report progress from the submitting thread). The user progress function is
// not called from here.
250 18 : static int GWKProgressThread(GWKJobStruct *psJob)
251 : {
252 18 : bool stop = false;
253 : {
254 18 : std::lock_guard<std::mutex> lock(psJob->mutex);
255 18 : psJob->counter++;
256 18 : stop = psJob->stopFlag;
257 : }
// Notify after releasing the lock so the woken thread can acquire it at once.
258 18 : psJob->cv.notify_one();
259 :
260 18 : return stop;
261 : }
262 :
263 : /************************************************************************/
264 : /* GWKProgressMonoThread() */
265 : /************************************************************************/
266 :
267 : // Return TRUE if the computation must be interrupted.
// Progress callback installed on the job run by GWKGenericMonoThread(). It
// increments the counter and calls the kernel's progress function directly
// with dfProgressBase + dfProgressScale * (counter / iYMax). If that function
// returns false, a CPLE_UserInterrupt error is emitted, the stop flag is set
// and TRUE is returned. No locking is done: there is a single thread.
268 198791 : static int GWKProgressMonoThread(GWKJobStruct *psJob)
269 : {
270 198791 : GDALWarpKernel *poWK = psJob->poWK;
271 : // coverity[missing_lock]
272 198791 : if (!poWK->pfnProgress(
273 198791 : poWK->dfProgressBase +
274 198791 : poWK->dfProgressScale *
275 198791 : (++psJob->counter / static_cast<double>(psJob->iYMax)),
276 : "", poWK->pProgress))
277 : {
278 1 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
279 1 : psJob->stopFlag = true;
280 1 : return TRUE;
281 : }
282 198790 : return FALSE;
283 : }
284 :
285 : /************************************************************************/
286 : /* GWKGenericMonoThread() */
287 : /************************************************************************/
288 :
// Run pfnFunc synchronously in the calling thread on a single job covering
// all destination lines (iYMin = 0, iYMax = poWK->nDstYSize), using the
// kernel's own transformer argument and GWKProgressMonoThread for progress.
// A function-local GWKThreadData provides the counter and stop flag.
// Returns CE_Failure if the stop flag was raised during the run, CE_None
// otherwise.
289 2027 : static CPLErr GWKGenericMonoThread(GDALWarpKernel *poWK,
290 : void (*pfnFunc)(void *pUserData))
291 : {
292 2027 : GWKThreadData td;
293 :
294 : // NOTE: the mutex is not used.
295 2027 : GWKJobStruct job(td.mutex, td.cv, td.counter, td.stopFlag);
296 2027 : job.poWK = poWK;
297 2027 : job.iYMin = 0;
298 2027 : job.iYMax = poWK->nDstYSize;
299 2027 : job.pfnProgress = GWKProgressMonoThread;
300 2027 : job.pTransformerArg = poWK->pTransformerArg;
301 2027 : pfnFunc(&job);
302 :
303 4054 : return td.stopFlag ? CE_Failure : CE_None;
304 : }
305 :
306 : /************************************************************************/
307 : /* GWKThreadsCreate() */
308 : /************************************************************************/
309 :
// Allocate the threading state used by GWKRun().
//
// The thread count comes from the NUM_THREADS entry of papszWarpOptions, or
// else from the GDAL_NUM_THREADS configuration option (default "1").
// "ALL_CPUS" maps to CPLGetNumCPUs(); any other value is parsed with atoi().
// A count <= 1 disables multithreading and the count is capped at 128.
//
// A GWKThreadData is always allocated and returned (as void*); it must be
// released with GWKThreadsEnd(). The job queue, job structures and
// pTransformerArg are only set up when multithreading is enabled and a global
// thread pool could be obtained; otherwise the returned object has a null job
// queue and GWKRun() runs in the calling thread.
// pTransformerArg stays owned by the caller.
310 1401 : void *GWKThreadsCreate(char **papszWarpOptions,
311 : GDALTransformerFunc /* pfnTransformer */,
312 : void *pTransformerArg)
313 : {
314 : const char *pszWarpThreads =
315 1401 : CSLFetchNameValue(papszWarpOptions, "NUM_THREADS");
316 1401 : if (pszWarpThreads == nullptr)
317 1401 : pszWarpThreads = CPLGetConfigOption("GDAL_NUM_THREADS", "1");
318 :
319 1401 : int nThreads = 0;
320 1401 : if (EQUAL(pszWarpThreads, "ALL_CPUS"))
321 3 : nThreads = CPLGetNumCPUs();
322 : else
323 1398 : nThreads = atoi(pszWarpThreads);
// 0 means "no worker threads": a single thread is handled without the pool.
324 1401 : if (nThreads <= 1)
325 1396 : nThreads = 0;
326 1401 : if (nThreads > 128)
327 0 : nThreads = 128;
328 :
329 1401 : GWKThreadData *psThreadData = new GWKThreadData();
330 : auto poThreadPool =
331 1401 : nThreads > 0 ? GDALGetGlobalThreadPool(nThreads) : nullptr;
332 1401 : if (nThreads && poThreadPool)
333 : {
334 5 : psThreadData->nMaxThreads = nThreads;
// All job structures are copies of one prototype, so they all reference the
// same mutex/cv/counter/stopFlag members of psThreadData.
335 5 : psThreadData->threadJobs.reset(new std::vector<GWKJobStruct>(
336 : nThreads,
337 5 : GWKJobStruct(psThreadData->mutex, psThreadData->cv,
338 10 : psThreadData->counter, psThreadData->stopFlag)));
339 :
340 5 : psThreadData->poJobQueue = poThreadPool->CreateJobQueue();
341 5 : psThreadData->pTransformerArgInput = pTransformerArg;
342 : }
343 :
344 1401 : return psThreadData;
345 : }
346 :
347 : /************************************************************************/
348 : /* GWKThreadsEnd() */
349 : /************************************************************************/
350 :
// Release the threading state returned by GWKThreadsCreate(). Passing nullptr
// is a no-op. When a job queue exists, every transformer argument still
// registered in mapThreadToTransformerArg is destroyed with
// GDALDestroyTransformer() (these are the per-thread clones; the caller-owned
// pTransformerArgInput must not be among them, which is asserted), then the
// queue is released and the structure deleted.
351 1401 : void GWKThreadsEnd(void *psThreadDataIn)
352 : {
353 1401 : if (psThreadDataIn == nullptr)
354 0 : return;
355 :
356 1401 : GWKThreadData *psThreadData = static_cast<GWKThreadData *>(psThreadDataIn);
357 1401 : if (psThreadData->poJobQueue)
358 : {
359 : // cppcheck-suppress constVariableReference
360 15 : for (auto &pair : psThreadData->mapThreadToTransformerArg)
361 : {
362 10 : CPLAssert(pair.second != psThreadData->pTransformerArgInput);
363 10 : GDALDestroyTransformer(pair.second);
364 : }
365 5 : psThreadData->poJobQueue.reset();
366 : }
367 1401 : delete psThreadData;
368 : }
369 :
370 : /************************************************************************/
371 : /* ThreadFuncAdapter() */
372 : /************************************************************************/
373 :
// Job-queue entry point used by GWKRun(). pData is a GWKJobStruct*.
// It selects the transformer argument the current thread must use, stores it
// in psJob->pTransformerArg and then calls psJob->pfnFunc(pData).
//
// Transformer selection, in order:
//  1. the argument already registered for this thread id in
//     mapThreadToTransformerArg;
//  2. if this is the last job of the run to start and the original argument
//     is not currently lent out, the original pTransformerArgInput (borrowed
//     for the duration of the job and unregistered afterwards);
//  3. otherwise a fresh clone made with GDALCloneTransformer(), registered in
//     the map for reuse and destroyed by GWKThreadsEnd().
//
// If cloning fails, the stop flag is set and the job function is not called.
// NOTE(review): that early return does not signal the condition variable nor
// advance the counter; GWKRun()'s progress loop waits on both -- check that
// it cannot block forever in that case.
374 15 : static void ThreadFuncAdapter(void *pData)
375 : {
376 15 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
377 15 : GWKThreadData *psThreadData =
378 15 : static_cast<GWKThreadData *>(psJob->poWK->psThreadData);
379 :
380 : // Look if we have already a per-thread transformer
381 15 : void *pTransformerArg = nullptr;
382 15 : const GIntBig nThreadId = CPLGetPID();
383 :
384 : {
385 30 : std::lock_guard<std::mutex> lock(psThreadData->mutex);
386 15 : ++psThreadData->nCurThreadCountForThisRun;
387 :
388 15 : auto oIter = psThreadData->mapThreadToTransformerArg.find(nThreadId);
389 15 : if (oIter != psThreadData->mapThreadToTransformerArg.end())
390 : {
391 0 : pTransformerArg = oIter->second;
392 : }
393 15 : else if (!psThreadData->bTransformerArgInputAssignedToThread &&
394 15 : psThreadData->nCurThreadCountForThisRun ==
395 15 : psThreadData->nTotalThreadCountForThisRun)
396 : {
397 : // If we are the last thread to be started, temporarily borrow the
398 : // original transformer
399 5 : psThreadData->bTransformerArgInputAssignedToThread = true;
400 5 : pTransformerArg = psThreadData->pTransformerArgInput;
401 5 : psThreadData->mapThreadToTransformerArg[nThreadId] =
402 : pTransformerArg;
403 : }
404 :
405 15 : if (pTransformerArg == nullptr)
406 : {
407 10 : CPLAssert(psThreadData->pTransformerArgInput != nullptr);
408 10 : CPLAssert(!psThreadData->bTransformerArgInputAssignedToThread);
409 : }
410 : }
411 :
412 : // If no transformer assigned to current thread, instantiate one
413 15 : if (pTransformerArg == nullptr)
414 : {
415 : // This somehow assumes that GDALCloneTransformer() is thread-safe
416 : // which should normally be the case.
// The clone is made outside of the lock, so several threads may clone
// concurrently.
417 : pTransformerArg =
418 10 : GDALCloneTransformer(psThreadData->pTransformerArgInput);
419 :
420 : // Lock for the stop flag and the transformer map.
421 10 : std::lock_guard<std::mutex> lock(psThreadData->mutex);
422 10 : if (!pTransformerArg)
423 : {
424 0 : psJob->stopFlag = true;
425 0 : return;
426 : }
427 10 : psThreadData->mapThreadToTransformerArg[nThreadId] = pTransformerArg;
428 : }
429 :
430 15 : psJob->pTransformerArg = pTransformerArg;
431 15 : psJob->pfnFunc(pData);
432 :
433 : // Give back original transformer, if borrowed.
434 : {
435 30 : std::lock_guard<std::mutex> lock(psThreadData->mutex);
436 15 : if (psThreadData->bTransformerArgInputAssignedToThread &&
437 8 : pTransformerArg == psThreadData->pTransformerArgInput)
438 : {
// Unregister it so that GWKThreadsEnd() never destroys the caller-owned
// transformer.
439 : psThreadData->mapThreadToTransformerArg.erase(
440 5 : psThreadData->mapThreadToTransformerArg.find(nThreadId));
441 5 : psThreadData->bTransformerArgInputAssignedToThread = false;
442 : }
443 : }
444 : }
445 :
446 : /************************************************************************/
447 : /* GWKRun() */
448 : /************************************************************************/
449 :
// Run the warping function pfnFunc over all destination lines of poWK,
// in the calling thread or split between worker threads.
//
// pszFuncName is only used in the debug message. pfnFunc receives a
// GWKJobStruct* (as void*) describing the range of lines to process.
//
// Steps:
//  - call the progress function once with dfProgressBase; return CE_Failure
//    if it asks to stop;
//  - if there is no threading state or no job queue, delegate to
//    GWKGenericMonoThread();
//  - otherwise choose the number of jobs, split [0, nDstYSize) evenly between
//    them, submit them through ThreadFuncAdapter(), report progress from this
//    thread while they run, and wait for their completion.
//
// Returns CE_Failure if the stop flag was seen set, CE_None otherwise.
//
// NOTE(review): psThreadData->counter and psThreadData->stopFlag are not
// reset in this function. If the same GWKThreadData is used for several
// GWKRun() calls, they carry over from the previous call -- confirm this is
// handled elsewhere.
450 2032 : static CPLErr GWKRun(GDALWarpKernel *poWK, const char *pszFuncName,
451 : void (*pfnFunc)(void *pUserData))
452 :
453 : {
454 2032 : const int nDstYSize = poWK->nDstYSize;
455 :
456 2032 : CPLDebug("GDAL",
457 : "GDALWarpKernel()::%s() "
458 : "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d",
459 : pszFuncName, poWK->nSrcXOff, poWK->nSrcYOff, poWK->nSrcXSize,
460 : poWK->nSrcYSize, poWK->nDstXOff, poWK->nDstYOff, poWK->nDstXSize,
461 : poWK->nDstYSize);
462 :
463 2032 : if (!poWK->pfnProgress(poWK->dfProgressBase, "", poWK->pProgress))
464 : {
465 0 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
466 0 : return CE_Failure;
467 : }
468 :
469 2032 : GWKThreadData *psThreadData =
470 : static_cast<GWKThreadData *>(poWK->psThreadData);
471 2032 : if (psThreadData == nullptr || psThreadData->poJobQueue == nullptr)
472 : {
473 2027 : return GWKGenericMonoThread(poWK, pfnFunc);
474 : }
475 :
// Never use more jobs than nDstYSize / 2, i.e. at least two destination
// lines per job.
476 5 : int nThreads = std::min(psThreadData->nMaxThreads, nDstYSize / 2);
477 : // Config option mostly useful for tests to be able to test multithreading
478 : // with small rasters
479 : const int nWarpChunkSize =
480 5 : atoi(CPLGetConfigOption("WARP_THREAD_CHUNK_SIZE", "65536"));
481 5 : if (nWarpChunkSize > 0)
482 : {
// Also limit to one job per nWarpChunkSize destination pixels. The product
// is computed in 64 bits to avoid int overflow.
483 3 : GIntBig nChunks =
484 3 : static_cast<GIntBig>(nDstYSize) * poWK->nDstXSize / nWarpChunkSize;
485 3 : if (nThreads > nChunks)
486 1 : nThreads = static_cast<int>(nChunks);
487 : }
488 5 : if (nThreads <= 0)
489 1 : nThreads = 1;
490 :
491 5 : CPLDebug("WARP", "Using %d threads", nThreads);
492 :
493 5 : auto &jobs = *psThreadData->threadJobs;
494 5 : CPLAssert(static_cast<int>(jobs.size()) >= nThreads);
495 : // Fill-in job structures.
496 20 : for (int i = 0; i < nThreads; ++i)
497 : {
498 15 : auto &job = jobs[i];
499 15 : job.poWK = poWK;
// Job i gets the lines from i * nDstYSize / nThreads up to (but excluding the
// start of the next job) (i + 1) * nDstYSize / nThreads; 64-bit intermediate
// to avoid overflow.
500 15 : job.iYMin =
501 15 : static_cast<int>(static_cast<int64_t>(i) * nDstYSize / nThreads);
502 15 : job.iYMax = static_cast<int>(static_cast<int64_t>(i + 1) * nDstYSize /
503 15 : nThreads);
// With the dummy progress function, job.pfnProgress is left as it was
// (nullptr for a job structure that has never had it assigned).
504 15 : if (poWK->pfnProgress != GDALDummyProgress)
505 1 : job.pfnProgress = GWKProgressThread;
506 15 : job.pfnFunc = pfnFunc;
507 : }
508 :
509 : bool bStopFlag;
510 : {
// The mutex is held while the jobs are submitted: ThreadFuncAdapter() takes
// the same mutex first thing, so no job reads the thread counts below before
// this thread releases the lock (in cv.wait() or at the end of this scope).
511 5 : std::unique_lock<std::mutex> lock(psThreadData->mutex);
512 :
513 5 : psThreadData->nTotalThreadCountForThisRun = nThreads;
514 : // coverity[missing_lock]
515 5 : psThreadData->nCurThreadCountForThisRun = 0;
516 :
517 : // Start jobs.
518 20 : for (int i = 0; i < nThreads; ++i)
519 : {
520 15 : auto &job = jobs[i];
521 15 : psThreadData->poJobQueue->SubmitJob(ThreadFuncAdapter,
522 : static_cast<void *>(&job));
523 : }
524 :
525 : /* --------------------------------------------------------------------
526 : */
527 : /* Report progress. */
528 : /* --------------------------------------------------------------------
529 : */
// The user progress function is only called from this thread: workers bump
// the counter and signal cv (GWKProgressThread()), this loop wakes up and
// reports counter / nDstYSize.
// NOTE(review): the loop only ends when counter reaches nDstYSize or the
// progress function asks to stop. If workers stop early without reaching
// that count (e.g. stop flag set by a worker), no further notification
// arrives -- check that this wait cannot block forever.
530 5 : if (poWK->pfnProgress != GDALDummyProgress)
531 : {
532 1 : while (psThreadData->counter < nDstYSize)
533 : {
534 1 : psThreadData->cv.wait(lock);
535 1 : if (!poWK->pfnProgress(poWK->dfProgressBase +
536 1 : poWK->dfProgressScale *
537 1 : (psThreadData->counter /
538 1 : static_cast<double>(nDstYSize)),
539 : "", poWK->pProgress))
540 : {
541 1 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
542 1 : psThreadData->stopFlag = true;
543 1 : break;
544 : }
545 : }
546 : }
547 :
// NOTE(review): the stop flag is sampled here, before WaitCompletion(). A
// flag raised by a worker after this point is not reflected in the return
// value of this call.
548 5 : bStopFlag = psThreadData->stopFlag;
549 : }
550 :
551 : /* -------------------------------------------------------------------- */
552 : /* Wait for all jobs to complete. */
553 : /* -------------------------------------------------------------------- */
554 5 : psThreadData->poJobQueue->WaitCompletion();
555 :
556 5 : return bStopFlag ? CE_Failure : CE_None;
557 : }
558 :
559 : /************************************************************************/
560 : /* ==================================================================== */
561 : /* GDALWarpKernel */
562 : /* ==================================================================== */
563 : /************************************************************************/
564 :
565 : /**
566 : * \class GDALWarpKernel "gdalwarper.h"
567 : *
568 : * Low level image warping class.
569 : *
570 : * This class is responsible for low level image warping for one
571 : * "chunk" of imagery. The class is essentially a structure with all
572 : * data members public - primarily so that new special-case functions
573 : * can be added without changing the class declaration.
574 : *
575 : * Applications are normally intended to interact with warping facilities
576 : * through the GDALWarpOperation class, though the GDALWarpKernel can in
577 : * theory be used directly if great care is taken in setting up the
578 : * control data.
579 : *
580 : * <h3>Design Issues</h3>
581 : *
582 : * The intention is that PerformWarp() would analyze the setup in terms
583 : * of the datatype, resampling type, and validity/density mask usage and
584 : * pick one of many specific implementations of the warping algorithm over
585 : * a continuum of optimization vs. generality. At one end there will be a
586 : * reference general purpose implementation of the algorithm that supports
587 : * any data type (working internally in double precision complex), all three
588 : * resampling types, and any or all of the validity/density masks. At the
589 : * other end would be highly optimized algorithms for common cases like
590 : * nearest neighbour resampling on GDT_Byte data with no masks.
591 : *
592 : * The full set of optimized versions have not been decided but we should
593 : * expect to have at least:
594 : * - One for each resampling algorithm for 8bit data with no masks.
595 : * - One for each resampling algorithm for float data with no masks.
596 : * - One for each resampling algorithm for float data with any/all masks
597 : * (essentially the generic case for just float data).
598 : * - One for each resampling algorithm for 8bit data with support for
599 : * input validity masks (per band or per pixel). This handles the common
600 : * case of nodata masking.
601 : * - One for each resampling algorithm for float data with support for
602 : * input validity masks (per band or per pixel). This handles the common
603 : * case of nodata masking.
604 : *
605 : * Some of the specializations would operate on all bands in one pass
606 : * (especially the ones without masking would do this), while others might
607 : * process each band individually to reduce code complexity.
608 : *
609 : * <h3>Masking Semantics</h3>
610 : *
611 : * A detailed explanation of the semantics of the validity and density masks,
612 : * and their effects on resampling kernels is needed here.
613 : */
614 :
615 : /************************************************************************/
616 : /* GDALWarpKernel Data Members */
617 : /************************************************************************/
618 :
619 : /**
620 : * \var GDALResampleAlg GDALWarpKernel::eResample;
621 : *
622 : * Resampling algorithm.
623 : *
624 : * The resampling algorithm to use. One of GRA_NearestNeighbour, GRA_Bilinear,
625 : * GRA_Cubic, GRA_CubicSpline, GRA_Lanczos, GRA_Average, GRA_Mode, GRA_Max,
626 : * GRA_Min, GRA_Med, GRA_Q1, GRA_Q3, GRA_Sum or GRA_RMS.
627 : *
628 : * This field is required. GRA_NearestNeighbour may be used as a default
629 : * value.
630 : */
631 :
632 : /**
633 : * \var GDALDataType GDALWarpKernel::eWorkingDataType;
634 : *
635 : * Working pixel data type.
636 : *
637 : * The datatype of pixels in the source image (papabySrcImage) and
638 : * destination image (papabyDstImage) buffers. Note that operations on
639 : * some data types (such as GDT_Byte) may be much better optimized than other
640 : * less common cases.
641 : *
642 : * This field is required. It may not be GDT_Unknown.
643 : */
644 :
645 : /**
646 : * \var int GDALWarpKernel::nBands;
647 : *
648 : * Number of bands.
649 : *
650 : * The number of bands (layers) of imagery being warped. Determines the
651 : * number of entries in the papabySrcImage, papanBandSrcValid,
652 : * and papabyDstImage arrays.
653 : *
654 : * This field is required.
655 : */
656 :
657 : /**
658 : * \var int GDALWarpKernel::nSrcXSize;
659 : *
660 : * Source image width in pixels.
661 : *
662 : * This field is required.
663 : */
664 :
665 : /**
666 : * \var int GDALWarpKernel::nSrcYSize;
667 : *
668 : * Source image height in pixels.
669 : *
670 : * This field is required.
671 : */
672 :
673 : /**
674 : * \var double GDALWarpKernel::dfSrcXExtraSize;
675 : *
676 : * Number of pixels included in nSrcXSize that are present on the edges of
677 : * the area of interest to take into account the width of the kernel.
678 : *
679 : * This field is required.
680 : */
681 :
682 : /**
683 : * \var double GDALWarpKernel::dfSrcYExtraSize;
684 : *
685 : * Number of pixels included in nSrcYSize that are present on the edges of
686 : * the area of interest to take into account the height of the kernel.
687 : *
688 : * This field is required.
689 : */
690 :
691 : /**
692 : * \var GByte **GDALWarpKernel::papabySrcImage;
693 : *
694 : * Array of source image band data.
695 : *
696 : * This is an array (of size GDALWarpKernel::nBands) of pointers
697 : * to image data. Each individual band of image data is organized as a single
698 : * block of image data in left to right, then bottom to top order. The actual
699 : * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
700 : *
701 : * To access the pixel value for the (x=3, y=4) pixel (zero based) of
702 : * the second band with eWorkingDataType set to GDT_Float32 use code like
703 : * this:
704 : *
705 : * \code
706 : * float dfPixelValue;
707 : * int nBand = 2-1; // Band indexes are zero based.
708 : * int nPixel = 3; // Zero based.
709 : * int nLine = 4; // Zero based.
710 : *
711 : * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
712 : * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
713 : * assert( nBand >= 0 && nBand < poKern->nBands );
714 : * dfPixelValue = ((float *) poKern->papabySrcImage[nBand])
715 : * [nPixel + nLine * poKern->nSrcXSize];
716 : * \endcode
717 : *
718 : * This field is required.
719 : */
720 :
721 : /**
722 : * \var GUInt32 **GDALWarpKernel::papanBandSrcValid;
723 : *
724 : * Per band validity mask for source pixels.
725 : *
726 : * Array of pixel validity mask layers for each source band. Each of
727 : * the mask layers is the same size (in pixels) as the source image with
728 : * one bit per pixel. Note that it is legal (and common) for this to be
729 : * NULL indicating that none of the pixels are invalidated, or for some
730 : * band validity masks to be NULL in which case all pixels of the band are
731 : * valid. The following code can be used to test the validity of a particular
732 : * pixel.
733 : *
734 : * \code
735 : * int bIsValid = TRUE;
736 : * int nBand = 2-1; // Band indexes are zero based.
737 : * int nPixel = 3; // Zero based.
738 : * int nLine = 4; // Zero based.
739 : *
740 : * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
741 : * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
742 : * assert( nBand >= 0 && nBand < poKern->nBands );
743 : *
744 : * if( poKern->papanBandSrcValid != NULL
745 : * && poKern->papanBandSrcValid[nBand] != NULL )
746 : * {
747 : * GUInt32 *panBandMask = poKern->papanBandSrcValid[nBand];
748 : * int iPixelOffset = nPixel + nLine * poKern->nSrcXSize;
749 : *
750 : * bIsValid = CPLMaskGet(panBandMask, iPixelOffset);
751 : * }
752 : * \endcode
753 : */
754 :
755 : /**
756 : * \var GUInt32 *GDALWarpKernel::panUnifiedSrcValid;
757 : *
758 : * Per pixel validity mask for source pixels.
759 : *
760 : * A single validity mask layer that applies to the pixels of all source
761 : * bands. It is accessed similarly to papanBandSrcValid, but without the
762 : * extra level of band indirection.
763 : *
764 : * This pointer may be NULL indicating that all pixels are valid.
765 : *
766 : * Note that if both panUnifiedSrcValid, and papanBandSrcValid are available,
767 : * the pixel isn't considered to be valid unless both arrays indicate it is
768 : * valid.
769 : */
770 :
771 : /**
772 : * \var float *GDALWarpKernel::pafUnifiedSrcDensity;
773 : *
774 : * Per pixel density mask for source pixels.
775 : *
776 : * A single density mask layer that applies to the pixels of all source
777 : * bands. It contains values between 0.0 and 1.0 indicating the degree to
778 : * which this pixel should be allowed to contribute to the output result.
779 : *
780 : * This pointer may be NULL indicating that all pixels have a density of 1.0.
781 : *
782 : * The density for a pixel may be accessed like this:
783 : *
784 : * \code
785 : * float fDensity = 1.0;
786 : * int nPixel = 3; // Zero based.
787 : * int nLine = 4; // Zero based.
788 : *
789 : * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
790 : * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
791 : * if( poKern->pafUnifiedSrcDensity != NULL )
792 : * fDensity = poKern->pafUnifiedSrcDensity
793 : * [nPixel + nLine * poKern->nSrcXSize];
794 : * \endcode
795 : */
796 :
797 : /**
798 : * \var int GDALWarpKernel::nDstXSize;
799 : *
800 : * Width of destination image in pixels.
801 : *
802 : * This field is required.
803 : */
804 :
805 : /**
806 : * \var int GDALWarpKernel::nDstYSize;
807 : *
808 : * Height of destination image in pixels.
809 : *
810 : * This field is required.
811 : */
812 :
813 : /**
814 : * \var GByte **GDALWarpKernel::papabyDstImage;
815 : *
816 : * Array of destination image band data.
817 : *
818 : * This is an array (of size GDALWarpKernel::nBands) of pointers
819 : * to image data. Each individual band of image data is organized as a single
820 : * block of image data in left to right, then bottom to top order. The actual
821 : * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
822 : *
823 : * To access the pixel value for the (x=3, y=4) pixel (zero based) of
824 : * the second band with eWorkingDataType set to GDT_Float32 use code like
825 : * this:
826 : *
827 : * \code
828 : * float dfPixelValue;
829 : * int nBand = 2-1; // Band indexes are zero based.
830 : * int nPixel = 3; // Zero based.
831 : * int nLine = 4; // Zero based.
832 : *
833 : * assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
834 : * assert( nLine >= 0 && nLine < poKern->nDstYSize );
835 : * assert( nBand >= 0 && nBand < poKern->nBands );
836 : * dfPixelValue = ((float *) poKern->papabyDstImage[nBand])
837 : * [nPixel + nLine * poKern->nDstXSize];
838 : * \endcode
839 : *
840 : * This field is required.
841 : */
842 :
843 : /**
844 : * \var GUInt32 *GDALWarpKernel::panDstValid;
845 : *
846 : * Per pixel validity mask for destination pixels.
847 : *
848 : * A single validity mask layer that applies to the pixels of all destination
849 : * bands. It is accessed similarly to panUnifiedSrcValid, but based
850 : * on the size of the destination image.
851 : *
852 : * This pointer may be NULL indicating that all pixels are valid.
853 : */
854 :
855 : /**
856 : * \var float *GDALWarpKernel::pafDstDensity;
857 : *
858 : * Per pixel density mask for destination pixels.
859 : *
860 : * A single density mask layer that applies to the pixels of all destination
861 : * bands. It contains values between 0.0 and 1.0.
862 : *
863 : * This pointer may be NULL indicating that all pixels have a density of 1.0.
864 : *
865 : * The density for a pixel may be accessed like this:
866 : *
867 : * \code
868 : * float fDensity = 1.0;
869 : * int nPixel = 3; // Zero based.
870 : * int nLine = 4; // Zero based.
871 : *
872 : * assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
873 : * assert( nLine >= 0 && nLine < poKern->nDstYSize );
874 : * if( poKern->pafDstDensity != NULL )
875 : * fDensity = poKern->pafDstDensity[nPixel + nLine * poKern->nDstXSize];
876 : * \endcode
877 : */
878 :
879 : /**
880 : * \var int GDALWarpKernel::nSrcXOff;
881 : *
882 : * X offset to source pixel coordinates for transformation.
883 : *
884 : * See pfnTransformer.
885 : *
886 : * This field is required.
887 : */
888 :
889 : /**
890 : * \var int GDALWarpKernel::nSrcYOff;
891 : *
892 : * Y offset to source pixel coordinates for transformation.
893 : *
894 : * See pfnTransformer.
895 : *
896 : * This field is required.
897 : */
898 :
899 : /**
900 : * \var int GDALWarpKernel::nDstXOff;
901 : *
902 : * X offset to destination pixel coordinates for transformation.
903 : *
904 : * See pfnTransformer.
905 : *
906 : * This field is required.
907 : */
908 :
909 : /**
910 : * \var int GDALWarpKernel::nDstYOff;
911 : *
912 : * Y offset to destination pixel coordinates for transformation.
913 : *
914 : * See pfnTransformer.
915 : *
916 : * This field is required.
917 : */
918 :
919 : /**
920 : * \var GDALTransformerFunc GDALWarpKernel::pfnTransformer;
921 : *
922 : * Source/destination location transformer.
923 : *
924 : * The function to call to transform coordinates between source image
925 : * pixel/line coordinates and destination image pixel/line coordinates.
926 : * See GDALTransformerFunc() for details of the semantics of this function.
927 : *
928 : * The GDALWarpKernel algorithm will only ever use this transformer in
929 : * "destination to source" mode (bDstToSrc=TRUE), and will always pass
930 : * partial or complete scanlines of points in the destination image as
931 : * input. This means, among other things, that it is safe to use the
932 : * approximating transform GDALApproxTransform() as the transformation
933 : * function.
934 : *
935 : * Source and destination images may be subsets of a larger overall image.
936 : * The transformation algorithms will expect and return pixel/line coordinates
937 : * in terms of this larger image, so coordinates need to be offset by
938 : * the offsets specified in nSrcXOff, nSrcYOff, nDstXOff, and nDstYOff before
939 : * passing to pfnTransformer, and after return from it.
940 : *
941 : * The GDALWarpKernel::pTransformerArg value will be passed as the callback
942 : * data to this function when it is called.
943 : *
944 : * This field is required.
945 : */
946 :
947 : /**
948 : * \var void *GDALWarpKernel::pTransformerArg;
949 : *
950 : * Callback data for pfnTransformer.
951 : *
952 : * This field may be NULL if not required for the pfnTransformer being used.
953 : */
954 :
955 : /**
956 : * \var GDALProgressFunc GDALWarpKernel::pfnProgress;
957 : *
958 : * The function to call to report progress of the algorithm, and to check
959 : * for a requested termination of the operation. It operates according to
960 : * GDALProgressFunc() semantics.
961 : *
962 : * Generally speaking the progress function will be invoked for each
963 : * scanline of the destination buffer that has been processed.
964 : *
965 : * This field may be NULL (internally set to GDALDummyProgress()).
966 : */
967 :
968 : /**
969 : * \var void *GDALWarpKernel::pProgress;
970 : *
971 : * Callback data for pfnProgress.
972 : *
973 : * This field may be NULL if not required for the pfnProgress being used.
974 : */
975 :
976 : /************************************************************************/
977 : /* GDALWarpKernel() */
978 : /************************************************************************/
979 :
980 2338 : GDALWarpKernel::GDALWarpKernel()
981 : : papszWarpOptions(nullptr), eResample(GRA_NearestNeighbour),
982 : eWorkingDataType(GDT_Unknown), nBands(0), nSrcXSize(0), nSrcYSize(0),
983 : dfSrcXExtraSize(0.0), dfSrcYExtraSize(0.0), papabySrcImage(nullptr),
984 : papanBandSrcValid(nullptr), panUnifiedSrcValid(nullptr),
985 : pafUnifiedSrcDensity(nullptr), nDstXSize(0), nDstYSize(0),
986 : papabyDstImage(nullptr), panDstValid(nullptr), pafDstDensity(nullptr),
987 : dfXScale(1.0), dfYScale(1.0), dfXFilter(0.0), dfYFilter(0.0), nXRadius(0),
988 : nYRadius(0), nFiltInitX(0), nFiltInitY(0), nSrcXOff(0), nSrcYOff(0),
989 : nDstXOff(0), nDstYOff(0), pfnTransformer(nullptr),
990 : pTransformerArg(nullptr), pfnProgress(GDALDummyProgress),
991 : pProgress(nullptr), dfProgressBase(0.0), dfProgressScale(1.0),
992 : padfDstNoDataReal(nullptr), psThreadData(nullptr),
993 2338 : eTieStrategy(GWKTS_First)
994 : {
995 2338 : }
996 :
997 : /************************************************************************/
998 : /* ~GDALWarpKernel() */
999 : /************************************************************************/
1000 :
1001 2338 : GDALWarpKernel::~GDALWarpKernel()
1002 : {
1003 2338 : }
1004 :
1005 : /************************************************************************/
1006 : /* PerformWarp() */
1007 : /************************************************************************/
1008 :
1009 : /**
1010 : * \fn CPLErr GDALWarpKernel::PerformWarp();
1011 : *
1012 : * This method performs the warp described in the GDALWarpKernel.
1013 : *
1014 : * @return CE_None on success or CE_Failure if an error occurs.
1015 : */
1016 :
1017 2336 : CPLErr GDALWarpKernel::PerformWarp()
1018 :
1019 : {
1020 2336 : const CPLErr eErr = Validate();
1021 :
1022 2336 : if (eErr != CE_None)
1023 1 : return eErr;
1024 :
1025 : // See #2445 and #3079.
1026 2335 : if (nSrcXSize <= 0 || nSrcYSize <= 0)
1027 : {
1028 303 : if (!pfnProgress(dfProgressBase + dfProgressScale, "", pProgress))
1029 : {
1030 0 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
1031 0 : return CE_Failure;
1032 : }
1033 303 : return CE_None;
1034 : }
1035 :
1036 : /* -------------------------------------------------------------------- */
1037 : /* Pre-calculate resampling scales and window sizes for filtering. */
1038 : /* -------------------------------------------------------------------- */
1039 :
1040 2032 : dfXScale = static_cast<double>(nDstXSize) / (nSrcXSize - dfSrcXExtraSize);
1041 2032 : dfYScale = static_cast<double>(nDstYSize) / (nSrcYSize - dfSrcYExtraSize);
1042 2032 : if (nSrcXSize >= nDstXSize && nSrcXSize <= nDstXSize + dfSrcXExtraSize)
1043 1263 : dfXScale = 1.0;
1044 2032 : if (nSrcYSize >= nDstYSize && nSrcYSize <= nDstYSize + dfSrcYExtraSize)
1045 1016 : dfYScale = 1.0;
1046 2032 : if (dfXScale < 1.0)
1047 : {
1048 548 : double dfXReciprocalScale = 1.0 / dfXScale;
1049 548 : const int nXReciprocalScale =
1050 548 : static_cast<int>(dfXReciprocalScale + 0.5);
1051 548 : if (fabs(dfXReciprocalScale - nXReciprocalScale) < 0.05)
1052 431 : dfXScale = 1.0 / nXReciprocalScale;
1053 : }
1054 2032 : if (dfYScale < 1.0)
1055 : {
1056 517 : double dfYReciprocalScale = 1.0 / dfYScale;
1057 517 : const int nYReciprocalScale =
1058 517 : static_cast<int>(dfYReciprocalScale + 0.5);
1059 517 : if (fabs(dfYReciprocalScale - nYReciprocalScale) < 0.05)
1060 368 : dfYScale = 1.0 / nYReciprocalScale;
1061 : }
1062 :
1063 : // XSCALE and YSCALE undocumented for now. Can help in some cases.
1064 : // Best would probably be a per-pixel scale computation.
1065 2032 : const char *pszXScale = CSLFetchNameValue(papszWarpOptions, "XSCALE");
1066 2032 : if (pszXScale != nullptr && !EQUAL(pszXScale, "FROM_GRID_SAMPLING"))
1067 1 : dfXScale = CPLAtof(pszXScale);
1068 2032 : const char *pszYScale = CSLFetchNameValue(papszWarpOptions, "YSCALE");
1069 2032 : if (pszYScale != nullptr)
1070 1 : dfYScale = CPLAtof(pszYScale);
1071 :
1072 : // If the xscale is significantly lower than the yscale, this is highly
1073 : // suspicious of a situation of wrapping a very large virtual file in
1074 : // geographic coordinates with left and right parts being close to the
1075 : // antimeridian. In that situation, the xscale computed by the above method
1076 : // is completely wrong. Prefer doing an average of a few sample points
1077 : // instead
1078 2032 : if ((dfYScale / dfXScale > 100 ||
1079 1 : (pszXScale != nullptr && EQUAL(pszXScale, "FROM_GRID_SAMPLING"))))
1080 : {
1081 : // Sample points along a grid
1082 4 : const int nPointsX = std::min(10, nDstXSize);
1083 4 : const int nPointsY = std::min(10, nDstYSize);
1084 4 : const int nPoints = 3 * nPointsX * nPointsY;
1085 8 : std::vector<double> padfX;
1086 8 : std::vector<double> padfY;
1087 8 : std::vector<double> padfZ(nPoints);
1088 8 : std::vector<int> pabSuccess(nPoints);
1089 44 : for (int iY = 0; iY < nPointsY; iY++)
1090 : {
1091 440 : for (int iX = 0; iX < nPointsX; iX++)
1092 : {
1093 400 : const double dfX =
1094 : nPointsX == 1
1095 400 : ? 0.0
1096 400 : : static_cast<double>(iX) * nDstXSize / (nPointsX - 1);
1097 400 : const double dfY =
1098 : nPointsY == 1
1099 400 : ? 0.0
1100 400 : : static_cast<double>(iY) * nDstYSize / (nPointsY - 1);
1101 :
1102 : // Reproject each destination sample point and its neighbours
1103 : // at (x+1,y) and (x,y+1), so as to get the local scale.
1104 400 : padfX.push_back(dfX);
1105 400 : padfY.push_back(dfY);
1106 :
1107 400 : padfX.push_back((iX == nPointsX - 1) ? dfX - 1 : dfX + 1);
1108 400 : padfY.push_back(dfY);
1109 :
1110 400 : padfX.push_back(dfX);
1111 400 : padfY.push_back((iY == nPointsY - 1) ? dfY - 1 : dfY + 1);
1112 : }
1113 : }
1114 4 : pfnTransformer(pTransformerArg, TRUE, nPoints, &padfX[0], &padfY[0],
1115 4 : &padfZ[0], &pabSuccess[0]);
1116 :
1117 : // Compute the xscale at each sampling point
1118 8 : std::vector<double> adfXScales;
1119 404 : for (int i = 0; i < nPoints; i += 3)
1120 : {
1121 400 : if (pabSuccess[i] && pabSuccess[i + 1] && pabSuccess[i + 2])
1122 : {
1123 : const double dfPointXScale =
1124 400 : 1.0 / std::max(std::abs(padfX[i + 1] - padfX[i]),
1125 800 : std::abs(padfX[i + 2] - padfX[i]));
1126 400 : adfXScales.push_back(dfPointXScale);
1127 : }
1128 : }
1129 :
1130 : // Sort by increasing xcale
1131 4 : std::sort(adfXScales.begin(), adfXScales.end());
1132 :
1133 4 : if (!adfXScales.empty())
1134 : {
1135 : // Compute the average of scales, but eliminate outliers small
1136 : // scales, if some samples are just along the discontinuity.
1137 4 : const double dfMaxPointXScale = adfXScales.back();
1138 4 : double dfSumPointXScale = 0;
1139 4 : int nCountPointScale = 0;
1140 404 : for (double dfPointXScale : adfXScales)
1141 : {
1142 400 : if (dfPointXScale > dfMaxPointXScale / 10)
1143 : {
1144 398 : dfSumPointXScale += dfPointXScale;
1145 398 : nCountPointScale++;
1146 : }
1147 : }
1148 4 : if (nCountPointScale > 0) // should always be true
1149 : {
1150 4 : const double dfXScaleFromSampling =
1151 4 : dfSumPointXScale / nCountPointScale;
1152 : #if DEBUG_VERBOSE
1153 : CPLDebug("WARP", "Correcting dfXScale from %f to %f", dfXScale,
1154 : dfXScaleFromSampling);
1155 : #endif
1156 4 : dfXScale = dfXScaleFromSampling;
1157 : }
1158 : }
1159 : }
1160 :
1161 : #if DEBUG_VERBOSE
1162 : CPLDebug("WARP", "dfXScale = %f, dfYScale = %f", dfXScale, dfYScale);
1163 : #endif
1164 :
1165 2032 : const int bUse4SamplesFormula = dfXScale >= 0.95 && dfYScale >= 0.95;
1166 :
1167 : // Safety check for callers that would use GDALWarpKernel without using
1168 : // GDALWarpOperation.
1169 1969 : if ((eResample == GRA_CubicSpline || eResample == GRA_Lanczos ||
1170 1906 : ((eResample == GRA_Cubic || eResample == GRA_Bilinear) &&
1171 4064 : !bUse4SamplesFormula)) &&
1172 388 : atoi(CSLFetchNameValueDef(papszWarpOptions, "EXTRA_ELTS", "0")) !=
1173 : WARP_EXTRA_ELTS)
1174 : {
1175 0 : CPLError(CE_Failure, CPLE_AppDefined,
1176 : "Source arrays must have WARP_EXTRA_ELTS extra elements at "
1177 : "their end. "
1178 : "See GDALWarpKernel class definition. If this condition is "
1179 : "fulfilled, define a EXTRA_ELTS=%d warp options",
1180 : WARP_EXTRA_ELTS);
1181 0 : return CE_Failure;
1182 : }
1183 :
1184 2032 : dfXFilter = anGWKFilterRadius[eResample];
1185 2032 : dfYFilter = anGWKFilterRadius[eResample];
1186 :
1187 2032 : nXRadius = dfXScale < 1.0 ? static_cast<int>(ceil(dfXFilter / dfXScale))
1188 1566 : : static_cast<int>(dfXFilter);
1189 2032 : nYRadius = dfYScale < 1.0 ? static_cast<int>(ceil(dfYFilter / dfYScale))
1190 1543 : : static_cast<int>(dfYFilter);
1191 :
1192 : // Filter window offset depends on the parity of the kernel radius.
1193 2032 : nFiltInitX = ((anGWKFilterRadius[eResample] + 1) % 2) - nXRadius;
1194 2032 : nFiltInitY = ((anGWKFilterRadius[eResample] + 1) % 2) - nYRadius;
1195 :
1196 2032 : bApplyVerticalShift =
1197 2032 : CPLFetchBool(papszWarpOptions, "APPLY_VERTICAL_SHIFT", false);
1198 2032 : dfMultFactorVerticalShift = CPLAtof(CSLFetchNameValueDef(
1199 2032 : papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT", "1.0"));
1200 :
1201 : /* -------------------------------------------------------------------- */
1202 : /* Set up resampling functions. */
1203 : /* -------------------------------------------------------------------- */
1204 2032 : if (CPLFetchBool(papszWarpOptions, "USE_GENERAL_CASE", false))
1205 12 : return GWKGeneralCase(this);
1206 :
1207 : #if defined(HAVE_OPENCL)
1208 571 : if ((eWorkingDataType == GDT_Byte || eWorkingDataType == GDT_CInt16 ||
1209 399 : eWorkingDataType == GDT_UInt16 || eWorkingDataType == GDT_Int16 ||
1210 265 : eWorkingDataType == GDT_CFloat32 || eWorkingDataType == GDT_Float32) &&
1211 1875 : (eResample == GRA_Bilinear || eResample == GRA_Cubic ||
1212 1435 : eResample == GRA_CubicSpline || eResample == GRA_Lanczos) &&
1213 4563 : !bApplyVerticalShift &&
1214 : // OpenCL warping gives different results than the ones expected by autotest,
1215 : // so disable it by default even if found.
1216 1046 : CPLTestBool(
1217 523 : CSLFetchNameValueDef(papszWarpOptions, "USE_OPENCL",
1218 : CPLGetConfigOption("GDAL_USE_OPENCL", "NO"))))
1219 : {
1220 0 : if (pafUnifiedSrcDensity != nullptr)
1221 : {
1222 : // If pafUnifiedSrcDensity is only set to 1.0, then we can
1223 : // discard it.
1224 0 : bool bFoundNotOne = false;
1225 0 : for (GPtrDiff_t j = 0;
1226 0 : j < static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize; j++)
1227 : {
1228 0 : if (pafUnifiedSrcDensity[j] != 1.0)
1229 : {
1230 0 : bFoundNotOne = true;
1231 0 : break;
1232 : }
1233 : }
1234 0 : if (!bFoundNotOne)
1235 : {
1236 0 : CPLFree(pafUnifiedSrcDensity);
1237 0 : pafUnifiedSrcDensity = nullptr;
1238 : }
1239 : }
1240 :
1241 0 : if (pafUnifiedSrcDensity != nullptr)
1242 : {
1243 : // Typically if there's a cutline or an alpha band
1244 0 : CPLDebugOnce("WARP", "pafUnifiedSrcDensity is not null, "
1245 : "hence OpenCL warper cannot be used");
1246 : }
1247 : else
1248 : {
1249 0 : const CPLErr eResult = GWKOpenCLCase(this);
1250 :
1251 : // CE_Warning tells us a suitable OpenCL environment was not available
1252 : // so we fall through to other CPU based methods.
1253 0 : if (eResult != CE_Warning)
1254 0 : return eResult;
1255 : }
1256 : }
1257 : #endif // defined HAVE_OPENCL
1258 :
1259 2020 : const bool bNoMasksOrDstDensityOnly =
1260 2016 : papanBandSrcValid == nullptr && panUnifiedSrcValid == nullptr &&
1261 4036 : pafUnifiedSrcDensity == nullptr && panDstValid == nullptr;
1262 :
1263 2020 : if (eWorkingDataType == GDT_Byte && eResample == GRA_NearestNeighbour &&
1264 : bNoMasksOrDstDensityOnly)
1265 863 : return GWKNearestNoMasksOrDstDensityOnlyByte(this);
1266 :
1267 1157 : if (eWorkingDataType == GDT_Byte && eResample == GRA_Bilinear &&
1268 : bNoMasksOrDstDensityOnly)
1269 126 : return GWKBilinearNoMasksOrDstDensityOnlyByte(this);
1270 :
1271 1031 : if (eWorkingDataType == GDT_Byte && eResample == GRA_Cubic &&
1272 : bNoMasksOrDstDensityOnly)
1273 72 : return GWKCubicNoMasksOrDstDensityOnlyByte(this);
1274 :
1275 959 : if (eWorkingDataType == GDT_Byte && eResample == GRA_CubicSpline &&
1276 : bNoMasksOrDstDensityOnly)
1277 12 : return GWKCubicSplineNoMasksOrDstDensityOnlyByte(this);
1278 :
1279 947 : if (eWorkingDataType == GDT_Byte && eResample == GRA_NearestNeighbour)
1280 276 : return GWKNearestByte(this);
1281 :
1282 671 : if ((eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16) &&
1283 134 : eResample == GRA_NearestNeighbour && bNoMasksOrDstDensityOnly)
1284 18 : return GWKNearestNoMasksOrDstDensityOnlyShort(this);
1285 :
1286 653 : if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Cubic &&
1287 : bNoMasksOrDstDensityOnly)
1288 5 : return GWKCubicNoMasksOrDstDensityOnlyShort(this);
1289 :
1290 648 : if ((eWorkingDataType == GDT_Int16) && eResample == GRA_CubicSpline &&
1291 : bNoMasksOrDstDensityOnly)
1292 6 : return GWKCubicSplineNoMasksOrDstDensityOnlyShort(this);
1293 :
1294 642 : if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Bilinear &&
1295 : bNoMasksOrDstDensityOnly)
1296 18 : return GWKBilinearNoMasksOrDstDensityOnlyShort(this);
1297 :
1298 624 : if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Cubic &&
1299 : bNoMasksOrDstDensityOnly)
1300 12 : return GWKCubicNoMasksOrDstDensityOnlyUShort(this);
1301 :
1302 612 : if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_CubicSpline &&
1303 : bNoMasksOrDstDensityOnly)
1304 5 : return GWKCubicSplineNoMasksOrDstDensityOnlyUShort(this);
1305 :
1306 607 : if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Bilinear &&
1307 : bNoMasksOrDstDensityOnly)
1308 6 : return GWKBilinearNoMasksOrDstDensityOnlyUShort(this);
1309 :
1310 601 : if ((eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16) &&
1311 64 : eResample == GRA_NearestNeighbour)
1312 27 : return GWKNearestShort(this);
1313 :
1314 574 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour &&
1315 : bNoMasksOrDstDensityOnly)
1316 11 : return GWKNearestNoMasksOrDstDensityOnlyFloat(this);
1317 :
1318 563 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour)
1319 36 : return GWKNearestFloat(this);
1320 :
1321 527 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_Bilinear &&
1322 : bNoMasksOrDstDensityOnly)
1323 5 : return GWKBilinearNoMasksOrDstDensityOnlyFloat(this);
1324 :
1325 522 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_Cubic &&
1326 : bNoMasksOrDstDensityOnly)
1327 9 : return GWKCubicNoMasksOrDstDensityOnlyFloat(this);
1328 :
1329 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
1330 : if (eWorkingDataType == GDT_Float64 && eResample == GRA_Bilinear &&
1331 : bNoMasksOrDstDensityOnly)
1332 : return GWKBilinearNoMasksOrDstDensityOnlyDouble(this);
1333 :
1334 : if (eWorkingDataType == GDT_Float64 && eResample == GRA_Cubic &&
1335 : bNoMasksOrDstDensityOnly)
1336 : return GWKCubicNoMasksOrDstDensityOnlyDouble(this);
1337 : #endif
1338 :
1339 513 : if (eResample == GRA_Average)
1340 71 : return GWKAverageOrMode(this);
1341 :
1342 442 : if (eResample == GRA_RMS)
1343 9 : return GWKAverageOrMode(this);
1344 :
1345 433 : if (eResample == GRA_Mode)
1346 23 : return GWKAverageOrMode(this);
1347 :
1348 410 : if (eResample == GRA_Max)
1349 6 : return GWKAverageOrMode(this);
1350 :
1351 404 : if (eResample == GRA_Min)
1352 5 : return GWKAverageOrMode(this);
1353 :
1354 399 : if (eResample == GRA_Med)
1355 6 : return GWKAverageOrMode(this);
1356 :
1357 393 : if (eResample == GRA_Q1)
1358 5 : return GWKAverageOrMode(this);
1359 :
1360 388 : if (eResample == GRA_Q3)
1361 5 : return GWKAverageOrMode(this);
1362 :
1363 383 : if (eResample == GRA_Sum)
1364 18 : return GWKSumPreserving(this);
1365 :
1366 365 : if (!GDALDataTypeIsComplex(eWorkingDataType))
1367 : {
1368 134 : return GWKRealCase(this);
1369 : }
1370 :
1371 231 : return GWKGeneralCase(this);
1372 : }
1373 :
1374 : /************************************************************************/
1375 : /* Validate() */
1376 : /************************************************************************/
1377 :
1378 : /**
1379 : * \fn CPLErr GDALWarpKernel::Validate()
1380 : *
1381 : * Check the settings in the GDALWarpKernel, and issue a CPLError()
1382 : * (and return CE_Failure) if the configuration is considered to be
1383 : * invalid for some reason.
1384 : *
1385 : * This method will also do some standard defaulting such as setting
1386 : * pfnProgress to GDALDummyProgress() if it is NULL.
1387 : *
1388 : * @return CE_None on success or CE_Failure if an error is detected.
1389 : */
1390 :
1391 2336 : CPLErr GDALWarpKernel::Validate()
1392 :
1393 : {
1394 2336 : if (static_cast<size_t>(eResample) >=
1395 : (sizeof(anGWKFilterRadius) / sizeof(anGWKFilterRadius[0])))
1396 : {
1397 0 : CPLError(CE_Failure, CPLE_AppDefined,
1398 : "Unsupported resampling method %d.",
1399 0 : static_cast<int>(eResample));
1400 0 : return CE_Failure;
1401 : }
1402 :
1403 : // Tuples of values (e.g. "<R>,<G>,<B>" or "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>)") that must
1404 : // be ignored as contributing source pixels during resampling. Only taken into account by
1405 : // Average currently
1406 : const char *pszExcludedValues =
1407 2336 : CSLFetchNameValue(papszWarpOptions, "EXCLUDED_VALUES");
1408 2336 : if (pszExcludedValues)
1409 : {
1410 : const CPLStringList aosTokens(
1411 8 : CSLTokenizeString2(pszExcludedValues, "(,)", 0));
1412 8 : if ((aosTokens.size() % nBands) != 0)
1413 : {
1414 1 : CPLError(CE_Failure, CPLE_AppDefined,
1415 : "EXCLUDED_VALUES should contain one or several tuples of "
1416 : "%d values formatted like <R>,<G>,<B> or "
1417 : "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>) if there are multiple "
1418 : "tuples",
1419 : nBands);
1420 1 : return CE_Failure;
1421 : }
1422 14 : std::vector<double> adfTuple;
1423 28 : for (int i = 0; i < aosTokens.size(); ++i)
1424 : {
1425 21 : adfTuple.push_back(CPLAtof(aosTokens[i]));
1426 21 : if (((i + 1) % nBands) == 0)
1427 : {
1428 7 : m_aadfExcludedValues.push_back(adfTuple);
1429 7 : adfTuple.clear();
1430 : }
1431 : }
1432 : }
1433 :
1434 2335 : return CE_None;
1435 : }
1436 :
1437 : /************************************************************************/
1438 : /* GWKOverlayDensity() */
1439 : /* */
1440 : /* Compute the final density for the destination pixel. This */
1441 : /* is a function of the overlay density (passed in) and the */
1442 : /* original density. */
1443 : /************************************************************************/
1444 :
1445 7941290 : static void GWKOverlayDensity(const GDALWarpKernel *poWK, GPtrDiff_t iDstOffset,
1446 : double dfDensity)
1447 : {
1448 7941290 : if (dfDensity < 0.0001 || poWK->pafDstDensity == nullptr)
1449 6750420 : return;
1450 :
1451 1190880 : poWK->pafDstDensity[iDstOffset] = static_cast<float>(
1452 1190880 : 1.0 - (1.0 - dfDensity) * (1.0 - poWK->pafDstDensity[iDstOffset]));
1453 : }
1454 :
1455 : /************************************************************************/
1456 : /* GWKRoundValueT() */
1457 : /************************************************************************/
1458 :
// Rounding policy chosen at compile time from the signedness of T. The
// primary template only declares eval(); the two partial specializations
// below define it.
template <class T, bool is_signed> struct sGWKRoundValueT
{
    static T eval(double);
};

// Signed target types: round half up through floor(), so that negative
// inputs round correctly (-2.5 gives -2, -2.6 gives -3).
template <class T> struct sGWKRoundValueT<T, true> /* signed */
{
    static T eval(double dfValue)
    {
        const double dfRounded = floor(dfValue + 0.5);
        return static_cast<T>(dfRounded);
    }
};

// Unsigned target types: add one half and let the conversion truncate.
template <class T> struct sGWKRoundValueT<T, false> /* unsigned */
{
    static T eval(double dfValue)
    {
        const double dfShifted = dfValue + 0.5;
        return static_cast<T>(dfShifted);
    }
};

// Round dfValue to type T with the policy matching the signedness of T.
// No range check is done here.
template <class T> static T GWKRoundValueT(double dfValue)
{
    typedef sGWKRoundValueT<T, std::numeric_limits<T>::is_signed> Rounder;
    return Rounder::eval(dfValue);
}

// float: plain conversion, no rounding to an integer.
template <> float GWKRoundValueT<float>(double dfValue)
{
    const float fValue = static_cast<float>(dfValue);
    return fValue;
}

#ifdef notused
// double: the value is returned unchanged.
template <> double GWKRoundValueT<double>(double dfValue)
{
    return dfValue;
}
#endif
1496 :
1497 : /************************************************************************/
1498 : /* GWKClampValueT() */
1499 : /************************************************************************/
1500 :
1501 10418216 : template <class T> static CPL_INLINE T GWKClampValueT(double dfValue)
1502 : {
1503 10418216 : if (dfValue < std::numeric_limits<T>::min())
1504 3969 : return std::numeric_limits<T>::min();
1505 10424058 : else if (dfValue > std::numeric_limits<T>::max())
1506 18463 : return std::numeric_limits<T>::max();
1507 : else
1508 10429158 : return GWKRoundValueT<T>(dfValue);
1509 : }
1510 :
1511 718914 : template <> float GWKClampValueT<float>(double dfValue)
1512 : {
1513 718914 : return static_cast<float>(dfValue);
1514 : }
1515 :
1516 : #ifdef notused
1517 : template <> double GWKClampValueT<double>(double dfValue)
1518 : {
1519 : return dfValue;
1520 : }
1521 : #endif
1522 :
1523 : /************************************************************************/
1524 : /* GWKSetPixelValueRealT() */
1525 : /************************************************************************/
1526 :
1527 : template <class T>
1528 159076 : static bool GWKSetPixelValueRealT(const GDALWarpKernel *poWK, int iBand,
1529 : GPtrDiff_t iDstOffset, double dfDensity,
1530 : T value)
1531 : {
1532 159076 : T *pDst = reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
1533 :
1534 : /* -------------------------------------------------------------------- */
1535 : /* If the source density is less than 100% we need to fetch the */
1536 : /* existing destination value, and mix it with the source to */
1537 : /* get the new "to apply" value. Also compute composite */
1538 : /* density. */
1539 : /* */
1540 : /* We avoid mixing if density is very near one or risk mixing */
1541 : /* in very extreme nodata values and causing odd results (#1610) */
1542 : /* -------------------------------------------------------------------- */
1543 159076 : if (dfDensity < 0.9999)
1544 : {
1545 159076 : if (dfDensity < 0.0001)
1546 0 : return true;
1547 :
1548 159076 : double dfDstDensity = 1.0;
1549 :
1550 159076 : if (poWK->pafDstDensity != nullptr)
1551 157604 : dfDstDensity = poWK->pafDstDensity[iDstOffset];
1552 1472 : else if (poWK->panDstValid != nullptr &&
1553 0 : !CPLMaskGet(poWK->panDstValid, iDstOffset))
1554 0 : dfDstDensity = 0.0;
1555 :
1556 : // It seems like we also ought to be testing panDstValid[] here!
1557 :
1558 159076 : const double dfDstReal = pDst[iDstOffset];
1559 :
1560 : // The destination density is really only relative to the portion
1561 : // not occluded by the overlay.
1562 159076 : const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
1563 :
1564 159076 : const double dfReal = (value * dfDensity + dfDstReal * dfDstInfluence) /
1565 159076 : (dfDensity + dfDstInfluence);
1566 :
1567 : /* --------------------------------------------------------------------
1568 : */
1569 : /* Actually apply the destination value. */
1570 : /* */
1571 : /* Avoid using the destination nodata value for integer datatypes
1572 : */
1573 : /* if by chance it is equal to the computed pixel value. */
1574 : /* --------------------------------------------------------------------
1575 : */
1576 159076 : pDst[iDstOffset] = GWKClampValueT<T>(dfReal);
1577 : }
1578 : else
1579 : {
1580 0 : pDst[iDstOffset] = value;
1581 : }
1582 :
1583 159076 : if (poWK->padfDstNoDataReal != nullptr &&
1584 0 : poWK->padfDstNoDataReal[iBand] == static_cast<double>(pDst[iDstOffset]))
1585 : {
1586 0 : if (pDst[iDstOffset] == std::numeric_limits<T>::min())
1587 0 : pDst[iDstOffset] = std::numeric_limits<T>::min() + 1;
1588 : else
1589 0 : pDst[iDstOffset]--;
1590 : }
1591 :
1592 159076 : return true;
1593 : }
1594 :
1595 : /************************************************************************/
1596 : /* GWKSetPixelValue() */
1597 : /************************************************************************/
1598 :
1599 3867640 : static bool GWKSetPixelValue(const GDALWarpKernel *poWK, int iBand,
1600 : GPtrDiff_t iDstOffset, double dfDensity,
1601 : double dfReal, double dfImag)
1602 :
1603 : {
1604 3867640 : GByte *pabyDst = poWK->papabyDstImage[iBand];
1605 :
1606 : /* -------------------------------------------------------------------- */
1607 : /* If the source density is less than 100% we need to fetch the */
1608 : /* existing destination value, and mix it with the source to */
1609 : /* get the new "to apply" value. Also compute composite */
1610 : /* density. */
1611 : /* */
1612 : /* We avoid mixing if density is very near one or risk mixing */
1613 : /* in very extreme nodata values and causing odd results (#1610) */
1614 : /* -------------------------------------------------------------------- */
1615 3867640 : if (dfDensity < 0.9999)
1616 : {
1617 800 : if (dfDensity < 0.0001)
1618 0 : return true;
1619 :
1620 800 : double dfDstDensity = 1.0;
1621 800 : if (poWK->pafDstDensity != nullptr)
1622 800 : dfDstDensity = poWK->pafDstDensity[iDstOffset];
1623 0 : else if (poWK->panDstValid != nullptr &&
1624 0 : !CPLMaskGet(poWK->panDstValid, iDstOffset))
1625 0 : dfDstDensity = 0.0;
1626 :
1627 800 : double dfDstReal = 0.0;
1628 800 : double dfDstImag = 0.0;
1629 : // It seems like we also ought to be testing panDstValid[] here!
1630 :
1631 : // TODO(schwehr): Factor out this repeated type of set.
1632 800 : switch (poWK->eWorkingDataType)
1633 : {
1634 0 : case GDT_Byte:
1635 0 : dfDstReal = pabyDst[iDstOffset];
1636 0 : dfDstImag = 0.0;
1637 0 : break;
1638 :
1639 0 : case GDT_Int8:
1640 0 : dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
1641 0 : dfDstImag = 0.0;
1642 0 : break;
1643 :
1644 400 : case GDT_Int16:
1645 400 : dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
1646 400 : dfDstImag = 0.0;
1647 400 : break;
1648 :
1649 400 : case GDT_UInt16:
1650 400 : dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
1651 400 : dfDstImag = 0.0;
1652 400 : break;
1653 :
1654 0 : case GDT_Int32:
1655 0 : dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
1656 0 : dfDstImag = 0.0;
1657 0 : break;
1658 :
1659 0 : case GDT_UInt32:
1660 0 : dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
1661 0 : dfDstImag = 0.0;
1662 0 : break;
1663 :
1664 0 : case GDT_Int64:
1665 0 : dfDstReal = static_cast<double>(
1666 0 : reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
1667 0 : dfDstImag = 0.0;
1668 0 : break;
1669 :
1670 0 : case GDT_UInt64:
1671 0 : dfDstReal = static_cast<double>(
1672 0 : reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
1673 0 : dfDstImag = 0.0;
1674 0 : break;
1675 :
1676 0 : case GDT_Float32:
1677 0 : dfDstReal = reinterpret_cast<float *>(pabyDst)[iDstOffset];
1678 0 : dfDstImag = 0.0;
1679 0 : break;
1680 :
1681 0 : case GDT_Float64:
1682 0 : dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
1683 0 : dfDstImag = 0.0;
1684 0 : break;
1685 :
1686 0 : case GDT_CInt16:
1687 0 : dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2];
1688 0 : dfDstImag =
1689 0 : reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2 + 1];
1690 0 : break;
1691 :
1692 0 : case GDT_CInt32:
1693 0 : dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2];
1694 0 : dfDstImag =
1695 0 : reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2 + 1];
1696 0 : break;
1697 :
1698 0 : case GDT_CFloat32:
1699 0 : dfDstReal = reinterpret_cast<float *>(pabyDst)[iDstOffset * 2];
1700 0 : dfDstImag =
1701 0 : reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1];
1702 0 : break;
1703 :
1704 0 : case GDT_CFloat64:
1705 0 : dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset * 2];
1706 0 : dfDstImag =
1707 0 : reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1];
1708 0 : break;
1709 :
1710 0 : case GDT_Unknown:
1711 : case GDT_TypeCount:
1712 0 : CPLAssert(false);
1713 : return false;
1714 : }
1715 :
1716 : // The destination density is really only relative to the portion
1717 : // not occluded by the overlay.
1718 800 : const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
1719 :
1720 800 : dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
1721 800 : (dfDensity + dfDstInfluence);
1722 :
1723 800 : dfImag = (dfImag * dfDensity + dfDstImag * dfDstInfluence) /
1724 800 : (dfDensity + dfDstInfluence);
1725 : }
1726 :
1727 : /* -------------------------------------------------------------------- */
1728 : /* Actually apply the destination value. */
1729 : /* */
1730 : /* Avoid using the destination nodata value for integer datatypes */
1731 : /* if by chance it is equal to the computed pixel value. */
1732 : /* -------------------------------------------------------------------- */
1733 :
1734 : // TODO(schwehr): Can we make this a template?
1735 : #define CLAMP(type) \
1736 : do \
1737 : { \
1738 : type *_pDst = reinterpret_cast<type *>(pabyDst); \
1739 : if (dfReal < static_cast<double>(std::numeric_limits<type>::min())) \
1740 : _pDst[iDstOffset] = \
1741 : static_cast<type>(std::numeric_limits<type>::min()); \
1742 : else if (dfReal > \
1743 : static_cast<double>(std::numeric_limits<type>::max())) \
1744 : _pDst[iDstOffset] = \
1745 : static_cast<type>(std::numeric_limits<type>::max()); \
1746 : else \
1747 : _pDst[iDstOffset] = (std::numeric_limits<type>::is_signed) \
1748 : ? static_cast<type>(floor(dfReal + 0.5)) \
1749 : : static_cast<type>(dfReal + 0.5); \
1750 : if (poWK->padfDstNoDataReal != nullptr && \
1751 : poWK->padfDstNoDataReal[iBand] == \
1752 : static_cast<double>(_pDst[iDstOffset])) \
1753 : { \
1754 : if (_pDst[iDstOffset] == \
1755 : static_cast<type>(std::numeric_limits<type>::min())) \
1756 : _pDst[iDstOffset] = \
1757 : static_cast<type>(std::numeric_limits<type>::min() + 1); \
1758 : else \
1759 : _pDst[iDstOffset]--; \
1760 : } \
1761 : } while (false)
1762 :
1763 3867640 : switch (poWK->eWorkingDataType)
1764 : {
1765 3141450 : case GDT_Byte:
1766 3141450 : CLAMP(GByte);
1767 3141450 : break;
1768 :
1769 0 : case GDT_Int8:
1770 0 : CLAMP(GInt8);
1771 0 : break;
1772 :
1773 7470 : case GDT_Int16:
1774 7470 : CLAMP(GInt16);
1775 7470 : break;
1776 :
1777 463 : case GDT_UInt16:
1778 463 : CLAMP(GUInt16);
1779 463 : break;
1780 :
1781 63 : case GDT_UInt32:
1782 63 : CLAMP(GUInt32);
1783 63 : break;
1784 :
1785 3470 : case GDT_Int32:
1786 3470 : CLAMP(GInt32);
1787 3470 : break;
1788 :
1789 0 : case GDT_UInt64:
1790 0 : CLAMP(std::uint64_t);
1791 0 : break;
1792 :
1793 0 : case GDT_Int64:
1794 0 : CLAMP(std::int64_t);
1795 0 : break;
1796 :
1797 478957 : case GDT_Float32:
1798 478957 : reinterpret_cast<float *>(pabyDst)[iDstOffset] =
1799 478957 : static_cast<float>(dfReal);
1800 478957 : break;
1801 :
1802 147 : case GDT_Float64:
1803 147 : reinterpret_cast<double *>(pabyDst)[iDstOffset] = dfReal;
1804 147 : break;
1805 :
1806 234178 : case GDT_CInt16:
1807 : {
1808 : typedef GInt16 T;
1809 234178 : if (dfReal < static_cast<double>(std::numeric_limits<T>::min()))
1810 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1811 0 : std::numeric_limits<T>::min();
1812 234178 : else if (dfReal >
1813 234178 : static_cast<double>(std::numeric_limits<T>::max()))
1814 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1815 0 : std::numeric_limits<T>::max();
1816 : else
1817 234178 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1818 234178 : static_cast<T>(floor(dfReal + 0.5));
1819 234178 : if (dfImag < static_cast<double>(std::numeric_limits<T>::min()))
1820 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1821 0 : std::numeric_limits<T>::min();
1822 234178 : else if (dfImag >
1823 234178 : static_cast<double>(std::numeric_limits<T>::max()))
1824 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1825 0 : std::numeric_limits<T>::max();
1826 : else
1827 234178 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1828 234178 : static_cast<T>(floor(dfImag + 0.5));
1829 234178 : break;
1830 : }
1831 :
1832 478 : case GDT_CInt32:
1833 : {
1834 : typedef GInt32 T;
1835 478 : if (dfReal < static_cast<double>(std::numeric_limits<T>::min()))
1836 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1837 0 : std::numeric_limits<T>::min();
1838 478 : else if (dfReal >
1839 478 : static_cast<double>(std::numeric_limits<T>::max()))
1840 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1841 0 : std::numeric_limits<T>::max();
1842 : else
1843 478 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1844 478 : static_cast<T>(floor(dfReal + 0.5));
1845 478 : if (dfImag < static_cast<double>(std::numeric_limits<T>::min()))
1846 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1847 0 : std::numeric_limits<T>::min();
1848 478 : else if (dfImag >
1849 478 : static_cast<double>(std::numeric_limits<T>::max()))
1850 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1851 0 : std::numeric_limits<T>::max();
1852 : else
1853 478 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1854 478 : static_cast<T>(floor(dfImag + 0.5));
1855 478 : break;
1856 : }
1857 :
1858 490 : case GDT_CFloat32:
1859 490 : reinterpret_cast<float *>(pabyDst)[iDstOffset * 2] =
1860 490 : static_cast<float>(dfReal);
1861 490 : reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1] =
1862 490 : static_cast<float>(dfImag);
1863 490 : break;
1864 :
1865 478 : case GDT_CFloat64:
1866 478 : reinterpret_cast<double *>(pabyDst)[iDstOffset * 2] = dfReal;
1867 478 : reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1] = dfImag;
1868 478 : break;
1869 :
1870 0 : case GDT_Unknown:
1871 : case GDT_TypeCount:
1872 0 : return false;
1873 : }
1874 :
1875 3867640 : return true;
1876 : }
1877 :
1878 : /************************************************************************/
1879 : /* GWKSetPixelValueReal() */
1880 : /************************************************************************/
1881 :
1882 923761 : static bool GWKSetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
1883 : GPtrDiff_t iDstOffset, double dfDensity,
1884 : double dfReal)
1885 :
1886 : {
1887 923761 : GByte *pabyDst = poWK->papabyDstImage[iBand];
1888 :
1889 : /* -------------------------------------------------------------------- */
1890 : /* If the source density is less than 100% we need to fetch the */
1891 : /* existing destination value, and mix it with the source to */
1892 : /* get the new "to apply" value. Also compute composite */
1893 : /* density. */
1894 : /* */
1895 : /* We avoid mixing if density is very near one or risk mixing */
1896 : /* in very extreme nodata values and causing odd results (#1610) */
1897 : /* -------------------------------------------------------------------- */
1898 923761 : if (dfDensity < 0.9999)
1899 : {
1900 600 : if (dfDensity < 0.0001)
1901 0 : return true;
1902 :
1903 600 : double dfDstReal = 0.0;
1904 600 : double dfDstDensity = 1.0;
1905 :
1906 600 : if (poWK->pafDstDensity != nullptr)
1907 600 : dfDstDensity = poWK->pafDstDensity[iDstOffset];
1908 0 : else if (poWK->panDstValid != nullptr &&
1909 0 : !CPLMaskGet(poWK->panDstValid, iDstOffset))
1910 0 : dfDstDensity = 0.0;
1911 :
1912 : // It seems like we also ought to be testing panDstValid[] here!
1913 :
1914 600 : switch (poWK->eWorkingDataType)
1915 : {
1916 0 : case GDT_Byte:
1917 0 : dfDstReal = pabyDst[iDstOffset];
1918 0 : break;
1919 :
1920 0 : case GDT_Int8:
1921 0 : dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
1922 0 : break;
1923 :
1924 300 : case GDT_Int16:
1925 300 : dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
1926 300 : break;
1927 :
1928 300 : case GDT_UInt16:
1929 300 : dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
1930 300 : break;
1931 :
1932 0 : case GDT_Int32:
1933 0 : dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
1934 0 : break;
1935 :
1936 0 : case GDT_UInt32:
1937 0 : dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
1938 0 : break;
1939 :
1940 0 : case GDT_Int64:
1941 0 : dfDstReal = static_cast<double>(
1942 0 : reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
1943 0 : break;
1944 :
1945 0 : case GDT_UInt64:
1946 0 : dfDstReal = static_cast<double>(
1947 0 : reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
1948 0 : break;
1949 :
1950 0 : case GDT_Float32:
1951 0 : dfDstReal = reinterpret_cast<float *>(pabyDst)[iDstOffset];
1952 0 : break;
1953 :
1954 0 : case GDT_Float64:
1955 0 : dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
1956 0 : break;
1957 :
1958 0 : case GDT_CInt16:
1959 : case GDT_CInt32:
1960 : case GDT_CFloat32:
1961 : case GDT_CFloat64:
1962 : case GDT_Unknown:
1963 : case GDT_TypeCount:
1964 0 : CPLAssert(false);
1965 : return false;
1966 : }
1967 :
1968 : // The destination density is really only relative to the portion
1969 : // not occluded by the overlay.
1970 600 : const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
1971 :
1972 600 : dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
1973 600 : (dfDensity + dfDstInfluence);
1974 : }
1975 :
1976 : /* -------------------------------------------------------------------- */
1977 : /* Actually apply the destination value. */
1978 : /* */
1979 : /* Avoid using the destination nodata value for integer datatypes */
1980 : /* if by chance it is equal to the computed pixel value. */
1981 : /* -------------------------------------------------------------------- */
1982 :
1983 923761 : switch (poWK->eWorkingDataType)
1984 : {
1985 916736 : case GDT_Byte:
1986 916736 : CLAMP(GByte);
1987 916736 : break;
1988 :
1989 0 : case GDT_Int8:
1990 0 : CLAMP(GInt8);
1991 0 : break;
1992 :
1993 1085 : case GDT_Int16:
1994 1085 : CLAMP(GInt16);
1995 1085 : break;
1996 :
1997 363 : case GDT_UInt16:
1998 363 : CLAMP(GUInt16);
1999 363 : break;
2000 :
2001 315 : case GDT_UInt32:
2002 315 : CLAMP(GUInt32);
2003 315 : break;
2004 :
2005 1318 : case GDT_Int32:
2006 1318 : CLAMP(GInt32);
2007 1318 : break;
2008 :
2009 0 : case GDT_UInt64:
2010 0 : CLAMP(std::uint64_t);
2011 0 : break;
2012 :
2013 100 : case GDT_Int64:
2014 100 : CLAMP(std::int64_t);
2015 100 : break;
2016 :
2017 3426 : case GDT_Float32:
2018 3426 : reinterpret_cast<float *>(pabyDst)[iDstOffset] =
2019 3426 : static_cast<float>(dfReal);
2020 3426 : break;
2021 :
2022 418 : case GDT_Float64:
2023 418 : reinterpret_cast<double *>(pabyDst)[iDstOffset] = dfReal;
2024 418 : break;
2025 :
2026 0 : case GDT_CInt16:
2027 : case GDT_CInt32:
2028 : case GDT_CFloat32:
2029 : case GDT_CFloat64:
2030 0 : return false;
2031 :
2032 0 : case GDT_Unknown:
2033 : case GDT_TypeCount:
2034 0 : CPLAssert(false);
2035 : return false;
2036 : }
2037 :
2038 923761 : return true;
2039 : }
2040 :
2041 : /************************************************************************/
2042 : /* GWKGetPixelValue() */
2043 : /************************************************************************/
2044 :
2045 : /* It is assumed that panUnifiedSrcValid has been checked before */
2046 :
2047 29336100 : static bool GWKGetPixelValue(const GDALWarpKernel *poWK, int iBand,
2048 : GPtrDiff_t iSrcOffset, double *pdfDensity,
2049 : double *pdfReal, double *pdfImag)
2050 :
2051 : {
2052 29336100 : GByte *pabySrc = poWK->papabySrcImage[iBand];
2053 :
2054 58672300 : if (poWK->papanBandSrcValid != nullptr &&
2055 29336100 : poWK->papanBandSrcValid[iBand] != nullptr &&
2056 0 : !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
2057 : {
2058 0 : *pdfDensity = 0.0;
2059 0 : return false;
2060 : }
2061 :
2062 29336100 : *pdfReal = 0.0;
2063 29336100 : *pdfImag = 0.0;
2064 :
2065 : // TODO(schwehr): Fix casting.
2066 29336100 : switch (poWK->eWorkingDataType)
2067 : {
2068 28245600 : case GDT_Byte:
2069 28245600 : *pdfReal = pabySrc[iSrcOffset];
2070 28245600 : *pdfImag = 0.0;
2071 28245600 : break;
2072 :
2073 0 : case GDT_Int8:
2074 0 : *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
2075 0 : *pdfImag = 0.0;
2076 0 : break;
2077 :
2078 28226 : case GDT_Int16:
2079 28226 : *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
2080 28226 : *pdfImag = 0.0;
2081 28226 : break;
2082 :
2083 163 : case GDT_UInt16:
2084 163 : *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
2085 163 : *pdfImag = 0.0;
2086 163 : break;
2087 :
2088 13726 : case GDT_Int32:
2089 13726 : *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
2090 13726 : *pdfImag = 0.0;
2091 13726 : break;
2092 :
2093 63 : case GDT_UInt32:
2094 63 : *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
2095 63 : *pdfImag = 0.0;
2096 63 : break;
2097 :
2098 0 : case GDT_Int64:
2099 0 : *pdfReal = static_cast<double>(
2100 0 : reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
2101 0 : *pdfImag = 0.0;
2102 0 : break;
2103 :
2104 0 : case GDT_UInt64:
2105 0 : *pdfReal = static_cast<double>(
2106 0 : reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
2107 0 : *pdfImag = 0.0;
2108 0 : break;
2109 :
2110 1047220 : case GDT_Float32:
2111 1047220 : *pdfReal = reinterpret_cast<float *>(pabySrc)[iSrcOffset];
2112 1047220 : *pdfImag = 0.0;
2113 1047220 : break;
2114 :
2115 582 : case GDT_Float64:
2116 582 : *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
2117 582 : *pdfImag = 0.0;
2118 582 : break;
2119 :
2120 130 : case GDT_CInt16:
2121 130 : *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2];
2122 130 : *pdfImag = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2 + 1];
2123 130 : break;
2124 :
2125 130 : case GDT_CInt32:
2126 130 : *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2];
2127 130 : *pdfImag = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2 + 1];
2128 130 : break;
2129 :
2130 178 : case GDT_CFloat32:
2131 178 : *pdfReal = reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2];
2132 178 : *pdfImag = reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2 + 1];
2133 178 : break;
2134 :
2135 130 : case GDT_CFloat64:
2136 130 : *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2];
2137 130 : *pdfImag = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2 + 1];
2138 130 : break;
2139 :
2140 0 : case GDT_Unknown:
2141 : case GDT_TypeCount:
2142 0 : CPLAssert(false);
2143 : *pdfDensity = 0.0;
2144 : return false;
2145 : }
2146 :
2147 29336100 : if (poWK->pafUnifiedSrcDensity != nullptr)
2148 3015160 : *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
2149 : else
2150 26321000 : *pdfDensity = 1.0;
2151 :
2152 29336100 : return *pdfDensity != 0.0;
2153 : }
2154 :
2155 : /************************************************************************/
2156 : /* GWKGetPixelValueReal() */
2157 : /************************************************************************/
2158 :
2159 1012 : static bool GWKGetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
2160 : GPtrDiff_t iSrcOffset, double *pdfDensity,
2161 : double *pdfReal)
2162 :
2163 : {
2164 1012 : GByte *pabySrc = poWK->papabySrcImage[iBand];
2165 :
2166 2026 : if (poWK->papanBandSrcValid != nullptr &&
2167 1014 : poWK->papanBandSrcValid[iBand] != nullptr &&
2168 2 : !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
2169 : {
2170 0 : *pdfDensity = 0.0;
2171 0 : return false;
2172 : }
2173 :
2174 1012 : switch (poWK->eWorkingDataType)
2175 : {
2176 1 : case GDT_Byte:
2177 1 : *pdfReal = pabySrc[iSrcOffset];
2178 1 : break;
2179 :
2180 0 : case GDT_Int8:
2181 0 : *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
2182 0 : break;
2183 :
2184 1 : case GDT_Int16:
2185 1 : *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
2186 1 : break;
2187 :
2188 1 : case GDT_UInt16:
2189 1 : *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
2190 1 : break;
2191 :
2192 870 : case GDT_Int32:
2193 870 : *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
2194 870 : break;
2195 :
2196 67 : case GDT_UInt32:
2197 67 : *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
2198 67 : break;
2199 :
2200 0 : case GDT_Int64:
2201 0 : *pdfReal = static_cast<double>(
2202 0 : reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
2203 0 : break;
2204 :
2205 0 : case GDT_UInt64:
2206 0 : *pdfReal = static_cast<double>(
2207 0 : reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
2208 0 : break;
2209 :
2210 2 : case GDT_Float32:
2211 2 : *pdfReal = reinterpret_cast<float *>(pabySrc)[iSrcOffset];
2212 2 : break;
2213 :
2214 70 : case GDT_Float64:
2215 70 : *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
2216 70 : break;
2217 :
2218 0 : case GDT_CInt16:
2219 : case GDT_CInt32:
2220 : case GDT_CFloat32:
2221 : case GDT_CFloat64:
2222 : case GDT_Unknown:
2223 : case GDT_TypeCount:
2224 0 : CPLAssert(false);
2225 : return false;
2226 : }
2227 :
2228 1012 : if (poWK->pafUnifiedSrcDensity != nullptr)
2229 0 : *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
2230 : else
2231 1012 : *pdfDensity = 1.0;
2232 :
2233 1012 : return *pdfDensity != 0.0;
2234 : }
2235 :
2236 : /************************************************************************/
2237 : /* GWKGetPixelRow() */
2238 : /************************************************************************/
2239 :
2240 : /* It is assumed that adfImag[] is set to 0 by caller code for non-complex */
2241 : /* data-types. */
2242 :
2243 2353850 : static bool GWKGetPixelRow(const GDALWarpKernel *poWK, int iBand,
2244 : GPtrDiff_t iSrcOffset, int nHalfSrcLen,
2245 : double *padfDensity, double adfReal[],
2246 : double *padfImag)
2247 : {
2248 : // We know that nSrcLen is even, so we can *always* unroll loops 2x.
2249 2353850 : const int nSrcLen = nHalfSrcLen * 2;
2250 2353850 : bool bHasValid = false;
2251 :
2252 2353850 : if (padfDensity != nullptr)
2253 : {
2254 : // Init the density.
2255 3345770 : for (int i = 0; i < nSrcLen; i += 2)
2256 : {
2257 2189510 : padfDensity[i] = 1.0;
2258 2189510 : padfDensity[i + 1] = 1.0;
2259 : }
2260 :
2261 1156260 : if (poWK->panUnifiedSrcValid != nullptr)
2262 : {
2263 3281460 : for (int i = 0; i < nSrcLen; i += 2)
2264 : {
2265 2142070 : if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i))
2266 2067740 : bHasValid = true;
2267 : else
2268 74323 : padfDensity[i] = 0.0;
2269 :
2270 2142070 : if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i + 1))
2271 2068400 : bHasValid = true;
2272 : else
2273 73668 : padfDensity[i + 1] = 0.0;
2274 : }
2275 :
2276 : // Reset or fail as needed.
2277 1139400 : if (bHasValid)
2278 1116590 : bHasValid = false;
2279 : else
2280 22806 : return false;
2281 : }
2282 :
2283 1133450 : if (poWK->papanBandSrcValid != nullptr &&
2284 0 : poWK->papanBandSrcValid[iBand] != nullptr)
2285 : {
2286 0 : for (int i = 0; i < nSrcLen; i += 2)
2287 : {
2288 0 : if (CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset + i))
2289 0 : bHasValid = true;
2290 : else
2291 0 : padfDensity[i] = 0.0;
2292 :
2293 0 : if (CPLMaskGet(poWK->papanBandSrcValid[iBand],
2294 0 : iSrcOffset + i + 1))
2295 0 : bHasValid = true;
2296 : else
2297 0 : padfDensity[i + 1] = 0.0;
2298 : }
2299 :
2300 : // Reset or fail as needed.
2301 0 : if (bHasValid)
2302 0 : bHasValid = false;
2303 : else
2304 0 : return false;
2305 : }
2306 : }
2307 :
2308 : // TODO(schwehr): Fix casting.
2309 : // Fetch data.
2310 2331040 : switch (poWK->eWorkingDataType)
2311 : {
2312 1121060 : case GDT_Byte:
2313 : {
2314 1121060 : GByte *pSrc =
2315 1121060 : reinterpret_cast<GByte *>(poWK->papabySrcImage[iBand]);
2316 1121060 : pSrc += iSrcOffset;
2317 3243800 : for (int i = 0; i < nSrcLen; i += 2)
2318 : {
2319 2122740 : adfReal[i] = pSrc[i];
2320 2122740 : adfReal[i + 1] = pSrc[i + 1];
2321 : }
2322 1121060 : break;
2323 : }
2324 :
2325 0 : case GDT_Int8:
2326 : {
2327 0 : GInt8 *pSrc =
2328 0 : reinterpret_cast<GInt8 *>(poWK->papabySrcImage[iBand]);
2329 0 : pSrc += iSrcOffset;
2330 0 : for (int i = 0; i < nSrcLen; i += 2)
2331 : {
2332 0 : adfReal[i] = pSrc[i];
2333 0 : adfReal[i + 1] = pSrc[i + 1];
2334 : }
2335 0 : break;
2336 : }
2337 :
2338 5558 : case GDT_Int16:
2339 : {
2340 5558 : GInt16 *pSrc =
2341 5558 : reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
2342 5558 : pSrc += iSrcOffset;
2343 21380 : for (int i = 0; i < nSrcLen; i += 2)
2344 : {
2345 15822 : adfReal[i] = pSrc[i];
2346 15822 : adfReal[i + 1] = pSrc[i + 1];
2347 : }
2348 5558 : break;
2349 : }
2350 :
2351 4114 : case GDT_UInt16:
2352 : {
2353 4114 : GUInt16 *pSrc =
2354 4114 : reinterpret_cast<GUInt16 *>(poWK->papabySrcImage[iBand]);
2355 4114 : pSrc += iSrcOffset;
2356 18492 : for (int i = 0; i < nSrcLen; i += 2)
2357 : {
2358 14378 : adfReal[i] = pSrc[i];
2359 14378 : adfReal[i + 1] = pSrc[i + 1];
2360 : }
2361 4114 : break;
2362 : }
2363 :
2364 1130 : case GDT_Int32:
2365 : {
2366 1130 : GInt32 *pSrc =
2367 1130 : reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
2368 1130 : pSrc += iSrcOffset;
2369 2992 : for (int i = 0; i < nSrcLen; i += 2)
2370 : {
2371 1862 : adfReal[i] = pSrc[i];
2372 1862 : adfReal[i + 1] = pSrc[i + 1];
2373 : }
2374 1130 : break;
2375 : }
2376 :
2377 750 : case GDT_UInt32:
2378 : {
2379 750 : GUInt32 *pSrc =
2380 750 : reinterpret_cast<GUInt32 *>(poWK->papabySrcImage[iBand]);
2381 750 : pSrc += iSrcOffset;
2382 2232 : for (int i = 0; i < nSrcLen; i += 2)
2383 : {
2384 1482 : adfReal[i] = pSrc[i];
2385 1482 : adfReal[i + 1] = pSrc[i + 1];
2386 : }
2387 750 : break;
2388 : }
2389 :
2390 190 : case GDT_Int64:
2391 : {
2392 190 : auto pSrc =
2393 190 : reinterpret_cast<std::int64_t *>(poWK->papabySrcImage[iBand]);
2394 190 : pSrc += iSrcOffset;
2395 380 : for (int i = 0; i < nSrcLen; i += 2)
2396 : {
2397 190 : adfReal[i] = static_cast<double>(pSrc[i]);
2398 190 : adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
2399 : }
2400 190 : break;
2401 : }
2402 :
2403 0 : case GDT_UInt64:
2404 : {
2405 0 : auto pSrc =
2406 0 : reinterpret_cast<std::uint64_t *>(poWK->papabySrcImage[iBand]);
2407 0 : pSrc += iSrcOffset;
2408 0 : for (int i = 0; i < nSrcLen; i += 2)
2409 : {
2410 0 : adfReal[i] = static_cast<double>(pSrc[i]);
2411 0 : adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
2412 : }
2413 0 : break;
2414 : }
2415 :
2416 25074 : case GDT_Float32:
2417 : {
2418 25074 : float *pSrc =
2419 25074 : reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
2420 25074 : pSrc += iSrcOffset;
2421 121347 : for (int i = 0; i < nSrcLen; i += 2)
2422 : {
2423 96273 : adfReal[i] = pSrc[i];
2424 96273 : adfReal[i + 1] = pSrc[i + 1];
2425 : }
2426 25074 : break;
2427 : }
2428 :
2429 940 : case GDT_Float64:
2430 : {
2431 940 : double *pSrc =
2432 940 : reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
2433 940 : pSrc += iSrcOffset;
2434 2612 : for (int i = 0; i < nSrcLen; i += 2)
2435 : {
2436 1672 : adfReal[i] = pSrc[i];
2437 1672 : adfReal[i + 1] = pSrc[i + 1];
2438 : }
2439 940 : break;
2440 : }
2441 :
2442 1169410 : case GDT_CInt16:
2443 : {
2444 1169410 : GInt16 *pSrc =
2445 1169410 : reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
2446 1169410 : pSrc += 2 * iSrcOffset;
2447 4676400 : for (int i = 0; i < nSrcLen; i += 2)
2448 : {
2449 3506990 : adfReal[i] = pSrc[2 * i];
2450 3506990 : padfImag[i] = pSrc[2 * i + 1];
2451 :
2452 3506990 : adfReal[i + 1] = pSrc[2 * i + 2];
2453 3506990 : padfImag[i + 1] = pSrc[2 * i + 3];
2454 : }
2455 1169410 : break;
2456 : }
2457 :
2458 940 : case GDT_CInt32:
2459 : {
2460 940 : GInt32 *pSrc =
2461 940 : reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
2462 940 : pSrc += 2 * iSrcOffset;
2463 2612 : for (int i = 0; i < nSrcLen; i += 2)
2464 : {
2465 1672 : adfReal[i] = pSrc[2 * i];
2466 1672 : padfImag[i] = pSrc[2 * i + 1];
2467 :
2468 1672 : adfReal[i + 1] = pSrc[2 * i + 2];
2469 1672 : padfImag[i + 1] = pSrc[2 * i + 3];
2470 : }
2471 940 : break;
2472 : }
2473 :
2474 940 : case GDT_CFloat32:
2475 : {
2476 940 : float *pSrc =
2477 940 : reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
2478 940 : pSrc += 2 * iSrcOffset;
2479 2612 : for (int i = 0; i < nSrcLen; i += 2)
2480 : {
2481 1672 : adfReal[i] = pSrc[2 * i];
2482 1672 : padfImag[i] = pSrc[2 * i + 1];
2483 :
2484 1672 : adfReal[i + 1] = pSrc[2 * i + 2];
2485 1672 : padfImag[i + 1] = pSrc[2 * i + 3];
2486 : }
2487 940 : break;
2488 : }
2489 :
2490 940 : case GDT_CFloat64:
2491 : {
2492 940 : double *pSrc =
2493 940 : reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
2494 940 : pSrc += 2 * iSrcOffset;
2495 2612 : for (int i = 0; i < nSrcLen; i += 2)
2496 : {
2497 1672 : adfReal[i] = pSrc[2 * i];
2498 1672 : padfImag[i] = pSrc[2 * i + 1];
2499 :
2500 1672 : adfReal[i + 1] = pSrc[2 * i + 2];
2501 1672 : padfImag[i + 1] = pSrc[2 * i + 3];
2502 : }
2503 940 : break;
2504 : }
2505 :
2506 0 : case GDT_Unknown:
2507 : case GDT_TypeCount:
2508 0 : CPLAssert(false);
2509 : if (padfDensity)
2510 : memset(padfDensity, 0, nSrcLen * sizeof(double));
2511 : return false;
2512 : }
2513 :
2514 2331040 : if (padfDensity == nullptr)
2515 1197590 : return true;
2516 :
2517 1133450 : if (poWK->pafUnifiedSrcDensity == nullptr)
2518 : {
2519 3234200 : for (int i = 0; i < nSrcLen; i += 2)
2520 : {
2521 : // Take into account earlier calcs.
2522 2112850 : if (padfDensity[i] > SRC_DENSITY_THRESHOLD)
2523 : {
2524 2072950 : padfDensity[i] = 1.0;
2525 2072950 : bHasValid = true;
2526 : }
2527 :
2528 2112850 : if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD)
2529 : {
2530 2073600 : padfDensity[i + 1] = 1.0;
2531 2073600 : bHasValid = true;
2532 : }
2533 : }
2534 : }
2535 : else
2536 : {
2537 54348 : for (int i = 0; i < nSrcLen; i += 2)
2538 : {
2539 42243 : if (padfDensity[i] > SRC_DENSITY_THRESHOLD)
2540 42243 : padfDensity[i] = poWK->pafUnifiedSrcDensity[iSrcOffset + i];
2541 42243 : if (padfDensity[i] > SRC_DENSITY_THRESHOLD)
2542 41704 : bHasValid = true;
2543 :
2544 42243 : if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD)
2545 42243 : padfDensity[i + 1] =
2546 42243 : poWK->pafUnifiedSrcDensity[iSrcOffset + i + 1];
2547 42243 : if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD)
2548 41598 : bHasValid = true;
2549 : }
2550 : }
2551 :
2552 1133450 : return bHasValid;
2553 : }
2554 :
2555 : /************************************************************************/
2556 : /* GWKGetPixelT() */
2557 : /************************************************************************/
2558 :
2559 : template <class T>
2560 7332114 : static bool GWKGetPixelT(const GDALWarpKernel *poWK, int iBand,
2561 : GPtrDiff_t iSrcOffset, double *pdfDensity, T *pValue)
2562 :
2563 : {
2564 7332114 : T *pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
2565 :
2566 16802154 : if ((poWK->panUnifiedSrcValid != nullptr &&
2567 14664208 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset)) ||
2568 7332114 : (poWK->papanBandSrcValid != nullptr &&
2569 21 : poWK->papanBandSrcValid[iBand] != nullptr &&
2570 21 : !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset)))
2571 : {
2572 9 : *pdfDensity = 0.0;
2573 9 : return false;
2574 : }
2575 :
2576 7332104 : *pValue = pSrc[iSrcOffset];
2577 :
2578 7332104 : if (poWK->pafUnifiedSrcDensity == nullptr)
2579 6997351 : *pdfDensity = 1.0;
2580 : else
2581 334754 : *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
2582 :
2583 7332104 : return *pdfDensity != 0.0;
2584 : }
2585 :
2586 : /************************************************************************/
2587 : /* GWKBilinearResample() */
2588 : /* Set of bilinear interpolators */
2589 : /************************************************************************/
2590 :
2591 72664 : static bool GWKBilinearResample4Sample(const GDALWarpKernel *poWK, int iBand,
2592 : double dfSrcX, double dfSrcY,
2593 : double *pdfDensity, double *pdfReal,
2594 : double *pdfImag)
2595 :
2596 : {
2597 : // Save as local variables to avoid following pointers.
2598 72664 : const int nSrcXSize = poWK->nSrcXSize;
2599 72664 : const int nSrcYSize = poWK->nSrcYSize;
2600 :
2601 72664 : int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
2602 72664 : int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
2603 72664 : double dfRatioX = 1.5 - (dfSrcX - iSrcX);
2604 72664 : double dfRatioY = 1.5 - (dfSrcY - iSrcY);
2605 72664 : bool bShifted = false;
2606 :
2607 72664 : if (iSrcX == -1)
2608 : {
2609 292 : iSrcX = 0;
2610 292 : dfRatioX = 1;
2611 : }
2612 72664 : if (iSrcY == -1)
2613 : {
2614 7686 : iSrcY = 0;
2615 7686 : dfRatioY = 1;
2616 : }
2617 72664 : GPtrDiff_t iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
2618 :
2619 : // Shift so we don't overrun the array.
2620 72664 : if (static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize == iSrcOffset + 1 ||
2621 72614 : static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize ==
2622 72614 : iSrcOffset + nSrcXSize + 1)
2623 : {
2624 100 : bShifted = true;
2625 100 : --iSrcOffset;
2626 : }
2627 :
2628 72664 : double adfDensity[2] = {0.0, 0.0};
2629 72664 : double adfReal[2] = {0.0, 0.0};
2630 72664 : double adfImag[2] = {0.0, 0.0};
2631 72664 : double dfAccumulatorReal = 0.0;
2632 72664 : double dfAccumulatorImag = 0.0;
2633 72664 : double dfAccumulatorDensity = 0.0;
2634 72664 : double dfAccumulatorDivisor = 0.0;
2635 :
2636 72664 : const GPtrDiff_t nSrcPixels =
2637 72664 : static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize;
2638 : // Get pixel row.
2639 72664 : if (iSrcY >= 0 && iSrcY < nSrcYSize && iSrcOffset >= 0 &&
2640 145328 : iSrcOffset < nSrcPixels &&
2641 72664 : GWKGetPixelRow(poWK, iBand, iSrcOffset, 1, adfDensity, adfReal,
2642 : adfImag))
2643 : {
2644 67008 : double dfMult1 = dfRatioX * dfRatioY;
2645 67008 : double dfMult2 = (1.0 - dfRatioX) * dfRatioY;
2646 :
2647 : // Shifting corrected.
2648 67008 : if (bShifted)
2649 : {
2650 100 : adfReal[0] = adfReal[1];
2651 100 : adfImag[0] = adfImag[1];
2652 100 : adfDensity[0] = adfDensity[1];
2653 : }
2654 :
2655 : // Upper Left Pixel.
2656 67008 : if (iSrcX >= 0 && iSrcX < nSrcXSize &&
2657 67008 : adfDensity[0] > SRC_DENSITY_THRESHOLD)
2658 : {
2659 61578 : dfAccumulatorDivisor += dfMult1;
2660 :
2661 61578 : dfAccumulatorReal += adfReal[0] * dfMult1;
2662 61578 : dfAccumulatorImag += adfImag[0] * dfMult1;
2663 61578 : dfAccumulatorDensity += adfDensity[0] * dfMult1;
2664 : }
2665 :
2666 : // Upper Right Pixel.
2667 67008 : if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
2668 66427 : adfDensity[1] > SRC_DENSITY_THRESHOLD)
2669 : {
2670 61153 : dfAccumulatorDivisor += dfMult2;
2671 :
2672 61153 : dfAccumulatorReal += adfReal[1] * dfMult2;
2673 61153 : dfAccumulatorImag += adfImag[1] * dfMult2;
2674 61153 : dfAccumulatorDensity += adfDensity[1] * dfMult2;
2675 : }
2676 : }
2677 :
2678 : // Get pixel row.
2679 72664 : if (iSrcY + 1 >= 0 && iSrcY + 1 < nSrcYSize &&
2680 213910 : iSrcOffset + nSrcXSize >= 0 && iSrcOffset + nSrcXSize < nSrcPixels &&
2681 68582 : GWKGetPixelRow(poWK, iBand, iSrcOffset + nSrcXSize, 1, adfDensity,
2682 : adfReal, adfImag))
2683 : {
2684 63023 : double dfMult1 = dfRatioX * (1.0 - dfRatioY);
2685 63023 : double dfMult2 = (1.0 - dfRatioX) * (1.0 - dfRatioY);
2686 :
2687 : // Shifting corrected
2688 63023 : if (bShifted)
2689 : {
2690 50 : adfReal[0] = adfReal[1];
2691 50 : adfImag[0] = adfImag[1];
2692 50 : adfDensity[0] = adfDensity[1];
2693 : }
2694 :
2695 : // Lower Left Pixel
2696 63023 : if (iSrcX >= 0 && iSrcX < nSrcXSize &&
2697 63023 : adfDensity[0] > SRC_DENSITY_THRESHOLD)
2698 : {
2699 57744 : dfAccumulatorDivisor += dfMult1;
2700 :
2701 57744 : dfAccumulatorReal += adfReal[0] * dfMult1;
2702 57744 : dfAccumulatorImag += adfImag[0] * dfMult1;
2703 57744 : dfAccumulatorDensity += adfDensity[0] * dfMult1;
2704 : }
2705 :
2706 : // Lower Right Pixel.
2707 63023 : if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
2708 62492 : adfDensity[1] > SRC_DENSITY_THRESHOLD)
2709 : {
2710 57515 : dfAccumulatorDivisor += dfMult2;
2711 :
2712 57515 : dfAccumulatorReal += adfReal[1] * dfMult2;
2713 57515 : dfAccumulatorImag += adfImag[1] * dfMult2;
2714 57515 : dfAccumulatorDensity += adfDensity[1] * dfMult2;
2715 : }
2716 : }
2717 :
2718 : /* -------------------------------------------------------------------- */
2719 : /* Return result. */
2720 : /* -------------------------------------------------------------------- */
2721 72664 : if (dfAccumulatorDivisor == 1.0)
2722 : {
2723 41607 : *pdfReal = dfAccumulatorReal;
2724 41607 : *pdfImag = dfAccumulatorImag;
2725 41607 : *pdfDensity = dfAccumulatorDensity;
2726 41607 : return false;
2727 : }
2728 31057 : else if (dfAccumulatorDivisor < 0.00001)
2729 : {
2730 0 : *pdfReal = 0.0;
2731 0 : *pdfImag = 0.0;
2732 0 : *pdfDensity = 0.0;
2733 0 : return false;
2734 : }
2735 : else
2736 : {
2737 31057 : *pdfReal = dfAccumulatorReal / dfAccumulatorDivisor;
2738 31057 : *pdfImag = dfAccumulatorImag / dfAccumulatorDivisor;
2739 31057 : *pdfDensity = dfAccumulatorDensity / dfAccumulatorDivisor;
2740 31057 : return true;
2741 : }
2742 : }
2743 :
2744 : template <class T>
2745 5116014 : static bool GWKBilinearResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
2746 : int iBand, double dfSrcX,
2747 : double dfSrcY, T *pValue)
2748 :
2749 : {
2750 :
2751 5116014 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
2752 5116014 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
2753 5116014 : GPtrDiff_t iSrcOffset =
2754 5116014 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
2755 5116014 : const double dfRatioX = 1.5 - (dfSrcX - iSrcX);
2756 5116014 : const double dfRatioY = 1.5 - (dfSrcY - iSrcY);
2757 :
2758 5116014 : const T *const pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
2759 :
2760 5116014 : if (iSrcX >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
2761 5012847 : iSrcY + 1 < poWK->nSrcYSize)
2762 : {
2763 4988678 : const double dfAccumulator =
2764 4988678 : (pSrc[iSrcOffset] * dfRatioX +
2765 4988678 : pSrc[iSrcOffset + 1] * (1.0 - dfRatioX)) *
2766 : dfRatioY +
2767 4988678 : (pSrc[iSrcOffset + poWK->nSrcXSize] * dfRatioX +
2768 4988678 : pSrc[iSrcOffset + 1 + poWK->nSrcXSize] * (1.0 - dfRatioX)) *
2769 4988678 : (1.0 - dfRatioY);
2770 :
2771 4988678 : *pValue = GWKRoundValueT<T>(dfAccumulator);
2772 :
2773 4988678 : return true;
2774 : }
2775 :
2776 127349 : double dfAccumulatorDivisor = 0.0;
2777 127349 : double dfAccumulator = 0.0;
2778 :
2779 : // Upper Left Pixel.
2780 127349 : if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY >= 0 &&
2781 53440 : iSrcY < poWK->nSrcYSize)
2782 : {
2783 53440 : const double dfMult = dfRatioX * dfRatioY;
2784 :
2785 53440 : dfAccumulatorDivisor += dfMult;
2786 :
2787 53440 : dfAccumulator += pSrc[iSrcOffset] * dfMult;
2788 : }
2789 :
2790 : // Upper Right Pixel.
2791 127349 : if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
2792 61354 : iSrcY < poWK->nSrcYSize)
2793 : {
2794 61354 : const double dfMult = (1.0 - dfRatioX) * dfRatioY;
2795 :
2796 61354 : dfAccumulatorDivisor += dfMult;
2797 :
2798 61354 : dfAccumulator += pSrc[iSrcOffset + 1] * dfMult;
2799 : }
2800 :
2801 : // Lower Right Pixel.
2802 127349 : if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
2803 97471 : iSrcY + 1 < poWK->nSrcYSize)
2804 : {
2805 72902 : const double dfMult = (1.0 - dfRatioX) * (1.0 - dfRatioY);
2806 :
2807 72902 : dfAccumulatorDivisor += dfMult;
2808 :
2809 72902 : dfAccumulator += pSrc[iSrcOffset + 1 + poWK->nSrcXSize] * dfMult;
2810 : }
2811 :
2812 : // Lower Left Pixel.
2813 127349 : if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
2814 89535 : iSrcY + 1 < poWK->nSrcYSize)
2815 : {
2816 64758 : const double dfMult = dfRatioX * (1.0 - dfRatioY);
2817 :
2818 64758 : dfAccumulatorDivisor += dfMult;
2819 :
2820 64758 : dfAccumulator += pSrc[iSrcOffset + poWK->nSrcXSize] * dfMult;
2821 : }
2822 :
2823 : /* -------------------------------------------------------------------- */
2824 : /* Return result. */
2825 : /* -------------------------------------------------------------------- */
2826 127349 : double dfValue = 0.0;
2827 :
2828 127349 : if (dfAccumulatorDivisor < 0.00001)
2829 : {
2830 0 : *pValue = 0;
2831 0 : return false;
2832 : }
2833 127349 : else if (dfAccumulatorDivisor == 1.0)
2834 : {
2835 8767 : dfValue = dfAccumulator;
2836 : }
2837 : else
2838 : {
2839 118582 : dfValue = dfAccumulator / dfAccumulatorDivisor;
2840 : }
2841 :
2842 127349 : *pValue = GWKRoundValueT<T>(dfValue);
2843 :
2844 127349 : return true;
2845 : }
2846 :
2847 : /************************************************************************/
2848 : /* GWKCubicResample() */
2849 : /* Set of bicubic interpolators using cubic convolution. */
2850 : /************************************************************************/
2851 :
2852 : // http://verona.fi-p.unam.mx/boris/practicas/CubConvInterp.pdf Formula 18
2853 : // or http://en.wikipedia.org/wiki/Cubic_Hermite_spline : CINTx(p_1,p0,p1,p2)
2854 : // http://en.wikipedia.org/wiki/Bicubic_interpolation: matrix notation
2855 :
/**
 * One-dimensional cubic convolution of four equally spaced samples.
 *
 * @param distance1 offset t from f1 towards f2.
 * @param distance2 t squared (supplied by the caller).
 * @param distance3 t cubed (supplied by the caller).
 * @param f0,f1,f2,f3 the four consecutive samples; the result interpolates
 *        between f1 (t = 0) and f2 (t = 1).
 * @return the interpolated value.
 */
template <typename T>
static inline T CubicConvolution(T distance1, T distance2, T distance3, T f0,
                                 T f1, T f2, T f3)
{
    const T linearTerm = distance1 * (f2 - f0);
    const T quadraticTerm = distance2 * (2 * f0 - 5 * f1 + 4 * f2 - f3);
    const T cubicTerm = distance3 * (3 * (f1 - f2) + f3 - f0);

    return (f1 + T(0.5) * (linearTerm + quadraticTerm + cubicTerm));
}
2864 :
2865 : /************************************************************************/
2866 : /* GWKCubicComputeWeights() */
2867 : /************************************************************************/
2868 :
2869 : // adfCoeffs[2] = 1.0 - (adfCoeffs[0] + adfCoeffs[1] - adfCoeffs[3]);
2870 :
/**
 * Compute the four cubic convolution weights for a fractional offset.
 *
 * @param x      offset of the sampling position from the second of the four
 *               samples (0 gives weights {0,1,0,0}, 1 gives {0,0,1,0}).
 * @param coeffs output array receiving the weights of the four samples.
 */
template <typename T>
static inline void GWKCubicComputeWeights(T x, T coeffs[4])
{
    // Shared sub-expressions of the four polynomials.
    const T xHalf = T(0.5) * x;
    const T xTriple = T(3.0) * x;
    const T xSquaredHalf = xHalf * x;

    // Outer taps.
    coeffs[0] = xHalf * (-1 + x * (2 - x));
    coeffs[3] = xSquaredHalf * (-1 + x);

    // Inner taps.
    coeffs[1] = 1 + xSquaredHalf * (-5 + xTriple);
    coeffs[2] = xHalf * (1 + x * (4 - xTriple));
}
2883 :
// TODO(schwehr): Use an inline function.
// Dot product of the first four elements of two indexable sequences
// (arrays or pointers). Each argument is evaluated four times, so it must
// be free of side effects.
#define CONVOL4(a, b)                                                          \
    ((a)[0] * (b)[0] + (a)[1] * (b)[1] + (a)[2] * (b)[2] + (a)[3] * (b)[3])
2888 :
2889 : #if 0
2890 : // Optimal (in theory...) for max 2 convolutions: 14 multiplications
2891 : // instead of 17.
2892 : // TODO(schwehr): Use an inline function.
2893 : #define GWKCubicComputeWeights_Optim2MAX(dfX_, adfCoeffs, dfHalfX) \
2894 : { \
2895 : const double dfX = dfX_; \
2896 : dfHalfX = 0.5 * dfX; \
2897 : const double dfThreeX = 3.0 * dfX; \
2898 : const double dfXMinus1 = dfX - 1; \
2899 : \
2900 : adfCoeffs[0] = -1 + dfX * (2 - dfX); \
2901 : adfCoeffs[1] = dfX * (-5 + dfThreeX); \
2902 : /*adfCoeffs[2] = 1 + dfX * (4 - dfThreeX);*/ \
2903 : adfCoeffs[2] = -dfXMinus1 - adfCoeffs[1]; \
2904 : /*adfCoeffs[3] = dfX * (-1 + dfX); */ \
2905 : adfCoeffs[3] = dfXMinus1 - adfCoeffs[0]; \
2906 : }
2907 :
2908 : // TODO(schwehr): Use an inline function.
2909 : #define CONVOL4_Optim2MAX(adfCoeffs, v, dfHalfX) \
2910 : ((v)[1] + (dfHalfX) * ((adfCoeffs)[0] * (v)[0] + (adfCoeffs)[1] * (v)[1] + \
2911 : (adfCoeffs)[2] * (v)[2] + (adfCoeffs)[3] * (v)[3]))
2912 : #endif
2913 :
2914 299879 : static bool GWKCubicResample4Sample(const GDALWarpKernel *poWK, int iBand,
2915 : double dfSrcX, double dfSrcY,
2916 : double *pdfDensity, double *pdfReal,
2917 : double *pdfImag)
2918 :
2919 : {
2920 299879 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
2921 299879 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
2922 299879 : GPtrDiff_t iSrcOffset =
2923 299879 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
2924 299879 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
2925 299879 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
2926 299879 : double adfDensity[4] = {};
2927 299879 : double adfReal[4] = {};
2928 299879 : double adfImag[4] = {};
2929 :
2930 : // Get the bilinear interpolation at the image borders.
2931 299879 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
2932 284412 : iSrcY + 2 >= poWK->nSrcYSize)
2933 24136 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
2934 24136 : pdfDensity, pdfReal, pdfImag);
2935 :
2936 275743 : double adfValueDens[4] = {};
2937 275743 : double adfValueReal[4] = {};
2938 275743 : double adfValueImag[4] = {};
2939 :
2940 275743 : double adfCoeffsX[4] = {};
2941 275743 : GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
2942 :
2943 1232410 : for (GPtrDiff_t i = -1; i < 3; i++)
2944 : {
2945 1003120 : if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
2946 991507 : 2, adfDensity, adfReal, adfImag) ||
2947 991507 : adfDensity[0] < SRC_DENSITY_THRESHOLD ||
2948 973867 : adfDensity[1] < SRC_DENSITY_THRESHOLD ||
2949 2960190 : adfDensity[2] < SRC_DENSITY_THRESHOLD ||
2950 965566 : adfDensity[3] < SRC_DENSITY_THRESHOLD)
2951 : {
2952 46449 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
2953 46449 : pdfDensity, pdfReal, pdfImag);
2954 : }
2955 :
2956 956668 : adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
2957 956668 : adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
2958 956668 : adfValueImag[i + 1] = CONVOL4(adfCoeffsX, adfImag);
2959 : }
2960 :
2961 : /* -------------------------------------------------------------------- */
2962 : /* For now, if we have any pixels missing in the kernel area, */
2963 : /* we fallback on using bilinear interpolation. Ideally we */
2964 : /* should do "weight adjustment" of our results similarly to */
2965 : /* what is done for the cubic spline and lanc. interpolators. */
2966 : /* -------------------------------------------------------------------- */
2967 :
2968 229294 : double adfCoeffsY[4] = {};
2969 229294 : GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
2970 :
2971 229294 : *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
2972 229294 : *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
2973 229294 : *pdfImag = CONVOL4(adfCoeffsY, adfValueImag);
2974 :
2975 229294 : return true;
2976 : }
2977 :
2978 : #ifdef USE_SSE2
2979 :
2980 : /************************************************************************/
2981 : /* XMMLoad4Values() */
2982 : /* */
2983 : /* Load 4 packed byte or uint16, cast them to float and put them in a */
2984 : /* m128 register. */
2985 : /************************************************************************/
2986 :
2987 949092 : static CPL_INLINE __m128 XMMLoad4Values(const GByte *ptr)
2988 : {
2989 : unsigned int i;
2990 949092 : memcpy(&i, ptr, 4);
2991 1898180 : __m128i xmm_i = _mm_cvtsi32_si128(i);
2992 : // Zero extend 4 packed unsigned 8-bit integers in a to packed
2993 : // 32-bit integers.
2994 : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
2995 : xmm_i = _mm_cvtepu8_epi32(xmm_i);
2996 : #else
2997 1898180 : xmm_i = _mm_unpacklo_epi8(xmm_i, _mm_setzero_si128());
2998 1898180 : xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
2999 : #endif
3000 1898180 : return _mm_cvtepi32_ps(xmm_i);
3001 : }
3002 :
3003 5292 : static CPL_INLINE __m128 XMMLoad4Values(const GUInt16 *ptr)
3004 : {
3005 : GUInt64 i;
3006 5292 : memcpy(&i, ptr, 8);
3007 10584 : __m128i xmm_i = _mm_cvtsi64_si128(i);
3008 : // Zero extend 4 packed unsigned 16-bit integers in a to packed
3009 : // 32-bit integers.
3010 : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
3011 : xmm_i = _mm_cvtepu16_epi32(xmm_i);
3012 : #else
3013 10584 : xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
3014 : #endif
3015 10584 : return _mm_cvtepi32_ps(xmm_i);
3016 : }
3017 :
3018 : /************************************************************************/
3019 : /* XMMHorizontalAdd() */
3020 : /* */
3021 : /* Return the sum of the 4 floating points of the register. */
3022 : /************************************************************************/
3023 :
3024 : #if defined(__SSE3__) || defined(USE_NEON_OPTIMIZATIONS)
3025 : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
3026 : {
3027 : __m128 shuf = _mm_movehdup_ps(v); // (v3 , v3 , v1 , v1)
3028 : __m128 sums = _mm_add_ps(v, shuf); // (v3+v3, v3+v2, v1+v1, v1+v0)
3029 : shuf = _mm_movehl_ps(shuf, sums); // (v3 , v3 , v3+v3, v3+v2)
3030 : sums = _mm_add_ss(sums, shuf); // (v1+v0)+(v3+v2)
3031 : return _mm_cvtss_f32(sums);
3032 : }
3033 : #else
3034 238596 : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
3035 : {
3036 238596 : __m128 shuf = _mm_movehl_ps(v, v); // (v3 , v2 , v3 , v2)
3037 238596 : __m128 sums = _mm_add_ps(v, shuf); // (v3+v3, v2+v2, v3+v1, v2+v0)
3038 238596 : shuf = _mm_shuffle_ps(sums, sums, 1); // (v2+v0, v2+v0, v2+v0, v3+v1)
3039 238596 : sums = _mm_add_ss(sums, shuf); // (v2+v0)+(v3+v1)
3040 238596 : return _mm_cvtss_f32(sums);
3041 : }
3042 : #endif
3043 :
3044 : #endif // define USE_SSE2
3045 :
3046 : /************************************************************************/
3047 : /* GWKCubicResampleSrcMaskIsDensity4SampleRealT() */
3048 : /************************************************************************/
3049 :
// Note: if USE_SSE_CUBIC_IMPL, only instantiate that for Byte and UInt16,
// because there are a few assumptions about those types.
3052 : // We do not define USE_SSE_CUBIC_IMPL since in practice, it gives zero
3053 : // perf benefit.
3054 :
3055 : template <class T>
3056 361 : static CPL_INLINE bool GWKCubicResampleSrcMaskIsDensity4SampleRealT(
3057 : const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
3058 : double *pdfDensity, double *pdfReal)
3059 : {
3060 361 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3061 361 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3062 361 : const GPtrDiff_t iSrcOffset =
3063 361 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3064 :
3065 : // Get the bilinear interpolation at the image borders.
3066 361 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3067 361 : iSrcY + 2 >= poWK->nSrcYSize)
3068 : {
3069 0 : double adfImagIgnored[4] = {};
3070 0 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3071 0 : pdfDensity, pdfReal, adfImagIgnored);
3072 : }
3073 :
3074 : #if defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
3075 : const float fDeltaX = static_cast<float>(dfSrcX) - 0.5f - iSrcX;
3076 : const float fDeltaY = static_cast<float>(dfSrcY) - 0.5f - iSrcY;
3077 :
3078 : // TODO(schwehr): Explain the magic numbers.
3079 : float afTemp[4 + 4 + 4 + 1];
3080 : float *pafAligned =
3081 : reinterpret_cast<float *>(afTemp + ((size_t)afTemp & 0xf));
3082 : float *pafCoeffs = pafAligned;
3083 : float *pafDensity = pafAligned + 4;
3084 : float *pafValue = pafAligned + 8;
3085 :
3086 : const float fHalfDeltaX = 0.5f * fDeltaX;
3087 : const float fThreeDeltaX = 3.0f * fDeltaX;
3088 : const float fHalfDeltaX2 = fHalfDeltaX * fDeltaX;
3089 :
3090 : pafCoeffs[0] = fHalfDeltaX * (-1 + fDeltaX * (2 - fDeltaX));
3091 : pafCoeffs[1] = 1 + fHalfDeltaX2 * (-5 + fThreeDeltaX);
3092 : pafCoeffs[2] = fHalfDeltaX * (1 + fDeltaX * (4 - fThreeDeltaX));
3093 : pafCoeffs[3] = fHalfDeltaX2 * (-1 + fDeltaX);
3094 : __m128 xmmCoeffs = _mm_load_ps(pafCoeffs);
3095 : const __m128 xmmThreshold = _mm_load1_ps(&SRC_DENSITY_THRESHOLD);
3096 :
3097 : __m128 xmmMaskLowDensity = _mm_setzero_ps();
3098 : for (GPtrDiff_t i = -1, iOffset = iSrcOffset - poWK->nSrcXSize - 1; i < 3;
3099 : i++, iOffset += poWK->nSrcXSize)
3100 : {
3101 : const __m128 xmmDensity =
3102 : _mm_loadu_ps(poWK->pafUnifiedSrcDensity + iOffset);
3103 : xmmMaskLowDensity = _mm_or_ps(xmmMaskLowDensity,
3104 : _mm_cmplt_ps(xmmDensity, xmmThreshold));
3105 : pafDensity[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
3106 :
3107 : const __m128 xmmValues =
3108 : XMMLoad4Values(((T *)poWK->papabySrcImage[iBand]) + iOffset);
3109 : pafValue[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValues));
3110 : }
3111 : if (_mm_movemask_ps(xmmMaskLowDensity))
3112 : {
3113 : double adfImagIgnored[4] = {};
3114 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3115 : pdfDensity, pdfReal, adfImagIgnored);
3116 : }
3117 :
3118 : const float fHalfDeltaY = 0.5f * fDeltaY;
3119 : const float fThreeDeltaY = 3.0f * fDeltaY;
3120 : const float fHalfDeltaY2 = fHalfDeltaY * fDeltaY;
3121 :
3122 : pafCoeffs[0] = fHalfDeltaY * (-1 + fDeltaY * (2 - fDeltaY));
3123 : pafCoeffs[1] = 1 + fHalfDeltaY2 * (-5 + fThreeDeltaY);
3124 : pafCoeffs[2] = fHalfDeltaY * (1 + fDeltaY * (4 - fThreeDeltaY));
3125 : pafCoeffs[3] = fHalfDeltaY2 * (-1 + fDeltaY);
3126 :
3127 : xmmCoeffs = _mm_load_ps(pafCoeffs);
3128 :
3129 : const __m128 xmmDensity = _mm_load_ps(pafDensity);
3130 : const __m128 xmmValue = _mm_load_ps(pafValue);
3131 : *pdfDensity = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
3132 : *pdfReal = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValue));
3133 :
3134 : // We did all above computations on float32 whereas the general case is
3135 : // float64. Not sure if one is fundamentally more correct than the other
3136 : // one, but we want our optimization to give the same result as the
3137 : // general case as much as possible, so if the resulting value is
3138 : // close to some_int_value + 0.5, redo the computation with the general
3139 : // case.
3140 : // Note: If other types than Byte or UInt16, will need changes.
3141 : if (fabs(*pdfReal - static_cast<int>(*pdfReal) - 0.5) > .007)
3142 : return true;
3143 :
3144 : #endif // defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
3145 :
3146 361 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3147 361 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3148 :
3149 361 : double adfValueDens[4] = {};
3150 361 : double adfValueReal[4] = {};
3151 :
3152 361 : double adfCoeffsX[4] = {};
3153 361 : GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3154 :
3155 361 : double adfCoeffsY[4] = {};
3156 361 : GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3157 :
3158 1433 : for (GPtrDiff_t i = -1; i < 3; i++)
3159 : {
3160 1177 : const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
3161 : #if !(defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2))
3162 1177 : if (poWK->pafUnifiedSrcDensity[iOffset + 0] < SRC_DENSITY_THRESHOLD ||
3163 1089 : poWK->pafUnifiedSrcDensity[iOffset + 1] < SRC_DENSITY_THRESHOLD ||
3164 1089 : poWK->pafUnifiedSrcDensity[iOffset + 2] < SRC_DENSITY_THRESHOLD ||
3165 1089 : poWK->pafUnifiedSrcDensity[iOffset + 3] < SRC_DENSITY_THRESHOLD)
3166 : {
3167 105 : double adfImagIgnored[4] = {};
3168 105 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3169 : pdfDensity, pdfReal,
3170 105 : adfImagIgnored);
3171 : }
3172 : #endif
3173 :
3174 1072 : adfValueDens[i + 1] =
3175 1072 : CONVOL4(adfCoeffsX, poWK->pafUnifiedSrcDensity + iOffset);
3176 :
3177 1072 : adfValueReal[i + 1] = CONVOL4(
3178 : adfCoeffsX,
3179 : reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
3180 : }
3181 :
3182 256 : *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3183 256 : *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3184 :
3185 256 : return true;
3186 : }
3187 :
3188 : /************************************************************************/
3189 : /* GWKCubicResampleSrcMaskIsDensity4SampleReal() */
3190 : /* Bi-cubic when source has and only has pafUnifiedSrcDensity. */
3191 : /************************************************************************/
3192 :
3193 0 : static bool GWKCubicResampleSrcMaskIsDensity4SampleReal(
3194 : const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
3195 : double *pdfDensity, double *pdfReal)
3196 :
3197 : {
3198 0 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3199 0 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3200 0 : const GPtrDiff_t iSrcOffset =
3201 0 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3202 0 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3203 0 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3204 :
3205 : // Get the bilinear interpolation at the image borders.
3206 0 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3207 0 : iSrcY + 2 >= poWK->nSrcYSize)
3208 : {
3209 0 : double adfImagIgnored[4] = {};
3210 0 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3211 0 : pdfDensity, pdfReal, adfImagIgnored);
3212 : }
3213 :
3214 0 : double adfCoeffsX[4] = {};
3215 0 : GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3216 :
3217 0 : double adfCoeffsY[4] = {};
3218 0 : GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3219 :
3220 0 : double adfValueDens[4] = {};
3221 0 : double adfValueReal[4] = {};
3222 0 : double adfDensity[4] = {};
3223 0 : double adfReal[4] = {};
3224 0 : double adfImagIgnored[4] = {};
3225 :
3226 0 : for (GPtrDiff_t i = -1; i < 3; i++)
3227 : {
3228 0 : if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
3229 0 : 2, adfDensity, adfReal, adfImagIgnored) ||
3230 0 : adfDensity[0] < SRC_DENSITY_THRESHOLD ||
3231 0 : adfDensity[1] < SRC_DENSITY_THRESHOLD ||
3232 0 : adfDensity[2] < SRC_DENSITY_THRESHOLD ||
3233 0 : adfDensity[3] < SRC_DENSITY_THRESHOLD)
3234 : {
3235 0 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3236 : pdfDensity, pdfReal,
3237 0 : adfImagIgnored);
3238 : }
3239 :
3240 0 : adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
3241 0 : adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
3242 : }
3243 :
3244 0 : *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3245 0 : *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3246 :
3247 0 : return true;
3248 : }
3249 :
3250 : template <class T>
3251 1906603 : static bool GWKCubicResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
3252 : int iBand, double dfSrcX,
3253 : double dfSrcY, T *pValue)
3254 :
3255 : {
3256 1906603 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3257 1906603 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3258 1906603 : const GPtrDiff_t iSrcOffset =
3259 1906603 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3260 1906603 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3261 1906603 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3262 1906603 : const double dfDeltaY2 = dfDeltaY * dfDeltaY;
3263 1906603 : const double dfDeltaY3 = dfDeltaY2 * dfDeltaY;
3264 :
3265 : // Get the bilinear interpolation at the image borders.
3266 1906603 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3267 1662527 : iSrcY + 2 >= poWK->nSrcYSize)
3268 303751 : return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
3269 303751 : pValue);
3270 :
3271 1602852 : double adfCoeffs[4] = {};
3272 1602852 : GWKCubicComputeWeights(dfDeltaX, adfCoeffs);
3273 :
3274 1602852 : double adfValue[4] = {};
3275 :
3276 8014250 : for (GPtrDiff_t i = -1; i < 3; i++)
3277 : {
3278 6411406 : const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
3279 :
3280 6411406 : adfValue[i + 1] = CONVOL4(
3281 : adfCoeffs,
3282 : reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
3283 : }
3284 :
3285 : const double dfValue =
3286 1602852 : CubicConvolution(dfDeltaY, dfDeltaY2, dfDeltaY3, adfValue[0],
3287 : adfValue[1], adfValue[2], adfValue[3]);
3288 :
3289 1602852 : *pValue = GWKClampValueT<T>(dfValue);
3290 :
3291 1602852 : return true;
3292 : }
3293 :
3294 : /************************************************************************/
3295 : /* GWKLanczosSinc() */
3296 : /************************************************************************/
3297 :
3298 : /*
3299 : * Lanczos windowed sinc interpolation kernel with radius r.
3300 : * /
3301 : * | sinc(x) * sinc(x/r), if |x| < r
3302 : * L(x) = | 1, if x = 0 ,
3303 : * | 0, otherwise
3304 : * \
3305 : *
3306 : * where sinc(x) = sin(PI * x) / (PI * x).
3307 : */
3308 :
// Lanczos kernel value sinc(x) * sinc(x/3) for radius 3, with the x == 0
// singularity handled explicitly. The |x| < 3 support limit is not tested
// here.
static double GWKLanczosSinc(double dfX)
{
    if (dfX != 0.0)
    {
        const double dfPIX = M_PI * dfX;
        const double dfPIXoverR = dfPIX / 3;
        // Denominator (PI * x) * (PI * x / 3).
        const double dfPIX2overR = dfPIX * dfPIXoverR;

        // Given that sin(3x) = 3 sin(x) - 4 sin^3 (x)
        // we can compute sin(dfPIX) from sin(dfPIXoverR), so a single call
        // to sin() yields the product sin(dfPIX) * sin(dfPIXoverR).
        const double dfSinPIXoverR = sin(dfPIXoverR);
        const double dfSinPIXoverRSquared = dfSinPIXoverR * dfSinPIXoverR;
        const double dfSinPIXMulSinPIXoverR =
            (3 - 4 * dfSinPIXoverRSquared) * dfSinPIXoverRSquared;

        return dfSinPIXMulSinPIXoverR / dfPIX2overR;
    }

    return 1.0;
}
3325 :
// Replace each of the four inputs, in place, by its radius-3 Lanczos kernel
// value sinc(x) * sinc(x/3) and return the sum of the four results.
static double GWKLanczosSinc4Values(double *padfValues)
{
    for (double *pdfValue = padfValues; pdfValue != padfValues + 4; ++pdfValue)
    {
        if (*pdfValue == 0.0)
        {
            // Limit of the kernel at x = 0.
            *pdfValue = 1.0;
            continue;
        }

        const double dfPIX = M_PI * *pdfValue;
        const double dfPIXoverR = dfPIX / 3;
        const double dfPIX2overR = dfPIX * dfPIXoverR;

        // Given that sin(3x) = 3 sin(x) - 4 sin^3 (x)
        // we can compute sin(dfPIX) from sin(dfPIXoverR).
        const double dfSinPIXoverR = sin(dfPIXoverR);
        const double dfSinPIXoverRSquared = dfSinPIXoverR * dfSinPIXoverR;
        const double dfSinPIXMulSinPIXoverR =
            (3 - 4 * dfSinPIXoverRSquared) * dfSinPIXoverRSquared;

        *pdfValue = dfSinPIXMulSinPIXoverR / dfPIX2overR;
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3350 :
3351 : /************************************************************************/
3352 : /* GWKBilinear() */
3353 : /************************************************************************/
3354 :
// Triangle (bilinear) kernel: 1 - |x| for |x| <= 1, 0 elsewhere.
static double GWKBilinear(double dfX)
{
    const double dfAbsX = fabs(dfX);
    return (dfAbsX <= 1.0) ? 1 - dfAbsX : 0.0;
}
3363 :
// Replace each of the four inputs, in place, by its triangle (bilinear)
// kernel value (1 - |x| for |x| <= 1, 0 elsewhere) and return the sum of
// the four results.
static double GWKBilinear4Values(double *padfValues)
{
    for (int i = 0; i < 4; ++i)
    {
        const double dfAbsX = fabs(padfValues[i]);
        padfValues[i] = (dfAbsX <= 1.0) ? 1 - dfAbsX : 0.0;
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3388 :
3389 : /************************************************************************/
3390 : /* GWKCubic() */
3391 : /************************************************************************/
3392 :
3393 4335650 : static double GWKCubic(double dfX)
3394 : {
3395 4335650 : return CubicKernel(dfX);
3396 : }
3397 :
// Replace each of the four inputs, in place, by its cubic kernel value and
// return the sum of the four results. The kernel is piecewise:
//    x^2 * (1.5|x| - 2.5) + 1                 for |x| <= 1
//    x^2 * (-0.5|x| + 2.5) - 4|x| + 2         for 1 < |x| <= 2
//    0                                        elsewhere
static double GWKCubic4Values(double *padfValues)
{
    double adfWeights[4] = {0.0, 0.0, 0.0, 0.0};

    for (int i = 0; i < 4; ++i)
    {
        const double dfX = padfValues[i];
        const double dfAbsX = fabs(dfX);
        const double dfX2 = dfX * dfX;

        if (dfAbsX <= 1.0)
        {
            adfWeights[i] = dfX2 * (1.5 * dfAbsX - 2.5) + 1.0;
        }
        else if (dfAbsX <= 2.0)
        {
            adfWeights[i] = dfX2 * (-0.5 * dfAbsX + 2.5) - 4.0 * dfAbsX + 2.0;
        }
    }

    for (int i = 0; i < 4; ++i)
    {
        padfValues[i] = adfWeights[i];
    }

    return adfWeights[0] + adfWeights[1] + adfWeights[2] + adfWeights[3];
}
3439 :
3440 : /************************************************************************/
3441 : /* GWKBSpline() */
3442 : /************************************************************************/
3443 :
3444 : // https://www.cs.utexas.edu/~fussell/courses/cs384g-fall2013/lectures/mitchell/Mitchell.pdf
3445 : // Equation 8 with (B,C)=(1,0)
3446 : // 1/6 * ( 3 * |x|^3 - 6 * |x|^2 + 4) |x| < 1
3447 : // 1/6 * ( -|x|^3 + 6 |x|^2 - 12|x| + 8) |x| >= 1 and |x| < 2
3448 :
// Cubic B-spline kernel built from truncated cubes:
//   (x+2)^3 - 4 (x+1)^3 + 6 x^3 - 4 (x-1)^3
// where each term only contributes when its base is > 0. The 1/6
// normalisation factor is deliberately not applied here.
static double GWKBSpline(double x)
{
    const double xp2 = x + 2.0;
    const double xp1 = x + 1.0;
    const double xm1 = x - 1.0;

    // This will most likely be used, so we'll compute it ahead of time to
    // avoid stalling the processor.
    const double xp2c = xp2 * xp2 * xp2;

    if (!(xp2 > 0.0))
        return 0.0;

    // Terms are added from the innermost (x-1) outwards; each test is only
    // evaluated when the enclosing one passed.
    double dfInner = 0.0;
    if (xp1 > 0.0)
    {
        if (x > 0.0)
        {
            if (xm1 > 0.0)
                dfInner = -4.0 * xm1 * xm1 * xm1;
            dfInner = dfInner + 6.0 * x * x * x;
        }
        dfInner = dfInner + -4.0 * xp1 * xp1 * xp1;
    }

    return dfInner + xp2c;  // * 0.166666666666666666666
}
3472 :
// Replace each of the four inputs, in place, by its (unnormalised) cubic
// B-spline kernel value
//   (x+2)^3 - 4 (x+1)^3 + 6 x^3 - 4 (x-1)^3
// where each term only contributes when its base is > 0, and return the sum
// of the four results. The 1/6 factor is deliberately not applied here.
static double GWKBSpline4Values(double *padfValues)
{
    for (int i = 0; i < 4; i++)
    {
        const double x = padfValues[i];
        const double xp2 = x + 2.0;
        const double xp1 = x + 1.0;
        const double xm1 = x - 1.0;

        // This will most likely be used, so we'll compute it ahead of time to
        // avoid stalling the processor.
        const double xp2c = xp2 * xp2 * xp2;

        if (!(xp2 > 0.0))
        {
            padfValues[i] = 0.0;
            continue;
        }

        // Terms are added from the innermost (x-1) outwards; each test is
        // only evaluated when the enclosing one passed.
        double dfInner = 0.0;
        if (xp1 > 0.0)
        {
            if (x > 0.0)
            {
                if (xm1 > 0.0)
                    dfInner = -4.0 * xm1 * xm1 * xm1;
                dfInner = dfInner + 6.0 * x * x * x;
            }
            dfInner = dfInner + -4.0 * xp1 * xp1 * xp1;
        }

        padfValues[i] = dfInner + xp2c;  // * 0.166666666666666666666
    }
    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3502 : /************************************************************************/
3503 : /* GWKResampleWrkStruct */
3504 : /************************************************************************/
3505 :
3506 : typedef struct _GWKResampleWrkStruct GWKResampleWrkStruct;
3507 :
3508 : typedef bool (*pfnGWKResampleType)(const GDALWarpKernel *poWK, int iBand,
3509 : double dfSrcX, double dfSrcY,
3510 : double *pdfDensity, double *pdfReal,
3511 : double *pdfImag,
3512 : GWKResampleWrkStruct *psWrkStruct);
3513 :
3514 : struct _GWKResampleWrkStruct
3515 : {
3516 : pfnGWKResampleType pfnGWKResample;
3517 :
3518 : // Space for saved X weights.
3519 : double *padfWeightsX;
3520 : bool *pabCalcX;
3521 :
3522 : double *padfWeightsY; // Only used by GWKResampleOptimizedLanczos.
3523 : int iLastSrcX; // Only used by GWKResampleOptimizedLanczos.
3524 : int iLastSrcY; // Only used by GWKResampleOptimizedLanczos.
3525 : double dfLastDeltaX; // Only used by GWKResampleOptimizedLanczos.
3526 : double dfLastDeltaY; // Only used by GWKResampleOptimizedLanczos.
3527 : double dfCosPiXScale; // Only used by GWKResampleOptimizedLanczos.
3528 : double dfSinPiXScale; // Only used by GWKResampleOptimizedLanczos.
3529 : double dfCosPiXScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3530 : double dfSinPiXScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3531 : double dfCosPiYScale; // Only used by GWKResampleOptimizedLanczos.
3532 : double dfSinPiYScale; // Only used by GWKResampleOptimizedLanczos.
3533 : double dfCosPiYScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3534 : double dfSinPiYScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3535 :
3536 : // Space for saving a row of pixels.
3537 : double *padfRowDensity;
3538 : double *padfRowReal;
3539 : double *padfRowImag;
3540 : };
3541 :
3542 : /************************************************************************/
3543 : /* GWKResampleCreateWrkStruct() */
3544 : /************************************************************************/
3545 :
3546 : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
3547 : double dfSrcY, double *pdfDensity, double *pdfReal,
3548 : double *pdfImag, GWKResampleWrkStruct *psWrkStruct);
3549 :
3550 : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
3551 : double dfSrcX, double dfSrcY,
3552 : double *pdfDensity, double *pdfReal,
3553 : double *pdfImag,
3554 : GWKResampleWrkStruct *psWrkStruct);
3555 :
3556 342 : static GWKResampleWrkStruct *GWKResampleCreateWrkStruct(GDALWarpKernel *poWK)
3557 : {
3558 342 : const int nXDist = (poWK->nXRadius + 1) * 2;
3559 342 : const int nYDist = (poWK->nYRadius + 1) * 2;
3560 :
3561 : GWKResampleWrkStruct *psWrkStruct = static_cast<GWKResampleWrkStruct *>(
3562 342 : CPLCalloc(1, sizeof(GWKResampleWrkStruct)));
3563 :
3564 : // Alloc space for saved X weights.
3565 342 : psWrkStruct->padfWeightsX =
3566 342 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3567 342 : psWrkStruct->pabCalcX =
3568 342 : static_cast<bool *>(CPLMalloc(nXDist * sizeof(bool)));
3569 :
3570 342 : psWrkStruct->padfWeightsY =
3571 342 : static_cast<double *>(CPLCalloc(nYDist, sizeof(double)));
3572 342 : psWrkStruct->iLastSrcX = -10;
3573 342 : psWrkStruct->iLastSrcY = -10;
3574 342 : psWrkStruct->dfLastDeltaX = -10;
3575 342 : psWrkStruct->dfLastDeltaY = -10;
3576 :
3577 : // Alloc space for saving a row of pixels.
3578 342 : if (poWK->pafUnifiedSrcDensity == nullptr &&
3579 314 : poWK->panUnifiedSrcValid == nullptr &&
3580 302 : poWK->papanBandSrcValid == nullptr)
3581 : {
3582 302 : psWrkStruct->padfRowDensity = nullptr;
3583 : }
3584 : else
3585 : {
3586 40 : psWrkStruct->padfRowDensity =
3587 40 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3588 : }
3589 342 : psWrkStruct->padfRowReal =
3590 342 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3591 342 : psWrkStruct->padfRowImag =
3592 342 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3593 :
3594 342 : if (poWK->eResample == GRA_Lanczos)
3595 : {
3596 63 : psWrkStruct->pfnGWKResample = GWKResampleOptimizedLanczos;
3597 :
3598 63 : if (poWK->dfXScale < 1)
3599 : {
3600 4 : psWrkStruct->dfCosPiXScaleOver3 = cos(M_PI / 3 * poWK->dfXScale);
3601 4 : psWrkStruct->dfSinPiXScaleOver3 =
3602 4 : sqrt(1 - psWrkStruct->dfCosPiXScaleOver3 *
3603 4 : psWrkStruct->dfCosPiXScaleOver3);
3604 : // "Naive":
3605 : // const double dfCosPiXScale = cos( M_PI * dfXScale );
3606 : // const double dfSinPiXScale = sin( M_PI * dfXScale );
3607 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
3608 4 : psWrkStruct->dfCosPiXScale = (4 * psWrkStruct->dfCosPiXScaleOver3 *
3609 4 : psWrkStruct->dfCosPiXScaleOver3 -
3610 4 : 3) *
3611 4 : psWrkStruct->dfCosPiXScaleOver3;
3612 4 : psWrkStruct->dfSinPiXScale = sqrt(
3613 4 : 1 - psWrkStruct->dfCosPiXScale * psWrkStruct->dfCosPiXScale);
3614 : }
3615 :
3616 63 : if (poWK->dfYScale < 1)
3617 : {
3618 11 : psWrkStruct->dfCosPiYScaleOver3 = cos(M_PI / 3 * poWK->dfYScale);
3619 11 : psWrkStruct->dfSinPiYScaleOver3 =
3620 11 : sqrt(1 - psWrkStruct->dfCosPiYScaleOver3 *
3621 11 : psWrkStruct->dfCosPiYScaleOver3);
3622 : // "Naive":
3623 : // const double dfCosPiYScale = cos( M_PI * dfYScale );
3624 : // const double dfSinPiYScale = sin( M_PI * dfYScale );
3625 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
3626 11 : psWrkStruct->dfCosPiYScale = (4 * psWrkStruct->dfCosPiYScaleOver3 *
3627 11 : psWrkStruct->dfCosPiYScaleOver3 -
3628 11 : 3) *
3629 11 : psWrkStruct->dfCosPiYScaleOver3;
3630 11 : psWrkStruct->dfSinPiYScale = sqrt(
3631 11 : 1 - psWrkStruct->dfCosPiYScale * psWrkStruct->dfCosPiYScale);
3632 : }
3633 : }
3634 : else
3635 279 : psWrkStruct->pfnGWKResample = GWKResample;
3636 :
3637 342 : return psWrkStruct;
3638 : }
3639 :
3640 : /************************************************************************/
3641 : /* GWKResampleDeleteWrkStruct() */
3642 : /************************************************************************/
3643 :
3644 342 : static void GWKResampleDeleteWrkStruct(GWKResampleWrkStruct *psWrkStruct)
3645 : {
3646 342 : CPLFree(psWrkStruct->padfWeightsX);
3647 342 : CPLFree(psWrkStruct->padfWeightsY);
3648 342 : CPLFree(psWrkStruct->pabCalcX);
3649 342 : CPLFree(psWrkStruct->padfRowDensity);
3650 342 : CPLFree(psWrkStruct->padfRowReal);
3651 342 : CPLFree(psWrkStruct->padfRowImag);
3652 342 : CPLFree(psWrkStruct);
3653 342 : }
3654 :
3655 : /************************************************************************/
3656 : /* GWKResample() */
3657 : /************************************************************************/
3658 :
3659 239383 : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
3660 : double dfSrcY, double *pdfDensity, double *pdfReal,
3661 : double *pdfImag, GWKResampleWrkStruct *psWrkStruct)
3662 :
3663 : {
3664 : // Save as local variables to avoid following pointers in loops.
3665 239383 : const int nSrcXSize = poWK->nSrcXSize;
3666 239383 : const int nSrcYSize = poWK->nSrcYSize;
3667 :
3668 239383 : double dfAccumulatorReal = 0.0;
3669 239383 : double dfAccumulatorImag = 0.0;
3670 239383 : double dfAccumulatorDensity = 0.0;
3671 239383 : double dfAccumulatorWeight = 0.0;
3672 239383 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
3673 239383 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
3674 239383 : const GPtrDiff_t iSrcOffset =
3675 239383 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
3676 239383 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3677 239383 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3678 :
3679 239383 : const double dfXScale = poWK->dfXScale;
3680 239383 : const double dfYScale = poWK->dfYScale;
3681 :
3682 239383 : const int nXDist = (poWK->nXRadius + 1) * 2;
3683 :
3684 : // Space for saved X weights.
3685 239383 : double *padfWeightsX = psWrkStruct->padfWeightsX;
3686 239383 : bool *pabCalcX = psWrkStruct->pabCalcX;
3687 :
3688 : // Space for saving a row of pixels.
3689 239383 : double *padfRowDensity = psWrkStruct->padfRowDensity;
3690 239383 : double *padfRowReal = psWrkStruct->padfRowReal;
3691 239383 : double *padfRowImag = psWrkStruct->padfRowImag;
3692 :
3693 : // Mark as needing calculation (don't calculate the weights yet,
3694 : // because a mask may render it unnecessary).
3695 239383 : memset(pabCalcX, false, nXDist * sizeof(bool));
3696 :
3697 239383 : FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample];
3698 239383 : CPLAssert(pfnGetWeight);
3699 :
3700 : // Skip sampling over edge of image.
3701 239383 : int j = poWK->nFiltInitY;
3702 239383 : int jMax = poWK->nYRadius;
3703 239383 : if (iSrcY + j < 0)
3704 566 : j = -iSrcY;
3705 239383 : if (iSrcY + jMax >= nSrcYSize)
3706 662 : jMax = nSrcYSize - iSrcY - 1;
3707 :
3708 239383 : int iMin = poWK->nFiltInitX;
3709 239383 : int iMax = poWK->nXRadius;
3710 239383 : if (iSrcX + iMin < 0)
3711 566 : iMin = -iSrcX;
3712 239383 : if (iSrcX + iMax >= nSrcXSize)
3713 659 : iMax = nSrcXSize - iSrcX - 1;
3714 :
3715 239383 : const int bXScaleBelow1 = (dfXScale < 1.0);
3716 239383 : const int bYScaleBelow1 = (dfYScale < 1.0);
3717 :
3718 239383 : GPtrDiff_t iRowOffset =
3719 239383 : iSrcOffset + static_cast<GPtrDiff_t>(j - 1) * nSrcXSize + iMin;
3720 :
3721 : // Loop over pixel rows in the kernel.
3722 1445930 : for (; j <= jMax; ++j)
3723 : {
3724 1206540 : iRowOffset += nSrcXSize;
3725 :
3726 : // Get pixel values.
3727 : // We can potentially read extra elements after the "normal" end of the
3728 : // source arrays, but the contract of papabySrcImage[iBand],
3729 : // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
3730 : // is to have WARP_EXTRA_ELTS reserved at their end.
3731 1206540 : if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
3732 : padfRowDensity, padfRowReal, padfRowImag))
3733 72 : continue;
3734 :
3735 : // Calculate the Y weight.
3736 : double dfWeight1 = (bYScaleBelow1)
3737 1206470 : ? pfnGetWeight((j - dfDeltaY) * dfYScale)
3738 1600 : : pfnGetWeight(j - dfDeltaY);
3739 :
3740 : // Iterate over pixels in row.
3741 1206470 : double dfAccumulatorRealLocal = 0.0;
3742 1206470 : double dfAccumulatorImagLocal = 0.0;
3743 1206470 : double dfAccumulatorDensityLocal = 0.0;
3744 1206470 : double dfAccumulatorWeightLocal = 0.0;
3745 :
3746 7317420 : for (int i = iMin; i <= iMax; ++i)
3747 : {
3748 : // Skip sampling if pixel has zero density.
3749 6110940 : if (padfRowDensity != nullptr &&
3750 77277 : padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD)
3751 546 : continue;
3752 :
3753 6110400 : double dfWeight2 = 0.0;
3754 :
3755 : // Make or use a cached set of weights for this row.
3756 6110400 : if (pabCalcX[i - iMin])
3757 : {
3758 : // Use saved weight value instead of recomputing it.
3759 4903920 : dfWeight2 = padfWeightsX[i - iMin];
3760 : }
3761 : else
3762 : {
3763 : // Calculate & save the X weight.
3764 1206480 : padfWeightsX[i - iMin] = dfWeight2 =
3765 1206480 : (bXScaleBelow1) ? pfnGetWeight((i - dfDeltaX) * dfXScale)
3766 1600 : : pfnGetWeight(i - dfDeltaX);
3767 :
3768 1206480 : pabCalcX[i - iMin] = true;
3769 : }
3770 :
3771 : // Accumulate!
3772 6110400 : dfAccumulatorRealLocal += padfRowReal[i - iMin] * dfWeight2;
3773 6110400 : dfAccumulatorImagLocal += padfRowImag[i - iMin] * dfWeight2;
3774 6110400 : if (padfRowDensity != nullptr)
3775 76731 : dfAccumulatorDensityLocal +=
3776 76731 : padfRowDensity[i - iMin] * dfWeight2;
3777 6110400 : dfAccumulatorWeightLocal += dfWeight2;
3778 : }
3779 :
3780 1206470 : dfAccumulatorReal += dfAccumulatorRealLocal * dfWeight1;
3781 1206470 : dfAccumulatorImag += dfAccumulatorImagLocal * dfWeight1;
3782 1206470 : dfAccumulatorDensity += dfAccumulatorDensityLocal * dfWeight1;
3783 1206470 : dfAccumulatorWeight += dfAccumulatorWeightLocal * dfWeight1;
3784 : }
3785 :
3786 239383 : if (dfAccumulatorWeight < 0.000001 ||
3787 1887 : (padfRowDensity != nullptr && dfAccumulatorDensity < 0.000001))
3788 : {
3789 0 : *pdfDensity = 0.0;
3790 0 : return false;
3791 : }
3792 :
3793 : // Calculate the output taking into account weighting.
3794 239383 : if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
3795 : {
3796 239380 : *pdfReal = dfAccumulatorReal / dfAccumulatorWeight;
3797 239380 : *pdfImag = dfAccumulatorImag / dfAccumulatorWeight;
3798 239380 : if (padfRowDensity != nullptr)
3799 1884 : *pdfDensity = dfAccumulatorDensity / dfAccumulatorWeight;
3800 : else
3801 237496 : *pdfDensity = 1.0;
3802 : }
3803 : else
3804 : {
3805 3 : *pdfReal = dfAccumulatorReal;
3806 3 : *pdfImag = dfAccumulatorImag;
3807 3 : if (padfRowDensity != nullptr)
3808 3 : *pdfDensity = dfAccumulatorDensity;
3809 : else
3810 0 : *pdfDensity = 1.0;
3811 : }
3812 :
3813 239383 : return true;
3814 : }
3815 :
3816 : /************************************************************************/
3817 : /* GWKResampleOptimizedLanczos() */
3818 : /************************************************************************/
3819 :
3820 617144 : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
3821 : double dfSrcX, double dfSrcY,
3822 : double *pdfDensity, double *pdfReal,
3823 : double *pdfImag,
3824 : GWKResampleWrkStruct *psWrkStruct)
3825 :
3826 : {
3827 : // Save as local variables to avoid following pointers in loops.
3828 617144 : const int nSrcXSize = poWK->nSrcXSize;
3829 617144 : const int nSrcYSize = poWK->nSrcYSize;
3830 :
3831 617144 : double dfAccumulatorReal = 0.0;
3832 617144 : double dfAccumulatorImag = 0.0;
3833 617144 : double dfAccumulatorDensity = 0.0;
3834 617144 : double dfAccumulatorWeight = 0.0;
3835 617144 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
3836 617144 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
3837 617144 : const GPtrDiff_t iSrcOffset =
3838 617144 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
3839 617144 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3840 617144 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3841 :
3842 617144 : const double dfXScale = poWK->dfXScale;
3843 617144 : const double dfYScale = poWK->dfYScale;
3844 :
3845 : // Space for saved X weights.
3846 617144 : double *const padfWeightsXShifted =
3847 617144 : psWrkStruct->padfWeightsX - poWK->nFiltInitX;
3848 617144 : double *const padfWeightsYShifted =
3849 617144 : psWrkStruct->padfWeightsY - poWK->nFiltInitY;
3850 :
3851 : // Space for saving a row of pixels.
3852 617144 : double *const padfRowDensity = psWrkStruct->padfRowDensity;
3853 617144 : double *const padfRowReal = psWrkStruct->padfRowReal;
3854 617144 : double *const padfRowImag = psWrkStruct->padfRowImag;
3855 :
3856 : // Skip sampling over edge of image.
3857 617144 : int jMin = poWK->nFiltInitY;
3858 617144 : int jMax = poWK->nYRadius;
3859 617144 : if (iSrcY + jMin < 0)
3860 16522 : jMin = -iSrcY;
3861 617144 : if (iSrcY + jMax >= nSrcYSize)
3862 5782 : jMax = nSrcYSize - iSrcY - 1;
3863 :
3864 617144 : int iMin = poWK->nFiltInitX;
3865 617144 : int iMax = poWK->nXRadius;
3866 617144 : if (iSrcX + iMin < 0)
3867 15797 : iMin = -iSrcX;
3868 617144 : if (iSrcX + iMax >= nSrcXSize)
3869 4657 : iMax = nSrcXSize - iSrcX - 1;
3870 :
3871 617144 : if (dfXScale < 1.0)
3872 : {
3873 403041 : while ((iMin - dfDeltaX) * dfXScale < -3.0)
3874 200179 : iMin++;
3875 202862 : while ((iMax - dfDeltaX) * dfXScale > 3.0)
3876 0 : iMax--;
3877 :
3878 : // clang-format off
3879 : /*
3880 : Naive version:
3881 : for (int i = iMin; i <= iMax; ++i)
3882 : {
3883 : psWrkStruct->padfWeightsXShifted[i] =
3884 : GWKLanczosSinc((i - dfDeltaX) * dfXScale);
3885 : }
3886 :
3887 : but given that:
3888 :
3889 : GWKLanczosSinc(x):
3890 : if (dfX == 0.0)
3891 : return 1.0;
3892 :
3893 : const double dfPIX = M_PI * dfX;
3894 : const double dfPIXoverR = dfPIX / 3;
3895 : const double dfPIX2overR = dfPIX * dfPIXoverR;
3896 : return sin(dfPIX) * sin(dfPIXoverR) / dfPIX2overR;
3897 :
3898 : and
3899 : sin (a + b) = sin a cos b + cos a sin b.
3900 : cos (a + b) = cos a cos b - sin a sin b.
3901 :
3902 : we can skip any sin() computation within the loop
3903 : */
3904 : // clang-format on
3905 :
3906 202862 : if (iSrcX != psWrkStruct->iLastSrcX ||
3907 131072 : dfDeltaX != psWrkStruct->dfLastDeltaX)
3908 : {
3909 71790 : double dfX = (iMin - dfDeltaX) * dfXScale;
3910 :
3911 71790 : double dfPIXover3 = M_PI / 3 * dfX;
3912 71790 : double dfCosOver3 = cos(dfPIXover3);
3913 71790 : double dfSinOver3 = sin(dfPIXover3);
3914 :
3915 : // "Naive":
3916 : // double dfSin = sin( M_PI * dfX );
3917 : // double dfCos = cos( M_PI * dfX );
3918 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
3919 71790 : double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
3920 71790 : double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
3921 :
3922 71790 : const double dfCosPiXScaleOver3 = psWrkStruct->dfCosPiXScaleOver3;
3923 71790 : const double dfSinPiXScaleOver3 = psWrkStruct->dfSinPiXScaleOver3;
3924 71790 : const double dfCosPiXScale = psWrkStruct->dfCosPiXScale;
3925 71790 : const double dfSinPiXScale = psWrkStruct->dfSinPiXScale;
3926 71790 : constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
3927 71790 : padfWeightsXShifted[iMin] =
3928 71790 : dfX == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfX * dfX);
3929 1636480 : for (int i = iMin + 1; i <= iMax; ++i)
3930 : {
3931 1564690 : dfX += dfXScale;
3932 1564690 : const double dfNewSin =
3933 1564690 : dfSin * dfCosPiXScale + dfCos * dfSinPiXScale;
3934 1564690 : const double dfNewSinOver3 = dfSinOver3 * dfCosPiXScaleOver3 +
3935 1564690 : dfCosOver3 * dfSinPiXScaleOver3;
3936 1564690 : padfWeightsXShifted[i] =
3937 : dfX == 0
3938 1564690 : ? 1.0
3939 1564690 : : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfX * dfX);
3940 1564690 : const double dfNewCos =
3941 1564690 : dfCos * dfCosPiXScale - dfSin * dfSinPiXScale;
3942 1564690 : const double dfNewCosOver3 = dfCosOver3 * dfCosPiXScaleOver3 -
3943 1564690 : dfSinOver3 * dfSinPiXScaleOver3;
3944 1564690 : dfSin = dfNewSin;
3945 1564690 : dfCos = dfNewCos;
3946 1564690 : dfSinOver3 = dfNewSinOver3;
3947 1564690 : dfCosOver3 = dfNewCosOver3;
3948 : }
3949 :
3950 71790 : psWrkStruct->iLastSrcX = iSrcX;
3951 71790 : psWrkStruct->dfLastDeltaX = dfDeltaX;
3952 : }
3953 : }
3954 : else
3955 : {
3956 757542 : while (iMin - dfDeltaX < -3.0)
3957 343260 : iMin++;
3958 414282 : while (iMax - dfDeltaX > 3.0)
3959 0 : iMax--;
3960 :
3961 414282 : if (iSrcX != psWrkStruct->iLastSrcX ||
3962 209580 : dfDeltaX != psWrkStruct->dfLastDeltaX)
3963 : {
3964 : // Optimisation of GWKLanczosSinc(i - dfDeltaX) based on the
3965 : // following trigonometric formulas.
3966 :
3967 : // TODO(schwehr): Move this somewhere where it can be rendered at
3968 : // LaTeX.
3969 : // clang-format off
3970 : // sin(M_PI * (dfBase + k)) = sin(M_PI * dfBase) * cos(M_PI * k) +
3971 : // cos(M_PI * dfBase) * sin(M_PI * k)
3972 : // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k) + dfCosPIBase * sin(M_PI * k)
3973 : // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k)
3974 : // sin(M_PI * (dfBase + k)) = dfSinPIBase * (((k % 2) == 0) ? 1 : -1)
3975 :
3976 : // sin(M_PI / dfR * (dfBase + k)) = sin(M_PI / dfR * dfBase) * cos(M_PI / dfR * k) +
3977 : // cos(M_PI / dfR * dfBase) * sin(M_PI / dfR * k)
3978 : // sin(M_PI / dfR * (dfBase + k)) = dfSinPIBaseOverR * cos(M_PI / dfR * k) + dfCosPIBaseOverR * sin(M_PI / dfR * k)
3979 : // clang-format on
3980 :
3981 414282 : const double dfSinPIDeltaXOver3 = sin((-M_PI / 3.0) * dfDeltaX);
3982 414282 : const double dfSin2PIDeltaXOver3 =
3983 : dfSinPIDeltaXOver3 * dfSinPIDeltaXOver3;
3984 : // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaX < PI/2.
3985 414282 : const double dfCosPIDeltaXOver3 = sqrt(1.0 - dfSin2PIDeltaXOver3);
3986 414282 : const double dfSinPIDeltaX =
3987 414282 : (3.0 - 4 * dfSin2PIDeltaXOver3) * dfSinPIDeltaXOver3;
3988 414282 : const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
3989 414282 : const double dfInvPI2Over3xSinPIDeltaX =
3990 : dfInvPI2Over3 * dfSinPIDeltaX;
3991 414282 : const double dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 =
3992 414282 : -0.5 * dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3;
3993 414282 : const double dfSinPIOver3 = 0.8660254037844386;
3994 414282 : const double dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3 =
3995 414282 : dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaX * dfCosPIDeltaXOver3;
3996 : const double padfCst[] = {
3997 414282 : dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3,
3998 414282 : dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 -
3999 : dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3,
4000 414282 : dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 +
4001 414282 : dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3};
4002 :
4003 2936860 : for (int i = iMin; i <= iMax; ++i)
4004 : {
4005 2522570 : const double dfX = i - dfDeltaX;
4006 2522570 : if (dfX == 0.0)
4007 58282 : padfWeightsXShifted[i] = 1.0;
4008 : else
4009 2464290 : padfWeightsXShifted[i] = padfCst[(i + 3) % 3] / (dfX * dfX);
4010 : #if DEBUG_VERBOSE
4011 : // TODO(schwehr): AlmostEqual.
4012 : // CPLAssert(fabs(padfWeightsX[i-poWK->nFiltInitX] -
4013 : // GWKLanczosSinc(dfX, 3.0)) < 1e-10);
4014 : #endif
4015 : }
4016 :
4017 414282 : psWrkStruct->iLastSrcX = iSrcX;
4018 414282 : psWrkStruct->dfLastDeltaX = dfDeltaX;
4019 : }
4020 : }
4021 :
4022 617144 : if (dfYScale < 1.0)
4023 : {
4024 403116 : while ((jMin - dfDeltaY) * dfYScale < -3.0)
4025 200254 : jMin++;
4026 202862 : while ((jMax - dfDeltaY) * dfYScale > 3.0)
4027 0 : jMax--;
4028 :
4029 : // clang-format off
4030 : /*
4031 : Naive version:
4032 : for (int j = jMin; j <= jMax; ++j)
4033 : {
4034 : padfWeightsYShifted[j] =
4035 : GWKLanczosSinc((j - dfDeltaY) * dfYScale);
4036 : }
4037 : */
4038 : // clang-format on
4039 :
4040 202862 : if (iSrcY != psWrkStruct->iLastSrcY ||
4041 202479 : dfDeltaY != psWrkStruct->dfLastDeltaY)
4042 : {
4043 383 : double dfY = (jMin - dfDeltaY) * dfYScale;
4044 :
4045 383 : double dfPIYover3 = M_PI / 3 * dfY;
4046 383 : double dfCosOver3 = cos(dfPIYover3);
4047 383 : double dfSinOver3 = sin(dfPIYover3);
4048 :
4049 : // "Naive":
4050 : // double dfSin = sin( M_PI * dfY );
4051 : // double dfCos = cos( M_PI * dfY );
4052 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
4053 383 : double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
4054 383 : double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
4055 :
4056 383 : const double dfCosPiYScaleOver3 = psWrkStruct->dfCosPiYScaleOver3;
4057 383 : const double dfSinPiYScaleOver3 = psWrkStruct->dfSinPiYScaleOver3;
4058 383 : const double dfCosPiYScale = psWrkStruct->dfCosPiYScale;
4059 383 : const double dfSinPiYScale = psWrkStruct->dfSinPiYScale;
4060 383 : constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
4061 383 : padfWeightsYShifted[jMin] =
4062 383 : dfY == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfY * dfY);
4063 7318 : for (int j = jMin + 1; j <= jMax; ++j)
4064 : {
4065 6935 : dfY += dfYScale;
4066 6935 : const double dfNewSin =
4067 6935 : dfSin * dfCosPiYScale + dfCos * dfSinPiYScale;
4068 6935 : const double dfNewSinOver3 = dfSinOver3 * dfCosPiYScaleOver3 +
4069 6935 : dfCosOver3 * dfSinPiYScaleOver3;
4070 6935 : padfWeightsYShifted[j] =
4071 : dfY == 0
4072 6935 : ? 1.0
4073 6935 : : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfY * dfY);
4074 6935 : const double dfNewCos =
4075 6935 : dfCos * dfCosPiYScale - dfSin * dfSinPiYScale;
4076 6935 : const double dfNewCosOver3 = dfCosOver3 * dfCosPiYScaleOver3 -
4077 6935 : dfSinOver3 * dfSinPiYScaleOver3;
4078 6935 : dfSin = dfNewSin;
4079 6935 : dfCos = dfNewCos;
4080 6935 : dfSinOver3 = dfNewSinOver3;
4081 6935 : dfCosOver3 = dfNewCosOver3;
4082 : }
4083 :
4084 383 : psWrkStruct->iLastSrcY = iSrcY;
4085 383 : psWrkStruct->dfLastDeltaY = dfDeltaY;
4086 : }
4087 : }
4088 : else
4089 : {
4090 684742 : while (jMin - dfDeltaY < -3.0)
4091 270460 : jMin++;
4092 414282 : while (jMax - dfDeltaY > 3.0)
4093 0 : jMax--;
4094 :
4095 414282 : if (iSrcY != psWrkStruct->iLastSrcY ||
4096 413663 : dfDeltaY != psWrkStruct->dfLastDeltaY)
4097 : {
4098 1132 : const double dfSinPIDeltaYOver3 = sin((-M_PI / 3.0) * dfDeltaY);
4099 1132 : const double dfSin2PIDeltaYOver3 =
4100 : dfSinPIDeltaYOver3 * dfSinPIDeltaYOver3;
4101 : // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaY < PI/2.
4102 1132 : const double dfCosPIDeltaYOver3 = sqrt(1.0 - dfSin2PIDeltaYOver3);
4103 1132 : const double dfSinPIDeltaY =
4104 1132 : (3.0 - 4.0 * dfSin2PIDeltaYOver3) * dfSinPIDeltaYOver3;
4105 1132 : const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
4106 1132 : const double dfInvPI2Over3xSinPIDeltaY =
4107 : dfInvPI2Over3 * dfSinPIDeltaY;
4108 1132 : const double dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 =
4109 1132 : -0.5 * dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3;
4110 1132 : const double dfSinPIOver3 = 0.8660254037844386;
4111 1132 : const double dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3 =
4112 1132 : dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaY * dfCosPIDeltaYOver3;
4113 : const double padfCst[] = {
4114 1132 : dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3,
4115 1132 : dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 -
4116 : dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3,
4117 1132 : dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 +
4118 1132 : dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3};
4119 :
4120 7917 : for (int j = jMin; j <= jMax; ++j)
4121 : {
4122 6785 : const double dfY = j - dfDeltaY;
4123 6785 : if (dfY == 0.0)
4124 460 : padfWeightsYShifted[j] = 1.0;
4125 : else
4126 6325 : padfWeightsYShifted[j] = padfCst[(j + 3) % 3] / (dfY * dfY);
4127 : #if DEBUG_VERBOSE
4128 : // TODO(schwehr): AlmostEqual.
4129 : // CPLAssert(fabs(padfWeightsYShifted[j] -
4130 : // GWKLanczosSinc(dfY, 3.0)) < 1e-10);
4131 : #endif
4132 : }
4133 :
4134 1132 : psWrkStruct->iLastSrcY = iSrcY;
4135 1132 : psWrkStruct->dfLastDeltaY = dfDeltaY;
4136 : }
4137 : }
4138 :
4139 : // If we have no density information, we can simply compute the
4140 : // accumulated weight.
4141 617144 : if (padfRowDensity == nullptr)
4142 : {
4143 617144 : double dfRowAccWeight = 0.0;
4144 7903490 : for (int i = iMin; i <= iMax; ++i)
4145 : {
4146 7286350 : dfRowAccWeight += padfWeightsXShifted[i];
4147 : }
4148 617144 : double dfColAccWeight = 0.0;
4149 7958040 : for (int j = jMin; j <= jMax; ++j)
4150 : {
4151 7340900 : dfColAccWeight += padfWeightsYShifted[j];
4152 : }
4153 617144 : dfAccumulatorWeight = dfRowAccWeight * dfColAccWeight;
4154 : }
4155 :
4156 : // Loop over pixel rows in the kernel.
4157 :
4158 617144 : if (poWK->eWorkingDataType == GDT_Byte && !poWK->panUnifiedSrcValid &&
4159 616524 : !poWK->papanBandSrcValid && !poWK->pafUnifiedSrcDensity &&
4160 : !padfRowDensity)
4161 : {
4162 : // Optimization for Byte case without any masking/alpha
4163 :
4164 616524 : if (dfAccumulatorWeight < 0.000001)
4165 : {
4166 0 : *pdfDensity = 0.0;
4167 0 : return false;
4168 : }
4169 :
4170 616524 : const GByte *pSrc =
4171 616524 : reinterpret_cast<const GByte *>(poWK->papabySrcImage[iBand]);
4172 616524 : pSrc += iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
4173 :
4174 : #if defined(USE_SSE2)
4175 616524 : if (iMax - iMin + 1 == 6)
4176 : {
4177 : // This is just an optimized version of the general case in
4178 : // the else clause.
4179 :
4180 346854 : pSrc += iMin;
4181 346854 : int j = jMin;
4182 : const auto fourXWeights =
4183 346854 : XMMReg4Double::Load4Val(padfWeightsXShifted + iMin);
4184 :
4185 : // Process 2 lines at the same time.
4186 1375860 : for (; j < jMax; j += 2)
4187 : {
4188 : const XMMReg4Double v_acc =
4189 1029000 : XMMReg4Double::Load4Val(pSrc) * fourXWeights;
4190 : const XMMReg4Double v_acc2 =
4191 1029000 : XMMReg4Double::Load4Val(pSrc + nSrcXSize) * fourXWeights;
4192 1029000 : const double dfRowAcc = v_acc.GetHorizSum();
4193 1029000 : const double dfRowAccEnd =
4194 1029000 : pSrc[4] * padfWeightsXShifted[iMin + 4] +
4195 1029000 : pSrc[5] * padfWeightsXShifted[iMin + 5];
4196 1029000 : dfAccumulatorReal +=
4197 1029000 : (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
4198 1029000 : const double dfRowAcc2 = v_acc2.GetHorizSum();
4199 1029000 : const double dfRowAcc2End =
4200 1029000 : pSrc[nSrcXSize + 4] * padfWeightsXShifted[iMin + 4] +
4201 1029000 : pSrc[nSrcXSize + 5] * padfWeightsXShifted[iMin + 5];
4202 1029000 : dfAccumulatorReal +=
4203 1029000 : (dfRowAcc2 + dfRowAcc2End) * padfWeightsYShifted[j + 1];
4204 1029000 : pSrc += 2 * nSrcXSize;
4205 : }
4206 346854 : if (j == jMax)
4207 : {
4208 : // Process last line if there's an odd number of them.
4209 :
4210 : const XMMReg4Double v_acc =
4211 86045 : XMMReg4Double::Load4Val(pSrc) * fourXWeights;
4212 86045 : const double dfRowAcc = v_acc.GetHorizSum();
4213 86045 : const double dfRowAccEnd =
4214 86045 : pSrc[4] * padfWeightsXShifted[iMin + 4] +
4215 86045 : pSrc[5] * padfWeightsXShifted[iMin + 5];
4216 86045 : dfAccumulatorReal +=
4217 86045 : (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
4218 : }
4219 : }
4220 : else
4221 : #endif
4222 : {
4223 5463580 : for (int j = jMin; j <= jMax; ++j)
4224 : {
4225 5193900 : int i = iMin;
4226 5193900 : double dfRowAcc1 = 0.0;
4227 5193900 : double dfRowAcc2 = 0.0;
4228 : // A bit of loop unrolling
4229 62750600 : for (; i < iMax; i += 2)
4230 : {
4231 57556700 : dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
4232 57556700 : dfRowAcc2 += pSrc[i + 1] * padfWeightsXShifted[i + 1];
4233 : }
4234 5193900 : if (i == iMax)
4235 : {
4236 : // Process last column if there's an odd number of them.
4237 426183 : dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
4238 : }
4239 :
4240 5193900 : dfAccumulatorReal +=
4241 5193900 : (dfRowAcc1 + dfRowAcc2) * padfWeightsYShifted[j];
4242 5193900 : pSrc += nSrcXSize;
4243 : }
4244 : }
4245 :
4246 : // Calculate the output taking into account weighting.
4247 616524 : if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
4248 : {
4249 569230 : const double dfInvAcc = 1.0 / dfAccumulatorWeight;
4250 569230 : *pdfReal = dfAccumulatorReal * dfInvAcc;
4251 569230 : *pdfDensity = 1.0;
4252 : }
4253 : else
4254 : {
4255 47294 : *pdfReal = dfAccumulatorReal;
4256 47294 : *pdfDensity = 1.0;
4257 : }
4258 :
4259 616524 : return true;
4260 : }
4261 :
4262 620 : GPtrDiff_t iRowOffset =
4263 620 : iSrcOffset + static_cast<GPtrDiff_t>(jMin - 1) * nSrcXSize + iMin;
4264 :
4265 620 : int nCountValid = 0;
4266 620 : const bool bIsNonComplex = !GDALDataTypeIsComplex(poWK->eWorkingDataType);
4267 :
4268 3560 : for (int j = jMin; j <= jMax; ++j)
4269 : {
4270 2940 : iRowOffset += nSrcXSize;
4271 :
4272 : // Get pixel values.
4273 : // We can potentially read extra elements after the "normal" end of the
4274 : // source arrays, but the contract of papabySrcImage[iBand],
4275 : // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
4276 : // is to have WARP_EXTRA_ELTS reserved at their end.
4277 2940 : if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
4278 : padfRowDensity, padfRowReal, padfRowImag))
4279 0 : continue;
4280 :
4281 2940 : const double dfWeight1 = padfWeightsYShifted[j];
4282 :
4283 : // Iterate over pixels in row.
4284 2940 : if (padfRowDensity != nullptr)
4285 : {
4286 0 : for (int i = iMin; i <= iMax; ++i)
4287 : {
4288 : // Skip sampling if pixel has zero density.
4289 0 : if (padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD)
4290 0 : continue;
4291 :
4292 0 : nCountValid++;
4293 :
4294 : // Use a cached set of weights for this row.
4295 0 : const double dfWeight2 = dfWeight1 * padfWeightsXShifted[i];
4296 :
4297 : // Accumulate!
4298 0 : dfAccumulatorReal += padfRowReal[i - iMin] * dfWeight2;
4299 0 : dfAccumulatorImag += padfRowImag[i - iMin] * dfWeight2;
4300 0 : dfAccumulatorDensity += padfRowDensity[i - iMin] * dfWeight2;
4301 0 : dfAccumulatorWeight += dfWeight2;
4302 : }
4303 : }
4304 2940 : else if (bIsNonComplex)
4305 : {
4306 1764 : double dfRowAccReal = 0.0;
4307 10560 : for (int i = iMin; i <= iMax; ++i)
4308 : {
4309 8796 : const double dfWeight2 = padfWeightsXShifted[i];
4310 :
4311 : // Accumulate!
4312 8796 : dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
4313 : }
4314 :
4315 1764 : dfAccumulatorReal += dfRowAccReal * dfWeight1;
4316 : }
4317 : else
4318 : {
4319 1176 : double dfRowAccReal = 0.0;
4320 1176 : double dfRowAccImag = 0.0;
4321 7040 : for (int i = iMin; i <= iMax; ++i)
4322 : {
4323 5864 : const double dfWeight2 = padfWeightsXShifted[i];
4324 :
4325 : // Accumulate!
4326 5864 : dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
4327 5864 : dfRowAccImag += padfRowImag[i - iMin] * dfWeight2;
4328 : }
4329 :
4330 1176 : dfAccumulatorReal += dfRowAccReal * dfWeight1;
4331 1176 : dfAccumulatorImag += dfRowAccImag * dfWeight1;
4332 : }
4333 : }
4334 :
4335 620 : if (dfAccumulatorWeight < 0.000001 ||
4336 0 : (padfRowDensity != nullptr &&
4337 0 : (dfAccumulatorDensity < 0.000001 ||
4338 0 : nCountValid < (jMax - jMin + 1) * (iMax - iMin + 1) / 2)))
4339 : {
4340 0 : *pdfDensity = 0.0;
4341 0 : return false;
4342 : }
4343 :
4344 : // Calculate the output taking into account weighting.
4345 620 : if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
4346 : {
4347 0 : const double dfInvAcc = 1.0 / dfAccumulatorWeight;
4348 0 : *pdfReal = dfAccumulatorReal * dfInvAcc;
4349 0 : *pdfImag = dfAccumulatorImag * dfInvAcc;
4350 0 : if (padfRowDensity != nullptr)
4351 0 : *pdfDensity = dfAccumulatorDensity * dfInvAcc;
4352 : else
4353 0 : *pdfDensity = 1.0;
4354 : }
4355 : else
4356 : {
4357 620 : *pdfReal = dfAccumulatorReal;
4358 620 : *pdfImag = dfAccumulatorImag;
4359 620 : if (padfRowDensity != nullptr)
4360 0 : *pdfDensity = dfAccumulatorDensity;
4361 : else
4362 620 : *pdfDensity = 1.0;
4363 : }
4364 :
4365 620 : return true;
4366 : }
4367 :
4368 : /************************************************************************/
4369 : /* GWKComputeWeights() */
4370 : /************************************************************************/
4371 :
4372 3746130 : static void GWKComputeWeights(GDALResampleAlg eResample, int iMin, int iMax,
4373 : double dfDeltaX, double dfXScale, int jMin,
4374 : int jMax, double dfDeltaY, double dfYScale,
4375 : double *padfWeightsHorizontal,
4376 : double *padfWeightsVertical, double &dfInvWeights)
4377 : {
4378 :
4379 3746130 : const FilterFuncType pfnGetWeight = apfGWKFilter[eResample];
4380 3746130 : CPLAssert(pfnGetWeight);
4381 3746130 : const FilterFunc4ValuesType pfnGetWeight4Values =
4382 3746130 : apfGWKFilter4Values[eResample];
4383 3746130 : CPLAssert(pfnGetWeight4Values);
4384 :
4385 3746130 : int i = iMin; // Used after for.
4386 3746130 : int iC = 0; // Used after for.
4387 : // Not zero, but as close as possible to it, to avoid potential division by
4388 : // zero at end of function
4389 3746130 : double dfAccumulatorWeightHorizontal = std::numeric_limits<double>::min();
4390 8314040 : for (; i + 2 < iMax; i += 4, iC += 4)
4391 : {
4392 4566400 : padfWeightsHorizontal[iC] = (i - dfDeltaX) * dfXScale;
4393 4566400 : padfWeightsHorizontal[iC + 1] = padfWeightsHorizontal[iC] + dfXScale;
4394 4566400 : padfWeightsHorizontal[iC + 2] =
4395 4566400 : padfWeightsHorizontal[iC + 1] + dfXScale;
4396 4566400 : padfWeightsHorizontal[iC + 3] =
4397 4566400 : padfWeightsHorizontal[iC + 2] + dfXScale;
4398 4567910 : dfAccumulatorWeightHorizontal +=
4399 4566400 : pfnGetWeight4Values(padfWeightsHorizontal + iC);
4400 : }
4401 3953690 : for (; i <= iMax; ++i, ++iC)
4402 : {
4403 220112 : const double dfWeight = pfnGetWeight((i - dfDeltaX) * dfXScale);
4404 206053 : padfWeightsHorizontal[iC] = dfWeight;
4405 206053 : dfAccumulatorWeightHorizontal += dfWeight;
4406 : }
4407 :
4408 3733580 : int j = jMin; // Used after for.
4409 3733580 : int jC = 0; // Used after for.
4410 : // Not zero, but as close as possible to it, to avoid potential division by
4411 : // zero at end of function
4412 3733580 : double dfAccumulatorWeightVertical = std::numeric_limits<double>::min();
4413 7880320 : for (; j + 2 < jMax; j += 4, jC += 4)
4414 : {
4415 4143760 : padfWeightsVertical[jC] = (j - dfDeltaY) * dfYScale;
4416 4143760 : padfWeightsVertical[jC + 1] = padfWeightsVertical[jC] + dfYScale;
4417 4143760 : padfWeightsVertical[jC + 2] = padfWeightsVertical[jC + 1] + dfYScale;
4418 4143760 : padfWeightsVertical[jC + 3] = padfWeightsVertical[jC + 2] + dfYScale;
4419 4146740 : dfAccumulatorWeightVertical +=
4420 4143760 : pfnGetWeight4Values(padfWeightsVertical + jC);
4421 : }
4422 8238600 : for (; j <= jMax; ++j, ++jC)
4423 : {
4424 4496340 : const double dfWeight = pfnGetWeight((j - dfDeltaY) * dfYScale);
4425 4502030 : padfWeightsVertical[jC] = dfWeight;
4426 4502030 : dfAccumulatorWeightVertical += dfWeight;
4427 : }
4428 :
4429 3742250 : dfInvWeights =
4430 3742250 : 1. / (dfAccumulatorWeightHorizontal * dfAccumulatorWeightVertical);
4431 3742250 : }
4432 :
4433 : /************************************************************************/
4434 : /* GWKResampleNoMasksT() */
4435 : /************************************************************************/
4436 :
4437 : template <class T>
4438 : static bool
4439 : GWKResampleNoMasksT(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
4440 : double dfSrcY, T *pValue, double *padfWeightsHorizontal,
4441 : double *padfWeightsVertical, double &dfInvWeights)
4442 :
4443 : {
4444 : // Commonly used; save locally.
4445 : const int nSrcXSize = poWK->nSrcXSize;
4446 : const int nSrcYSize = poWK->nSrcYSize;
4447 :
4448 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4449 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4450 : const GPtrDiff_t iSrcOffset =
4451 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4452 :
4453 : const int nXRadius = poWK->nXRadius;
4454 : const int nYRadius = poWK->nYRadius;
4455 :
4456 : // Politely refuse to process invalid coordinates or obscenely small image.
4457 : if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
4458 : nYRadius > nSrcYSize)
4459 : return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
4460 : pValue);
4461 :
4462 : T *pSrcBand = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
4463 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4464 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4465 :
4466 : const double dfXScale = std::min(poWK->dfXScale, 1.0);
4467 : const double dfYScale = std::min(poWK->dfYScale, 1.0);
4468 :
4469 : int iMin = 1 - nXRadius;
4470 : if (iSrcX + iMin < 0)
4471 : iMin = -iSrcX;
4472 : int iMax = nXRadius;
4473 : if (iSrcX + iMax >= nSrcXSize - 1)
4474 : iMax = nSrcXSize - 1 - iSrcX;
4475 :
4476 : int jMin = 1 - nYRadius;
4477 : if (iSrcY + jMin < 0)
4478 : jMin = -iSrcY;
4479 : int jMax = nYRadius;
4480 : if (iSrcY + jMax >= nSrcYSize - 1)
4481 : jMax = nSrcYSize - 1 - iSrcY;
4482 :
4483 : if (iBand == 0)
4484 : {
4485 : GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
4486 : jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
4487 : padfWeightsVertical, dfInvWeights);
4488 : }
4489 :
4490 : // Loop over all rows in the kernel.
4491 : double dfAccumulator = 0.0;
4492 : for (int jC = 0, j = jMin; j <= jMax; ++j, ++jC)
4493 : {
4494 : const GPtrDiff_t iSampJ =
4495 : iSrcOffset + static_cast<GPtrDiff_t>(j) * nSrcXSize;
4496 :
4497 : // Loop over all pixels in the row.
4498 : double dfAccumulatorLocal = 0.0;
4499 : double dfAccumulatorLocal2 = 0.0;
4500 : int iC = 0;
4501 : int i = iMin;
4502 : // Process by chunk of 4 cols.
4503 : for (; i + 2 < iMax; i += 4, iC += 4)
4504 : {
4505 : // Retrieve the pixel & accumulate.
4506 : dfAccumulatorLocal +=
4507 : pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
4508 : dfAccumulatorLocal +=
4509 : pSrcBand[i + 1 + iSampJ] * padfWeightsHorizontal[iC + 1];
4510 : dfAccumulatorLocal2 +=
4511 : pSrcBand[i + 2 + iSampJ] * padfWeightsHorizontal[iC + 2];
4512 : dfAccumulatorLocal2 +=
4513 : pSrcBand[i + 3 + iSampJ] * padfWeightsHorizontal[iC + 3];
4514 : }
4515 : dfAccumulatorLocal += dfAccumulatorLocal2;
4516 : if (i < iMax)
4517 : {
4518 : dfAccumulatorLocal +=
4519 : pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
4520 : dfAccumulatorLocal +=
4521 : pSrcBand[i + 1 + iSampJ] * padfWeightsHorizontal[iC + 1];
4522 : i += 2;
4523 : iC += 2;
4524 : }
4525 : if (i == iMax)
4526 : {
4527 : dfAccumulatorLocal +=
4528 : pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
4529 : }
4530 :
4531 : dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
4532 : }
4533 :
4534 : *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
4535 :
4536 : return true;
4537 : }
4538 :
4539 : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
4540 : /* Could possibly be used too on 32bit, but we would need to check at runtime */
4541 : #if defined(USE_SSE2)
4542 :
4543 : /************************************************************************/
4544 : /* GWKResampleNoMasks_SSE2_T() */
4545 : /************************************************************************/
4546 :
4547 : template <class T>
4548 9092963 : static bool GWKResampleNoMasks_SSE2_T(const GDALWarpKernel *poWK, int iBand,
4549 : double dfSrcX, double dfSrcY, T *pValue,
4550 : double *padfWeightsHorizontal,
4551 : double *padfWeightsVertical,
4552 : double &dfInvWeights)
4553 : {
4554 : // Commonly used; save locally.
4555 9092963 : const int nSrcXSize = poWK->nSrcXSize;
4556 9092963 : const int nSrcYSize = poWK->nSrcYSize;
4557 :
4558 9092963 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4559 9092963 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4560 9092963 : const GPtrDiff_t iSrcOffset =
4561 9092963 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4562 9092963 : const int nXRadius = poWK->nXRadius;
4563 9092963 : const int nYRadius = poWK->nYRadius;
4564 :
4565 : // Politely refuse to process invalid coordinates or obscenely small image.
4566 9092963 : if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
4567 : nYRadius > nSrcYSize)
4568 2 : return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
4569 3 : pValue);
4570 :
4571 9169401 : const T *pSrcBand =
4572 9169401 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
4573 :
4574 9169401 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4575 9169401 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4576 9169401 : const double dfXScale = std::min(poWK->dfXScale, 1.0);
4577 9165771 : const double dfYScale = std::min(poWK->dfYScale, 1.0);
4578 :
4579 9131601 : int iMin = 1 - nXRadius;
4580 9131601 : if (iSrcX + iMin < 0)
4581 43143 : iMin = -iSrcX;
4582 9131601 : int iMax = nXRadius;
4583 9131601 : if (iSrcX + iMax >= nSrcXSize - 1)
4584 38106 : iMax = nSrcXSize - 1 - iSrcX;
4585 :
4586 9131601 : int jMin = 1 - nYRadius;
4587 9131601 : if (iSrcY + jMin < 0)
4588 49554 : jMin = -iSrcY;
4589 9131601 : int jMax = nYRadius;
4590 9131601 : if (iSrcY + jMax >= nSrcYSize - 1)
4591 36028 : jMax = nSrcYSize - 1 - iSrcY;
4592 :
4593 9131601 : if (iBand == 0)
4594 : {
4595 3744881 : GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
4596 : jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
4597 : padfWeightsVertical, dfInvWeights);
4598 : }
4599 :
4600 9112371 : GPtrDiff_t iSampJ = iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
4601 : // Process by chunk of 4 rows.
4602 9112371 : int jC = 0;
4603 9112371 : int j = jMin;
4604 9112371 : double dfAccumulator = 0.0;
4605 19367593 : for (; j + 2 < jMax; j += 4, iSampJ += 4 * nSrcXSize, jC += 4)
4606 : {
4607 : // Loop over all pixels in the row.
4608 10257712 : int iC = 0;
4609 10257712 : int i = iMin;
4610 : // Process by chunk of 4 cols.
4611 10257712 : XMMReg4Double v_acc_1 = XMMReg4Double::Zero();
4612 10219462 : XMMReg4Double v_acc_2 = XMMReg4Double::Zero();
4613 10219632 : XMMReg4Double v_acc_3 = XMMReg4Double::Zero();
4614 10209202 : XMMReg4Double v_acc_4 = XMMReg4Double::Zero();
4615 26722480 : for (; i + 2 < iMax; i += 4, iC += 4)
4616 : {
4617 : // Retrieve the pixel & accumulate.
4618 16484588 : XMMReg4Double v_pixels_1 =
4619 16484588 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
4620 16506988 : XMMReg4Double v_pixels_2 =
4621 16506988 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + nSrcXSize);
4622 16498388 : XMMReg4Double v_pixels_3 =
4623 16498388 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
4624 16471188 : XMMReg4Double v_pixels_4 =
4625 16471188 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
4626 :
4627 16509588 : XMMReg4Double v_padfWeight =
4628 16509588 : XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
4629 :
4630 16503388 : v_acc_1 += v_pixels_1 * v_padfWeight;
4631 16488788 : v_acc_2 += v_pixels_2 * v_padfWeight;
4632 16499888 : v_acc_3 += v_pixels_3 * v_padfWeight;
4633 16504388 : v_acc_4 += v_pixels_4 * v_padfWeight;
4634 : }
4635 :
4636 10237882 : if (i < iMax)
4637 : {
4638 142910 : XMMReg2Double v_pixels_1 =
4639 142910 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ);
4640 142910 : XMMReg2Double v_pixels_2 =
4641 142910 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + nSrcXSize);
4642 142910 : XMMReg2Double v_pixels_3 =
4643 142910 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
4644 142910 : XMMReg2Double v_pixels_4 =
4645 142910 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
4646 :
4647 142910 : XMMReg2Double v_padfWeight =
4648 142910 : XMMReg2Double::Load2Val(padfWeightsHorizontal + iC);
4649 :
4650 142910 : v_acc_1.AddToLow(v_pixels_1 * v_padfWeight);
4651 142910 : v_acc_2.AddToLow(v_pixels_2 * v_padfWeight);
4652 142910 : v_acc_3.AddToLow(v_pixels_3 * v_padfWeight);
4653 142910 : v_acc_4.AddToLow(v_pixels_4 * v_padfWeight);
4654 :
4655 142910 : i += 2;
4656 142910 : iC += 2;
4657 : }
4658 :
4659 10237882 : double dfAccumulatorLocal_1 = v_acc_1.GetHorizSum();
4660 10240892 : double dfAccumulatorLocal_2 = v_acc_2.GetHorizSum();
4661 10243262 : double dfAccumulatorLocal_3 = v_acc_3.GetHorizSum();
4662 10253112 : double dfAccumulatorLocal_4 = v_acc_4.GetHorizSum();
4663 :
4664 10255172 : if (i == iMax)
4665 : {
4666 49195 : dfAccumulatorLocal_1 += static_cast<double>(pSrcBand[i + iSampJ]) *
4667 49195 : padfWeightsHorizontal[iC];
4668 49195 : dfAccumulatorLocal_2 +=
4669 49195 : static_cast<double>(pSrcBand[i + iSampJ + nSrcXSize]) *
4670 49195 : padfWeightsHorizontal[iC];
4671 49195 : dfAccumulatorLocal_3 +=
4672 49195 : static_cast<double>(pSrcBand[i + iSampJ + 2 * nSrcXSize]) *
4673 49195 : padfWeightsHorizontal[iC];
4674 49195 : dfAccumulatorLocal_4 +=
4675 49195 : static_cast<double>(pSrcBand[i + iSampJ + 3 * nSrcXSize]) *
4676 49195 : padfWeightsHorizontal[iC];
4677 : }
4678 :
4679 10255172 : dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal_1;
4680 10255172 : dfAccumulator += padfWeightsVertical[jC + 1] * dfAccumulatorLocal_2;
4681 10255172 : dfAccumulator += padfWeightsVertical[jC + 2] * dfAccumulatorLocal_3;
4682 10255172 : dfAccumulator += padfWeightsVertical[jC + 3] * dfAccumulatorLocal_4;
4683 : }
4684 22104241 : for (; j <= jMax; ++j, iSampJ += nSrcXSize, ++jC)
4685 : {
4686 : // Loop over all pixels in the row.
4687 12930740 : int iC = 0;
4688 12930740 : int i = iMin;
4689 : // Process by chunk of 4 cols.
4690 12930740 : XMMReg4Double v_acc = XMMReg4Double::Zero();
4691 26195463 : for (; i + 2 < iMax; i += 4, iC += 4)
4692 : {
4693 : // Retrieve the pixel & accumulate.
4694 13086923 : XMMReg4Double v_pixels =
4695 13086923 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
4696 13098023 : XMMReg4Double v_padfWeight =
4697 13098023 : XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
4698 :
4699 13053223 : v_acc += v_pixels * v_padfWeight;
4700 : }
4701 :
4702 13108540 : double dfAccumulatorLocal = v_acc.GetHorizSum();
4703 :
4704 12994340 : if (i < iMax)
4705 : {
4706 173964 : dfAccumulatorLocal +=
4707 173964 : pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
4708 173964 : dfAccumulatorLocal +=
4709 173964 : pSrcBand[i + 1 + iSampJ] * padfWeightsHorizontal[iC + 1];
4710 173964 : i += 2;
4711 173964 : iC += 2;
4712 : }
4713 12994340 : if (i == iMax)
4714 : {
4715 33020 : dfAccumulatorLocal += static_cast<double>(pSrcBand[i + iSampJ]) *
4716 33020 : padfWeightsHorizontal[iC];
4717 : }
4718 :
4719 12994340 : dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
4720 : }
4721 :
4722 9173531 : *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
4723 :
4724 9143211 : return true;
4725 : }
4726 :
4727 : /************************************************************************/
4728 : /* GWKResampleNoMasksT<GByte>() */
4729 : /************************************************************************/
4730 :
4731 : template <>
4732 8578220 : bool GWKResampleNoMasksT<GByte>(const GDALWarpKernel *poWK, int iBand,
4733 : double dfSrcX, double dfSrcY, GByte *pValue,
4734 : double *padfWeightsHorizontal,
4735 : double *padfWeightsVertical,
4736 : double &dfInvWeights)
4737 : {
4738 8578220 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4739 : padfWeightsHorizontal, padfWeightsVertical,
4740 8578280 : dfInvWeights);
4741 : }
4742 :
4743 : /************************************************************************/
4744 : /* GWKResampleNoMasksT<GInt16>() */
4745 : /************************************************************************/
4746 :
4747 : template <>
4748 252563 : bool GWKResampleNoMasksT<GInt16>(const GDALWarpKernel *poWK, int iBand,
4749 : double dfSrcX, double dfSrcY, GInt16 *pValue,
4750 : double *padfWeightsHorizontal,
4751 : double *padfWeightsVertical,
4752 : double &dfInvWeights)
4753 : {
4754 252563 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4755 : padfWeightsHorizontal, padfWeightsVertical,
4756 252563 : dfInvWeights);
4757 : }
4758 :
4759 : /************************************************************************/
4760 : /* GWKResampleNoMasksT<GUInt16>() */
4761 : /************************************************************************/
4762 :
4763 : template <>
4764 343440 : bool GWKResampleNoMasksT<GUInt16>(const GDALWarpKernel *poWK, int iBand,
4765 : double dfSrcX, double dfSrcY, GUInt16 *pValue,
4766 : double *padfWeightsHorizontal,
4767 : double *padfWeightsVertical,
4768 : double &dfInvWeights)
4769 : {
4770 343440 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4771 : padfWeightsHorizontal, padfWeightsVertical,
4772 343440 : dfInvWeights);
4773 : }
4774 :
4775 : /************************************************************************/
4776 : /* GWKResampleNoMasksT<float>() */
4777 : /************************************************************************/
4778 :
4779 : template <>
4780 2500 : bool GWKResampleNoMasksT<float>(const GDALWarpKernel *poWK, int iBand,
4781 : double dfSrcX, double dfSrcY, float *pValue,
4782 : double *padfWeightsHorizontal,
4783 : double *padfWeightsVertical,
4784 : double &dfInvWeights)
4785 : {
4786 2500 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4787 : padfWeightsHorizontal, padfWeightsVertical,
4788 2500 : dfInvWeights);
4789 : }
4790 :
4791 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
4792 :
4793 : /************************************************************************/
4794 : /* GWKResampleNoMasksT<double>() */
4795 : /************************************************************************/
4796 :
4797 : template <>
4798 : bool GWKResampleNoMasksT<double>(const GDALWarpKernel *poWK, int iBand,
4799 : double dfSrcX, double dfSrcY, double *pValue,
4800 : double *padfWeightsHorizontal,
4801 : double *padfWeightsVertical,
4802 : double &dfInvWeights)
4803 : {
4804 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4805 : padfWeightsHorizontal, padfWeightsVertical,
4806 : dfInvWeights);
4807 : }
4808 :
4809 : #endif /* INSTANTIATE_FLOAT64_SSE2_IMPL */
4810 :
4811 : #endif /* defined(USE_SSE2) */
4812 :
4813 : /************************************************************************/
4814 : /* GWKRoundSourceCoordinates() */
4815 : /************************************************************************/
4816 :
4817 1000 : static void GWKRoundSourceCoordinates(
4818 : int nDstXSize, double *padfX, double *padfY, double *padfZ, int *pabSuccess,
4819 : double dfSrcCoordPrecision, double dfErrorThreshold,
4820 : GDALTransformerFunc pfnTransformer, void *pTransformerArg, double dfDstXOff,
4821 : double dfDstY)
4822 : {
4823 1000 : double dfPct = 0.8;
4824 1000 : if (dfErrorThreshold > 0 && dfSrcCoordPrecision / dfErrorThreshold >= 10.0)
4825 : {
4826 1000 : dfPct = 1.0 - 2 * 1.0 / (dfSrcCoordPrecision / dfErrorThreshold);
4827 : }
4828 1000 : const double dfExactTransformThreshold = 0.5 * dfPct * dfSrcCoordPrecision;
4829 :
4830 501000 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
4831 : {
4832 500000 : const double dfXBefore = padfX[iDstX];
4833 500000 : const double dfYBefore = padfY[iDstX];
4834 500000 : padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
4835 : dfSrcCoordPrecision;
4836 500000 : padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
4837 : dfSrcCoordPrecision;
4838 :
4839 : // If we are in an uncertainty zone, go to non-approximated
4840 : // transformation.
4841 : // Due to the 80% of half-precision threshold, dfSrcCoordPrecision must
4842 : // be at least 10 times greater than the approximation error.
4843 500000 : if (fabs(dfXBefore - padfX[iDstX]) > dfExactTransformThreshold ||
4844 399914 : fabs(dfYBefore - padfY[iDstX]) > dfExactTransformThreshold)
4845 : {
4846 180090 : padfX[iDstX] = iDstX + dfDstXOff;
4847 180090 : padfY[iDstX] = dfDstY;
4848 180090 : padfZ[iDstX] = 0.0;
4849 180090 : pfnTransformer(pTransformerArg, TRUE, 1, padfX + iDstX,
4850 180090 : padfY + iDstX, padfZ + iDstX, pabSuccess + iDstX);
4851 180090 : padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
4852 : dfSrcCoordPrecision;
4853 180090 : padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
4854 : dfSrcCoordPrecision;
4855 : }
4856 : }
4857 1000 : }
4858 :
4859 : /************************************************************************/
4860 : /* GWKOpenCLCase() */
4861 : /* */
4862 : /* This is identical to GWKGeneralCase(), but functions via */
4863 : /* OpenCL. This means we have vector optimization (SSE) and/or */
4864 : /* GPU optimization depending on our prefs. The code itself is */
4865 : /* general and not optimized, but by defining constants we can */
4866 : /* make some pretty darn good code on the fly. */
4867 : /************************************************************************/
4868 :
4869 : #if defined(HAVE_OPENCL)
4870 0 : static CPLErr GWKOpenCLCase(GDALWarpKernel *poWK)
4871 : {
4872 0 : const int nDstXSize = poWK->nDstXSize;
4873 0 : const int nDstYSize = poWK->nDstYSize;
4874 0 : const int nSrcXSize = poWK->nSrcXSize;
4875 0 : const int nSrcYSize = poWK->nSrcYSize;
4876 0 : const int nDstXOff = poWK->nDstXOff;
4877 0 : const int nDstYOff = poWK->nDstYOff;
4878 0 : const int nSrcXOff = poWK->nSrcXOff;
4879 0 : const int nSrcYOff = poWK->nSrcYOff;
4880 0 : bool bUseImag = false;
4881 :
4882 : cl_channel_type imageFormat;
4883 0 : switch (poWK->eWorkingDataType)
4884 : {
4885 0 : case GDT_Byte:
4886 0 : imageFormat = CL_UNORM_INT8;
4887 0 : break;
4888 0 : case GDT_UInt16:
4889 0 : imageFormat = CL_UNORM_INT16;
4890 0 : break;
4891 0 : case GDT_CInt16:
4892 0 : bUseImag = true;
4893 : [[fallthrough]];
4894 0 : case GDT_Int16:
4895 0 : imageFormat = CL_SNORM_INT16;
4896 0 : break;
4897 0 : case GDT_CFloat32:
4898 0 : bUseImag = true;
4899 : [[fallthrough]];
4900 0 : case GDT_Float32:
4901 0 : imageFormat = CL_FLOAT;
4902 0 : break;
4903 0 : default:
4904 : // No support for higher precision formats.
4905 0 : CPLDebug("OpenCL", "Unsupported resampling OpenCL data type %d.",
4906 0 : static_cast<int>(poWK->eWorkingDataType));
4907 0 : return CE_Warning;
4908 : }
4909 :
4910 : OCLResampAlg resampAlg;
4911 0 : switch (poWK->eResample)
4912 : {
4913 0 : case GRA_Bilinear:
4914 0 : resampAlg = OCL_Bilinear;
4915 0 : break;
4916 0 : case GRA_Cubic:
4917 0 : resampAlg = OCL_Cubic;
4918 0 : break;
4919 0 : case GRA_CubicSpline:
4920 0 : resampAlg = OCL_CubicSpline;
4921 0 : break;
4922 0 : case GRA_Lanczos:
4923 0 : resampAlg = OCL_Lanczos;
4924 0 : break;
4925 0 : default:
4926 : // No support for higher precision formats.
4927 0 : CPLDebug("OpenCL",
4928 : "Unsupported resampling OpenCL resampling alg %d.",
4929 0 : static_cast<int>(poWK->eResample));
4930 0 : return CE_Warning;
4931 : }
4932 :
4933 0 : struct oclWarper *warper = nullptr;
4934 : cl_int err;
4935 0 : CPLErr eErr = CE_None;
4936 :
4937 : // TODO(schwehr): Fix indenting.
4938 : try
4939 : {
4940 :
4941 : // Using a factor of 2 or 4 seems to have much less rounding error
4942 : // than 3 on the GPU.
4943 : // Then the rounding error can cause strange artifacts under the
4944 : // right conditions.
4945 0 : warper = GDALWarpKernelOpenCL_createEnv(
4946 : nSrcXSize, nSrcYSize, nDstXSize, nDstYSize, imageFormat,
4947 0 : poWK->nBands, 4, bUseImag, poWK->papanBandSrcValid != nullptr,
4948 : poWK->pafDstDensity, poWK->padfDstNoDataReal, resampAlg, &err);
4949 :
4950 0 : if (err != CL_SUCCESS || warper == nullptr)
4951 : {
4952 0 : eErr = CE_Warning;
4953 0 : if (warper != nullptr)
4954 0 : throw eErr;
4955 0 : return eErr;
4956 : }
4957 :
4958 0 : CPLDebug("GDAL",
4959 : "GDALWarpKernel()::GWKOpenCLCase() "
4960 : "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d",
4961 : nSrcXOff, nSrcYOff, nSrcXSize, nSrcYSize, nDstXOff, nDstYOff,
4962 : nDstXSize, nDstYSize);
4963 :
4964 0 : if (!poWK->pfnProgress(poWK->dfProgressBase, "", poWK->pProgress))
4965 : {
4966 0 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
4967 0 : eErr = CE_Failure;
4968 0 : throw eErr;
4969 : }
4970 :
4971 : /* ====================================================================
4972 : */
4973 : /* Loop over bands. */
4974 : /* ====================================================================
4975 : */
4976 0 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
4977 : {
4978 0 : if (poWK->papanBandSrcValid != nullptr &&
4979 0 : poWK->papanBandSrcValid[iBand] != nullptr)
4980 : {
4981 0 : GDALWarpKernelOpenCL_setSrcValid(
4982 : warper,
4983 0 : reinterpret_cast<int *>(poWK->papanBandSrcValid[iBand]),
4984 : iBand);
4985 0 : if (err != CL_SUCCESS)
4986 : {
4987 0 : CPLError(
4988 : CE_Failure, CPLE_AppDefined,
4989 : "OpenCL routines reported failure (%d) on line %d.",
4990 : static_cast<int>(err), __LINE__);
4991 0 : eErr = CE_Failure;
4992 0 : throw eErr;
4993 : }
4994 : }
4995 :
4996 0 : err = GDALWarpKernelOpenCL_setSrcImg(
4997 0 : warper, poWK->papabySrcImage[iBand], iBand);
4998 0 : if (err != CL_SUCCESS)
4999 : {
5000 0 : CPLError(CE_Failure, CPLE_AppDefined,
5001 : "OpenCL routines reported failure (%d) on line %d.",
5002 : static_cast<int>(err), __LINE__);
5003 0 : eErr = CE_Failure;
5004 0 : throw eErr;
5005 : }
5006 :
5007 0 : err = GDALWarpKernelOpenCL_setDstImg(
5008 0 : warper, poWK->papabyDstImage[iBand], iBand);
5009 0 : if (err != CL_SUCCESS)
5010 : {
5011 0 : CPLError(CE_Failure, CPLE_AppDefined,
5012 : "OpenCL routines reported failure (%d) on line %d.",
5013 : static_cast<int>(err), __LINE__);
5014 0 : eErr = CE_Failure;
5015 0 : throw eErr;
5016 : }
5017 : }
5018 :
5019 : /* --------------------------------------------------------------------
5020 : */
5021 : /* Allocate x,y,z coordinate arrays for transformation ... one */
5022 : /* scanlines worth of positions. */
5023 : /* --------------------------------------------------------------------
5024 : */
5025 :
5026 : // For x, 2 *, because we cache the precomputed values at the end.
5027 : double *padfX =
5028 0 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
5029 : double *padfY =
5030 0 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5031 : double *padfZ =
5032 0 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5033 : int *pabSuccess =
5034 0 : static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5035 0 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
5036 0 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
5037 0 : const double dfErrorThreshold = CPLAtof(CSLFetchNameValueDef(
5038 0 : poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
5039 :
5040 : // Precompute values.
5041 0 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5042 0 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
5043 :
5044 : /* ====================================================================
5045 : */
5046 : /* Loop over output lines. */
5047 : /* ====================================================================
5048 : */
5049 0 : for (int iDstY = 0; iDstY < nDstYSize && eErr == CE_None; ++iDstY)
5050 : {
5051 : /* ----------------------------------------------------------------
5052 : */
5053 : /* Setup points to transform to source image space. */
5054 : /* ----------------------------------------------------------------
5055 : */
5056 0 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
5057 0 : const double dfYConst = iDstY + 0.5 + poWK->nDstYOff;
5058 0 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5059 0 : padfY[iDstX] = dfYConst;
5060 0 : memset(padfZ, 0, sizeof(double) * nDstXSize);
5061 :
5062 : /* ----------------------------------------------------------------
5063 : */
5064 : /* Transform the points from destination pixel/line
5065 : * coordinates*/
5066 : /* to source pixel/line coordinates. */
5067 : /* ----------------------------------------------------------------
5068 : */
5069 0 : poWK->pfnTransformer(poWK->pTransformerArg, TRUE, nDstXSize, padfX,
5070 : padfY, padfZ, pabSuccess);
5071 0 : if (dfSrcCoordPrecision > 0.0)
5072 : {
5073 0 : GWKRoundSourceCoordinates(
5074 : nDstXSize, padfX, padfY, padfZ, pabSuccess,
5075 : dfSrcCoordPrecision, dfErrorThreshold, poWK->pfnTransformer,
5076 : poWK->pTransformerArg, 0.5 + nDstXOff,
5077 0 : iDstY + 0.5 + nDstYOff);
5078 : }
5079 :
5080 0 : err = GDALWarpKernelOpenCL_setCoordRow(
5081 : warper, padfX, padfY, nSrcXOff, nSrcYOff, pabSuccess, iDstY);
5082 0 : if (err != CL_SUCCESS)
5083 : {
5084 0 : CPLError(CE_Failure, CPLE_AppDefined,
5085 : "OpenCL routines reported failure (%d) on line %d.",
5086 : static_cast<int>(err), __LINE__);
5087 0 : eErr = CE_Failure;
5088 0 : break;
5089 : }
5090 :
5091 : // Update the valid & density masks because we don't do so in the
5092 : // kernel.
5093 0 : for (int iDstX = 0; iDstX < nDstXSize && eErr == CE_None; iDstX++)
5094 : {
5095 0 : const double dfX = padfX[iDstX];
5096 0 : const double dfY = padfY[iDstX];
5097 0 : const GPtrDiff_t iDstOffset =
5098 0 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
5099 :
5100 : // See GWKGeneralCase() for appropriate commenting.
5101 0 : if (!pabSuccess[iDstX] || dfX < nSrcXOff || dfY < nSrcYOff)
5102 0 : continue;
5103 :
5104 0 : int iSrcX = static_cast<int>(dfX) - nSrcXOff;
5105 0 : int iSrcY = static_cast<int>(dfY) - nSrcYOff;
5106 :
5107 0 : if (iSrcX < 0 || iSrcX >= nSrcXSize || iSrcY < 0 ||
5108 : iSrcY >= nSrcYSize)
5109 0 : continue;
5110 :
5111 0 : GPtrDiff_t iSrcOffset =
5112 0 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
5113 0 : double dfDensity = 1.0;
5114 :
5115 0 : if (poWK->pafUnifiedSrcDensity != nullptr && iSrcX >= 0 &&
5116 0 : iSrcY >= 0 && iSrcX < nSrcXSize && iSrcY < nSrcYSize)
5117 0 : dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
5118 :
5119 0 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
5120 :
5121 : // Because this is on the bit-wise level, it can't be done well
5122 : // in OpenCL.
5123 0 : if (poWK->panDstValid != nullptr)
5124 0 : poWK->panDstValid[iDstOffset >> 5] |=
5125 0 : 0x01 << (iDstOffset & 0x1f);
5126 : }
5127 : }
5128 :
5129 0 : CPLFree(padfX);
5130 0 : CPLFree(padfY);
5131 0 : CPLFree(padfZ);
5132 0 : CPLFree(pabSuccess);
5133 :
5134 0 : if (eErr != CE_None)
5135 0 : throw eErr;
5136 :
5137 0 : err = GDALWarpKernelOpenCL_runResamp(
5138 : warper, poWK->pafUnifiedSrcDensity, poWK->panUnifiedSrcValid,
5139 : poWK->pafDstDensity, poWK->panDstValid, poWK->dfXScale,
5140 : poWK->dfYScale, poWK->dfXFilter, poWK->dfYFilter, poWK->nXRadius,
5141 : poWK->nYRadius, poWK->nFiltInitX, poWK->nFiltInitY);
5142 :
5143 0 : if (err != CL_SUCCESS)
5144 : {
5145 0 : CPLError(CE_Failure, CPLE_AppDefined,
5146 : "OpenCL routines reported failure (%d) on line %d.",
5147 : static_cast<int>(err), __LINE__);
5148 0 : eErr = CE_Failure;
5149 0 : throw eErr;
5150 : }
5151 :
5152 : /* ====================================================================
5153 : */
5154 : /* Loop over output lines. */
5155 : /* ====================================================================
5156 : */
5157 0 : for (int iDstY = 0; iDstY < nDstYSize && eErr == CE_None; iDstY++)
5158 : {
5159 0 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
5160 : {
5161 0 : void *rowReal = nullptr;
5162 0 : void *rowImag = nullptr;
5163 0 : GByte *pabyDst = poWK->papabyDstImage[iBand];
5164 :
5165 0 : err = GDALWarpKernelOpenCL_getRow(warper, &rowReal, &rowImag,
5166 : iDstY, iBand);
5167 0 : if (err != CL_SUCCESS)
5168 : {
5169 0 : CPLError(
5170 : CE_Failure, CPLE_AppDefined,
5171 : "OpenCL routines reported failure (%d) on line %d.",
5172 : static_cast<int>(err), __LINE__);
5173 0 : eErr = CE_Failure;
5174 0 : throw eErr;
5175 : }
5176 :
5177 : // Copy the data from the warper to GDAL's memory.
5178 0 : switch (poWK->eWorkingDataType)
5179 : {
5180 0 : case GDT_Byte:
5181 0 : memcpy(&(pabyDst[iDstY * nDstXSize]), rowReal,
5182 : sizeof(GByte) * nDstXSize);
5183 0 : break;
5184 0 : case GDT_Int16:
5185 0 : memcpy(&(reinterpret_cast<GInt16 *>(
5186 0 : pabyDst)[iDstY * nDstXSize]),
5187 0 : rowReal, sizeof(GInt16) * nDstXSize);
5188 0 : break;
5189 0 : case GDT_UInt16:
5190 0 : memcpy(&(reinterpret_cast<GUInt16 *>(
5191 0 : pabyDst)[iDstY * nDstXSize]),
5192 0 : rowReal, sizeof(GUInt16) * nDstXSize);
5193 0 : break;
5194 0 : case GDT_Float32:
5195 0 : memcpy(&(reinterpret_cast<float *>(
5196 0 : pabyDst)[iDstY * nDstXSize]),
5197 0 : rowReal, sizeof(float) * nDstXSize);
5198 0 : break;
5199 0 : case GDT_CInt16:
5200 : {
5201 0 : GInt16 *pabyDstI16 = &(reinterpret_cast<GInt16 *>(
5202 0 : pabyDst)[iDstY * nDstXSize]);
5203 0 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5204 : {
5205 0 : pabyDstI16[iDstX * 2] =
5206 0 : static_cast<GInt16 *>(rowReal)[iDstX];
5207 0 : pabyDstI16[iDstX * 2 + 1] =
5208 0 : static_cast<GInt16 *>(rowImag)[iDstX];
5209 : }
5210 : }
5211 0 : break;
5212 0 : case GDT_CFloat32:
5213 : {
5214 0 : float *pabyDstF32 = &(reinterpret_cast<float *>(
5215 0 : pabyDst)[iDstY * nDstXSize]);
5216 0 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5217 : {
5218 0 : pabyDstF32[iDstX * 2] =
5219 0 : static_cast<float *>(rowReal)[iDstX];
5220 0 : pabyDstF32[iDstX * 2 + 1] =
5221 0 : static_cast<float *>(rowImag)[iDstX];
5222 : }
5223 : }
5224 0 : break;
5225 0 : default:
5226 : // No support for higher precision formats.
5227 0 : CPLError(CE_Failure, CPLE_AppDefined,
5228 : "Unsupported resampling OpenCL data type %d.",
5229 0 : static_cast<int>(poWK->eWorkingDataType));
5230 0 : eErr = CE_Failure;
5231 0 : throw eErr;
5232 : }
5233 : }
5234 : }
5235 : }
5236 0 : catch (const CPLErr &)
5237 : {
5238 : }
5239 :
5240 0 : if ((err = GDALWarpKernelOpenCL_deleteEnv(warper)) != CL_SUCCESS)
5241 : {
5242 0 : CPLError(CE_Failure, CPLE_AppDefined,
5243 : "OpenCL routines reported failure (%d) on line %d.",
5244 : static_cast<int>(err), __LINE__);
5245 0 : return CE_Failure;
5246 : }
5247 :
5248 0 : return eErr;
5249 : }
5250 : #endif /* defined(HAVE_OPENCL) */
5251 :
5252 : /************************************************************************/
5253 : /* GWKCheckAndComputeSrcOffsets() */
5254 : /************************************************************************/
// Validate the source coordinate computed for destination pixel _iDstX and,
// when it falls inside the source window, convert it to a linear offset.
//
// psJob         job descriptor; supplies the warp kernel (poWK) and the
//               transformer argument used for the single-point retry.
// _pabSuccess   per-pixel transformation success flags for the scanline;
//               entry _iDstX may be rewritten by the retry.
// _iDstX        index of the pixel in the scanline arrays.
// _iDstY        destination line; only used to rebuild the destination
//               coordinate for the retry.
// _padfX/_padfY source coordinates for the scanline, indexed by _iDstX;
//               entry _iDstX may be rewritten by the retry.
// _nSrcXSize/_nSrcYSize  extent of the source window.
// iSrcOffset    output, written only when true is returned:
//               iSrcX + iSrcY * _nSrcXSize.
//
// Returns false when the transformation failed, when a coordinate is NaN, or
// when the coordinate lies outside the source window. A coordinate that is
// less than one pixel outside on the first pass is re-transformed alone once
// before being rejected.
5255 : static CPL_INLINE bool
5256 109622000 : GWKCheckAndComputeSrcOffsets(GWKJobStruct *psJob, int *_pabSuccess, int _iDstX,
5257 : int _iDstY, double *_padfX, double *_padfY,
5258 : int _nSrcXSize, int _nSrcYSize,
5259 : GPtrDiff_t &iSrcOffset)
5260 : {
5261 109622000 : const GDALWarpKernel *_poWK = psJob->poWK;
// Pass 0 checks the coordinate as given; pass 1 is only reached through one
// of the "continue" statements below and checks the re-transformed coordinate.
5262 109748000 : for (int iTry = 0; iTry < 2; ++iTry)
5263 : {
5264 109660000 : if (iTry == 1)
5265 : {
5266 : // If the source coordinate is slightly outside of the source raster
5267 : // retry to transform it alone, so that the exact coordinate
5268 : // transformer is used.
5269 :
// Rebuild the destination pixel centre and transform that single point;
// the resulting Z value is discarded.
5270 125879 : _padfX[_iDstX] = _iDstX + 0.5 + _poWK->nDstXOff;
5271 125879 : _padfY[_iDstX] = _iDstY + 0.5 + _poWK->nDstYOff;
5272 125879 : double dfZ = 0;
5273 125879 : _poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1,
5274 125879 : _padfX + _iDstX, _padfY + _iDstX, &dfZ,
5275 125879 : _pabSuccess + _iDstX);
5276 : }
5277 109660000 : if (!_pabSuccess[_iDstX])
5278 3593220 : return false;
5279 :
5280 : // If this happens this is likely the symptom of a bug somewhere.
5281 106066000 : if (std::isnan(_padfX[_iDstX]) || std::isnan(_padfY[_iDstX]))
5282 : {
// NOTE(review): this function-local static is read and written without
// synchronisation; if the function runs on several worker threads the debug
// message may be emitted more than once - confirm that this is acceptable.
5283 : static bool bNanCoordFound = false;
5284 0 : if (!bNanCoordFound)
5285 : {
5286 0 : CPLDebug("WARP",
5287 : "GWKCheckAndComputeSrcOffsets(): "
5288 : "NaN coordinate found on point %d.",
5289 : _iDstX);
5290 0 : bNanCoordFound = true;
5291 : }
5292 0 : return false;
5293 : }
5294 :
5295 : /* --------------------------------------------------------------------
5296 : */
5297 : /* Figure out what pixel we want in our source raster, and skip */
5298 : /* further processing if it is well off the source image. */
5299 : /* --------------------------------------------------------------------
5300 : */
5301 : /* We test against the value before casting to avoid the */
5302 : /* problem of asymmetric truncation effects around zero. That is */
5303 : /* -0.5 will be 0 when cast to an int. */
5304 106131000 : if (_padfX[_iDstX] < _poWK->nSrcXOff)
5305 : {
5306 : // If the source coordinate is slightly outside of the source raster
5307 : // retry to transform it alone, so that the exact coordinate
5308 : // transformer is used.
5309 4137540 : if (iTry == 0 && _padfX[_iDstX] > _poWK->nSrcXOff - 1)
5310 21497 : continue;
5311 4116040 : return false;
5312 : }
5313 :
5314 101993000 : if (_padfY[_iDstX] < _poWK->nSrcYOff)
5315 : {
5316 : // If the source coordinate is slightly outside of the source raster
5317 : // retry to transform it alone, so that the exact coordinate
5318 : // transformer is used.
5319 4793040 : if (iTry == 0 && _padfY[_iDstX] > _poWK->nSrcYOff - 1)
5320 38555 : continue;
5321 4754490 : return false;
5322 : }
5323 :
5324 : // Check for potential overflow when casting from float to int, (if
5325 : // operating outside natural projection area, padfX/Y can be a very huge
5326 : // positive number before doing the actual conversion), as such cast is
5327 : // undefined behavior that can trigger exception with some compilers
5328 : // (see #6753)
// The 1e-10 bias used in these two upper-bound tests is the same one added
// before the integer casts after the loop.
5329 97200200 : if (_padfX[_iDstX] + 1e-10 > _nSrcXSize + _poWK->nSrcXOff)
5330 : {
5331 : // If the source coordinate is slightly outside of the source raster
5332 : // retry to transform it alone, so that the exact coordinate
5333 : // transformer is used.
5334 3499480 : if (iTry == 0 && _padfX[_iDstX] < _nSrcXSize + _poWK->nSrcXOff + 1)
5335 33291 : continue;
5336 3466190 : return false;
5337 : }
5338 93700800 : if (_padfY[_iDstX] + 1e-10 > _nSrcYSize + _poWK->nSrcYOff)
5339 : {
5340 : // If the source coordinate is slightly outside of the source raster
5341 : // retry to transform it alone, so that the exact coordinate
5342 : // transformer is used.
5343 3731490 : if (iTry == 0 && _padfY[_iDstX] < _nSrcYSize + _poWK->nSrcYOff + 1)
5344 32536 : continue;
5345 3698950 : return false;
5346 : }
5347 :
// All range checks passed: leave the loop and compute the offset.
5348 89969300 : break;
5349 : }
5350 :
// Truncate to integer pixel indices relative to the source window origin.
5351 90057700 : int iSrcX = static_cast<int>(_padfX[_iDstX] + 1.0e-10) - _poWK->nSrcXOff;
5352 90057700 : int iSrcY = static_cast<int>(_padfY[_iDstX] + 1.0e-10) - _poWK->nSrcYOff;
// The upper-bound tests above let through a coordinate that sits exactly on
// the far edge; map it onto the last column / row.
5353 90057700 : if (iSrcX == _nSrcXSize)
5354 0 : iSrcX--;
5355 90057700 : if (iSrcY == _nSrcYSize)
5356 0 : iSrcY--;
5357 :
5358 : // Those checks should normally be OK given the previous ones.
5359 90057700 : CPLAssert(iSrcX >= 0);
5360 90057700 : CPLAssert(iSrcY >= 0);
5361 90057700 : CPLAssert(iSrcX < _nSrcXSize);
5362 90057700 : CPLAssert(iSrcY < _nSrcYSize);
5363 :
// The row index is widened to GPtrDiff_t before the multiplication.
5364 90057700 : iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * _nSrcXSize;
5365 :
5366 90057700 : return true;
5367 : }
5368 :
5369 : /************************************************************************/
5370 : /* GWKOneSourceCornerFailsToReproject() */
5371 : /************************************************************************/
5372 :
// Transform each of the four corners of the source window
// (nSrcXOff + {0, nSrcXSize}, nSrcYOff + {0, nSrcYSize}) as a single point
// and return true as soon as the transformer reports a failure for one of
// them; return false when all four succeed.
//
// The transformer is called with its second argument set to FALSE, i.e. the
// opposite of the destination-to-source calls made with TRUE elsewhere in
// this file (presumably source-to-destination; confirm against the
// transformer documentation).
5373 720 : static bool GWKOneSourceCornerFailsToReproject(GWKJobStruct *psJob)
5374 : {
5375 720 : GDALWarpKernel *poWK = psJob->poWK;
5376 2150 : for (int iY = 0; iY <= 1; ++iY)
5377 : {
5378 4296 : for (int iX = 0; iX <= 1; ++iX)
5379 : {
// iX / iY select the near (0) or far (1) side of the source window.
5380 2866 : double dfXTmp = poWK->nSrcXOff + iX * poWK->nSrcXSize;
5381 2866 : double dfYTmp = poWK->nSrcYOff + iY * poWK->nSrcYSize;
5382 2866 : double dfZTmp = 0;
5383 2866 : int nSuccess = FALSE;
5384 2866 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp,
5385 : &dfYTmp, &dfZTmp, &nSuccess);
5386 2866 : if (!nSuccess)
5387 6 : return true;
5388 : }
5389 : }
5390 714 : return false;
5391 : }
5392 :
5393 : /************************************************************************/
5394 : /* GWKAdjustSrcOffsetOnEdge() */
5395 : /************************************************************************/
5396 :
// Try to replace the source pixel at iSrcOffset by a directly adjacent pixel
// that is flagged valid in poWK->panUnifiedSrcValid.
//
// The replacement is only attempted when at least one of three probe points
// fails to transform: (column, row), (column, row + 1) and (column + 1, row)
// of the pixel, offset by nSrcXOff / nSrcYOff. Each probe is run only if the
// previous one succeeded.
//
// psJob       job descriptor; supplies the warp kernel and transformer
//             argument.
// iSrcOffset  in/out linear offset into the source window
//             (column + row * nSrcXSize); updated only when true is returned.
//
// Returns true when iSrcOffset was moved to a valid neighbour, false when all
// three probes succeeded or when no candidate neighbour is valid.
5397 9714 : static bool GWKAdjustSrcOffsetOnEdge(GWKJobStruct *psJob,
5398 : GPtrDiff_t &iSrcOffset)
5399 : {
5400 9714 : GDALWarpKernel *poWK = psJob->poWK;
5401 9714 : const int nSrcXSize = poWK->nSrcXSize;
5402 9714 : const int nSrcYSize = poWK->nSrcYSize;
5403 :
5404 : // Check if the computed source position slightly altered
5405 : // fails to reproject. If so, then we are at the edge of
5406 : // the validity area, and it is worth checking neighbour
5407 : // source pixels for validity.
5408 9714 : int nSuccess = FALSE;
5409 : {
// Probe 1: (column, row).
5410 9714 : double dfXTmp =
5411 9714 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5412 9714 : double dfYTmp =
5413 9714 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5414 9714 : double dfZTmp = 0;
5415 9714 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5416 : &dfZTmp, &nSuccess);
5417 : }
5418 9714 : if (nSuccess)
5419 : {
// Probe 2: (column, row + 1).
5420 6996 : double dfXTmp =
5421 6996 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5422 6996 : double dfYTmp =
5423 6996 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
5424 6996 : double dfZTmp = 0;
5425 6996 : nSuccess = FALSE;
5426 6996 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5427 : &dfZTmp, &nSuccess);
5428 : }
5429 9714 : if (nSuccess)
5430 : {
// Probe 3: (column + 1, row).
5431 5624 : double dfXTmp =
5432 5624 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
5433 5624 : double dfYTmp =
5434 5624 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5435 5624 : double dfZTmp = 0;
5436 5624 : nSuccess = FALSE;
5437 5624 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5438 : &dfZTmp, &nSuccess);
5439 : }
5440 :
// nSuccess is FALSE here if any probe failed. Neighbours are then tried in
// this order, each guarded so the offset stays inside the source window:
// next column, next row, previous column, previous row.
5441 14166 : if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
5442 4452 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + 1))
5443 : {
5444 1860 : iSrcOffset++;
5445 1860 : return true;
5446 : }
5447 10290 : else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
5448 2436 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + nSrcXSize))
5449 : {
5450 1334 : iSrcOffset += nSrcXSize;
5451 1334 : return true;
5452 : }
5453 7838 : else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
5454 1318 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - 1))
5455 : {
5456 956 : iSrcOffset--;
5457 956 : return true;
5458 : }
5459 5924 : else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
5460 360 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - nSrcXSize))
5461 : {
5462 340 : iSrcOffset -= nSrcXSize;
5463 340 : return true;
5464 : }
5465 :
5466 5224 : return false;
5467 : }
5468 :
5469 : /************************************************************************/
5470 : /* GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity() */
5471 : /************************************************************************/
5472 :
// Try to replace the source pixel at iSrcOffset by a directly adjacent pixel
// whose value in poWK->pafUnifiedSrcDensity is at least
// SRC_DENSITY_THRESHOLD.
//
// The replacement is only attempted when at least one of three probe points
// fails to transform: (column, row), (column, row + 1) and (column + 1, row)
// of the pixel, offset by nSrcXOff / nSrcYOff. Each probe is run only if the
// previous one succeeded.
//
// psJob       job descriptor; supplies the warp kernel and transformer
//             argument.
// iSrcOffset  in/out linear offset into the source window
//             (column + row * nSrcXSize); updated only when true is returned.
//
// Returns true when iSrcOffset was moved to a sufficiently dense neighbour,
// false when all three probes succeeded or no candidate neighbour qualifies.
5473 0 : static bool GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(GWKJobStruct *psJob,
5474 : GPtrDiff_t &iSrcOffset)
5475 : {
5476 0 : GDALWarpKernel *poWK = psJob->poWK;
5477 0 : const int nSrcXSize = poWK->nSrcXSize;
5478 0 : const int nSrcYSize = poWK->nSrcYSize;
5479 :
5480 : // Check if the computed source position slightly altered
5481 : // fails to reproject. If so, then we are at the edge of
5482 : // the validity area, and it is worth checking neighbour
5483 : // source pixels for validity.
5484 0 : int nSuccess = FALSE;
5485 : {
// Probe 1: (column, row).
5486 0 : double dfXTmp =
5487 0 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5488 0 : double dfYTmp =
5489 0 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5490 0 : double dfZTmp = 0;
5491 0 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5492 : &dfZTmp, &nSuccess);
5493 : }
5494 0 : if (nSuccess)
5495 : {
// Probe 2: (column, row + 1).
5496 0 : double dfXTmp =
5497 0 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5498 0 : double dfYTmp =
5499 0 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
5500 0 : double dfZTmp = 0;
5501 0 : nSuccess = FALSE;
5502 0 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5503 : &dfZTmp, &nSuccess);
5504 : }
5505 0 : if (nSuccess)
5506 : {
// Probe 3: (column + 1, row).
5507 0 : double dfXTmp =
5508 0 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
5509 0 : double dfYTmp =
5510 0 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5511 0 : double dfZTmp = 0;
5512 0 : nSuccess = FALSE;
5513 0 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5514 : &dfZTmp, &nSuccess);
5515 : }
5516 :
// nSuccess is FALSE here if any probe failed. Neighbours are then tried in
// this order, each guarded so the offset stays inside the source window:
// next column, next row, previous column, previous row.
5517 0 : if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
5518 0 : poWK->pafUnifiedSrcDensity[iSrcOffset + 1] >= SRC_DENSITY_THRESHOLD)
5519 : {
5520 0 : iSrcOffset++;
5521 0 : return true;
5522 : }
5523 0 : else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
5524 0 : poWK->pafUnifiedSrcDensity[iSrcOffset + nSrcXSize] >=
5525 : SRC_DENSITY_THRESHOLD)
5526 : {
5527 0 : iSrcOffset += nSrcXSize;
5528 0 : return true;
5529 : }
5530 0 : else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
5531 0 : poWK->pafUnifiedSrcDensity[iSrcOffset - 1] >=
5532 : SRC_DENSITY_THRESHOLD)
5533 : {
5534 0 : iSrcOffset--;
5535 0 : return true;
5536 : }
5537 0 : else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
5538 0 : poWK->pafUnifiedSrcDensity[iSrcOffset - nSrcXSize] >=
5539 : SRC_DENSITY_THRESHOLD)
5540 : {
5541 0 : iSrcOffset -= nSrcXSize;
5542 0 : return true;
5543 : }
5544 :
5545 0 : return false;
5546 : }
5547 :
5548 : /************************************************************************/
5549 : /* GWKGeneralCase() */
5550 : /* */
5551 : /* This is the most general case. It attempts to handle all */
5552 : /* possible features with relatively little concern for */
5553 : /* efficiency. */
5554 : /************************************************************************/
5555 :
// Worker routine of the general-case warper: processes destination lines
// [psJob->iYMin, psJob->iYMax) of the kernel referenced by the job.
//
// pData  pointer to a GWKJobStruct (cast below).
//
// For every destination line the pixel centres are transformed to source
// coordinates in one call. Each pixel is then checked against the source
// window and the unified source density / validity masks, a value is
// collected per band with the configured resampling method and written to the
// destination, and the destination density / validity masks are updated.
//
// Warp options read here: MULT_FACTOR_VERTICAL_SHIFT_PIPELINE (only when
// bApplyVerticalShift is set), SRC_COORD_PRECISION and ERROR_THRESHOLD.
5556 243 : static void GWKGeneralCaseThread(void *pData)
5557 : {
5558 243 : GWKJobStruct *psJob = reinterpret_cast<GWKJobStruct *>(pData);
5559 243 : GDALWarpKernel *poWK = psJob->poWK;
5560 243 : const int iYMin = psJob->iYMin;
5561 243 : const int iYMax = psJob->iYMax;
5562 : const double dfMultFactorVerticalShiftPipeline =
5563 243 : poWK->bApplyVerticalShift
5564 243 : ? CPLAtof(CSLFetchNameValueDef(
5565 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
5566 : "1.0"))
5567 243 : : 0.0;
5568 :
5569 243 : int nDstXSize = poWK->nDstXSize;
5570 243 : int nSrcXSize = poWK->nSrcXSize;
5571 243 : int nSrcYSize = poWK->nSrcYSize;
5572 :
5573 : /* -------------------------------------------------------------------- */
5574 : /* Allocate x,y,z coordinate arrays for transformation ... one */
5575 : /* scanlines worth of positions. */
5576 : /* -------------------------------------------------------------------- */
5577 : // For x, 2 *, because we cache the precomputed values at the end.
5578 : double *padfX =
5579 243 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
5580 : double *padfY =
5581 243 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5582 : double *padfZ =
5583 243 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5584 243 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5585 :
// The 4-sample bilinear / cubic shortcuts below are only taken when both
// scale factors are at least 0.95.
5586 243 : const bool bUse4SamplesFormula =
5587 243 : poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
5588 :
// The resampling work structure is only created for methods other than
// nearest neighbour; it is released at the end of the function.
5589 243 : GWKResampleWrkStruct *psWrkStruct = nullptr;
5590 243 : if (poWK->eResample != GRA_NearestNeighbour)
5591 : {
5592 224 : psWrkStruct = GWKResampleCreateWrkStruct(poWK);
5593 : }
5594 243 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
5595 243 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
5596 243 : const double dfErrorThreshold = CPLAtof(
5597 243 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
5598 :
// Computed once per job: it decides below whether an invalid source pixel is
// skipped outright or whether a neighbouring pixel is looked for.
5599 : const bool bOneSourceCornerFailsToReproject =
5600 243 : GWKOneSourceCornerFailsToReproject(psJob);
5601 :
5602 : // Precompute values.
// The destination X pixel centres are stored in the second half of padfX and
// copied to the first half at the start of every line.
5603 6513 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5604 6270 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
5605 :
5606 : /* ==================================================================== */
5607 : /* Loop over output lines. */
5608 : /* ==================================================================== */
5609 6513 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
5610 : {
5611 : /* --------------------------------------------------------------------
5612 : */
5613 : /* Setup points to transform to source image space. */
5614 : /* --------------------------------------------------------------------
5615 : */
5616 6270 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
5617 6270 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
5618 242830 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5619 236560 : padfY[iDstX] = dfY;
5620 6270 : memset(padfZ, 0, sizeof(double) * nDstXSize);
5621 :
5622 : /* --------------------------------------------------------------------
5623 : */
5624 : /* Transform the points from destination pixel/line coordinates */
5625 : /* to source pixel/line coordinates. */
5626 : /* --------------------------------------------------------------------
5627 : */
5628 6270 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
5629 : padfY, padfZ, pabSuccess);
// Optional post-processing of the source coordinates, enabled by a positive
// SRC_COORD_PRECISION.
5630 6270 : if (dfSrcCoordPrecision > 0.0)
5631 : {
5632 0 : GWKRoundSourceCoordinates(
5633 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
5634 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
5635 0 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
5636 : }
5637 :
5638 : /* ====================================================================
5639 : */
5640 : /* Loop over pixels in output scanline. */
5641 : /* ====================================================================
5642 : */
5643 242830 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5644 : {
// Skip destination pixels whose source coordinate is unusable or outside the
// source window.
5645 236560 : GPtrDiff_t iSrcOffset = 0;
5646 236560 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
5647 : padfX, padfY, nSrcXSize,
5648 : nSrcYSize, iSrcOffset))
5649 0 : continue;
5650 :
5651 : /* --------------------------------------------------------------------
5652 : */
5653 : /* Do not try to apply transparent/invalid source pixels to the
5654 : */
5655 : /* destination. This currently ignores the multi-pixel input
5656 : */
5657 : /* of bilinear and cubic resamples. */
5658 : /* --------------------------------------------------------------------
5659 : */
5660 236560 : double dfDensity = 1.0;
5661 :
// A source pixel below the density threshold is skipped, unless a source
// corner failed to reproject and a sufficiently dense neighbour is found.
5662 236560 : if (poWK->pafUnifiedSrcDensity != nullptr)
5663 : {
5664 1200 : dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
5665 1200 : if (dfDensity < SRC_DENSITY_THRESHOLD)
5666 : {
5667 0 : if (!bOneSourceCornerFailsToReproject)
5668 : {
5669 0 : continue;
5670 : }
5671 0 : else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
5672 : psJob, iSrcOffset))
5673 : {
5674 0 : dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
5675 : }
5676 : else
5677 : {
5678 0 : continue;
5679 : }
5680 : }
5681 : }
5682 :
// Same policy for the unified validity mask.
5683 236560 : if (poWK->panUnifiedSrcValid != nullptr &&
5684 0 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
5685 : {
5686 0 : if (!bOneSourceCornerFailsToReproject)
5687 : {
5688 0 : continue;
5689 : }
5690 0 : else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
5691 : {
5692 0 : continue;
5693 : }
5694 : }
5695 :
5696 : /* ====================================================================
5697 : */
5698 : /* Loop processing each band. */
5699 : /* ====================================================================
5700 : */
// Becomes true once at least one band has produced a value for this pixel.
5701 236560 : bool bHasFoundDensity = false;
5702 :
5703 236560 : const GPtrDiff_t iDstOffset =
5704 236560 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
5705 473120 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
5706 : {
5707 236560 : double dfBandDensity = 0.0;
5708 236560 : double dfValueReal = 0.0;
5709 236560 : double dfValueImag = 0.0;
5710 :
5711 : /* --------------------------------------------------------------------
5712 : */
5713 : /* Collect the source value. */
5714 : /* --------------------------------------------------------------------
5715 : */
// A source window that is one pixel wide or one pixel high is read with the
// single-pixel fetch, like nearest neighbour.
5716 236560 : if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
5717 : nSrcYSize == 1)
5718 : {
5719 : // FALSE is returned if dfBandDensity == 0, which is
5720 : // checked below.
5721 568 : CPL_IGNORE_RET_VAL(GWKGetPixelValue(
5722 : poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal,
5723 : &dfValueImag));
5724 : }
5725 235992 : else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
5726 : {
5727 648 : GWKBilinearResample4Sample(
5728 648 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5729 648 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5730 : &dfValueReal, &dfValueImag);
5731 : }
5732 235344 : else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
5733 : {
5734 248 : GWKCubicResample4Sample(
5735 248 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5736 248 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5737 : &dfValueReal, &dfValueImag);
5738 : }
5739 : else
5740 : #ifdef DEBUG
5741 : // Only useful for clang static analyzer.
5742 235096 : if (psWrkStruct != nullptr)
5743 : #endif
5744 : {
5745 235096 : psWrkStruct->pfnGWKResample(
5746 235096 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5747 235096 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5748 : &dfValueReal, &dfValueImag, psWrkStruct);
5749 : }
5750 :
5751 : // If we didn't find any valid inputs skip to next band.
5752 236560 : if (dfBandDensity < BAND_DENSITY_THRESHOLD)
5753 0 : continue;
5754 :
5755 236560 : if (poWK->bApplyVerticalShift)
5756 : {
// A non-finite Z for this pixel means no value is written for the band.
5757 0 : if (!std::isfinite(padfZ[iDstX]))
5758 0 : continue;
5759 : // Subtract padfZ[] since the coordinate transformation is
5760 : // from target to source
5761 0 : dfValueReal =
5762 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
5763 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
5764 : }
5765 :
5766 236560 : bHasFoundDensity = true;
5767 :
5768 : /* --------------------------------------------------------------------
5769 : */
5770 : /* We have a computed value from the source. Now apply it
5771 : * to */
5772 : /* the destination pixel. */
5773 : /* --------------------------------------------------------------------
5774 : */
5775 236560 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
5776 : dfValueReal, dfValueImag);
5777 : }
5778 :
// No band produced a value: leave the destination masks untouched.
5779 236560 : if (!bHasFoundDensity)
5780 0 : continue;
5781 :
5782 : /* --------------------------------------------------------------------
5783 : */
5784 : /* Update destination density/validity masks. */
5785 : /* --------------------------------------------------------------------
5786 : */
5787 236560 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
5788 :
5789 236560 : if (poWK->panDstValid != nullptr)
5790 : {
5791 0 : CPLMaskSet(poWK->panDstValid, iDstOffset);
5792 : }
5793 : } /* Next iDstX */
5794 :
5795 : /* --------------------------------------------------------------------
5796 : */
5797 : /* Report progress to the user, and optionally cancel out. */
5798 : /* --------------------------------------------------------------------
5799 : */
// A non-zero return from the job's progress callback stops the processing of
// the remaining lines; the cleanup below still runs.
5800 6270 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
5801 0 : break;
5802 : }
5803 :
5804 : /* -------------------------------------------------------------------- */
5805 : /* Cleanup and return. */
5806 : /* -------------------------------------------------------------------- */
5807 243 : CPLFree(padfX);
5808 243 : CPLFree(padfY);
5809 243 : CPLFree(padfZ);
5810 243 : CPLFree(pabSuccess);
5811 243 : if (psWrkStruct)
5812 224 : GWKResampleDeleteWrkStruct(psWrkStruct);
5813 243 : }
5814 :
// Entry point of the general-case warper: hands GWKGeneralCaseThread to
// GWKRun together with the kernel and the label "GWKGeneralCase", and returns
// GWKRun's result unchanged.
5815 243 : static CPLErr GWKGeneralCase(GDALWarpKernel *poWK)
5816 : {
5817 243 : return GWKRun(poWK, "GWKGeneralCase", GWKGeneralCaseThread);
5818 : }
5819 :
5820 : /************************************************************************/
5821 : /* GWKRealCase() */
5822 : /* */
5823 : /* General case for non-complex data types. */
5824 : /************************************************************************/
5825 :
5826 134 : static void GWKRealCaseThread(void *pData)
5827 :
5828 : {
5829 134 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
5830 134 : GDALWarpKernel *poWK = psJob->poWK;
5831 134 : const int iYMin = psJob->iYMin;
5832 134 : const int iYMax = psJob->iYMax;
5833 :
5834 134 : const int nDstXSize = poWK->nDstXSize;
5835 134 : const int nSrcXSize = poWK->nSrcXSize;
5836 134 : const int nSrcYSize = poWK->nSrcYSize;
5837 : const double dfMultFactorVerticalShiftPipeline =
5838 134 : poWK->bApplyVerticalShift
5839 134 : ? CPLAtof(CSLFetchNameValueDef(
5840 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
5841 : "1.0"))
5842 134 : : 0.0;
5843 :
5844 : /* -------------------------------------------------------------------- */
5845 : /* Allocate x,y,z coordinate arrays for transformation ... one */
5846 : /* scanlines worth of positions. */
5847 : /* -------------------------------------------------------------------- */
5848 :
5849 : // For x, 2 *, because we cache the precomputed values at the end.
5850 : double *padfX =
5851 134 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
5852 : double *padfY =
5853 134 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5854 : double *padfZ =
5855 134 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5856 134 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5857 :
5858 134 : const bool bUse4SamplesFormula =
5859 134 : poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
5860 :
5861 134 : GWKResampleWrkStruct *psWrkStruct = nullptr;
5862 134 : if (poWK->eResample != GRA_NearestNeighbour)
5863 : {
5864 118 : psWrkStruct = GWKResampleCreateWrkStruct(poWK);
5865 : }
5866 134 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
5867 134 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
5868 134 : const double dfErrorThreshold = CPLAtof(
5869 134 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
5870 :
5871 387 : const bool bSrcMaskIsDensity = poWK->panUnifiedSrcValid == nullptr &&
5872 253 : poWK->papanBandSrcValid == nullptr &&
5873 119 : poWK->pafUnifiedSrcDensity != nullptr;
5874 :
5875 : const bool bOneSourceCornerFailsToReproject =
5876 134 : GWKOneSourceCornerFailsToReproject(psJob);
5877 :
5878 : // Precompute values.
5879 18829 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5880 18695 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
5881 :
5882 : /* ==================================================================== */
5883 : /* Loop over output lines. */
5884 : /* ==================================================================== */
5885 21580 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
5886 : {
5887 : /* --------------------------------------------------------------------
5888 : */
5889 : /* Setup points to transform to source image space. */
5890 : /* --------------------------------------------------------------------
5891 : */
5892 21446 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
5893 21446 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
5894 43460600 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5895 43439100 : padfY[iDstX] = dfY;
5896 21446 : memset(padfZ, 0, sizeof(double) * nDstXSize);
5897 :
5898 : /* --------------------------------------------------------------------
5899 : */
5900 : /* Transform the points from destination pixel/line coordinates */
5901 : /* to source pixel/line coordinates. */
5902 : /* --------------------------------------------------------------------
5903 : */
5904 21446 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
5905 : padfY, padfZ, pabSuccess);
5906 21446 : if (dfSrcCoordPrecision > 0.0)
5907 : {
5908 0 : GWKRoundSourceCoordinates(
5909 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
5910 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
5911 0 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
5912 : }
5913 :
5914 : /* ====================================================================
5915 : */
5916 : /* Loop over pixels in output scanline. */
5917 : /* ====================================================================
5918 : */
5919 43460600 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5920 : {
5921 43439100 : GPtrDiff_t iSrcOffset = 0;
5922 43439100 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
5923 : padfX, padfY, nSrcXSize,
5924 : nSrcYSize, iSrcOffset))
5925 42846800 : continue;
5926 :
5927 : /* --------------------------------------------------------------------
5928 : */
5929 : /* Do not try to apply transparent/invalid source pixels to the
5930 : */
5931 : /* destination. This currently ignores the multi-pixel input
5932 : */
5933 : /* of bilinear and cubic resamples. */
5934 : /* --------------------------------------------------------------------
5935 : */
5936 31382600 : double dfDensity = 1.0;
5937 :
5938 31382600 : if (poWK->pafUnifiedSrcDensity != nullptr)
5939 : {
5940 1262880 : dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
5941 1262880 : if (dfDensity < SRC_DENSITY_THRESHOLD)
5942 : {
5943 1261590 : if (!bOneSourceCornerFailsToReproject)
5944 : {
5945 1261590 : continue;
5946 : }
5947 0 : else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
5948 : psJob, iSrcOffset))
5949 : {
5950 0 : dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
5951 : }
5952 : else
5953 : {
5954 0 : continue;
5955 : }
5956 : }
5957 : }
5958 :
5959 59749600 : if (poWK->panUnifiedSrcValid != nullptr &&
5960 29628600 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
5961 : {
5962 29531000 : if (!bOneSourceCornerFailsToReproject)
5963 : {
5964 29528700 : continue;
5965 : }
5966 2229 : else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
5967 : {
5968 0 : continue;
5969 : }
5970 : }
5971 :
5972 : /* ====================================================================
5973 : */
5974 : /* Loop processing each band. */
5975 : /* ====================================================================
5976 : */
5977 592300 : bool bHasFoundDensity = false;
5978 :
5979 592300 : const GPtrDiff_t iDstOffset =
5980 592300 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
5981 1516060 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
5982 : {
5983 923761 : double dfBandDensity = 0.0;
5984 923761 : double dfValueReal = 0.0;
5985 :
5986 : /* --------------------------------------------------------------------
5987 : */
5988 : /* Collect the source value. */
5989 : /* --------------------------------------------------------------------
5990 : */
5991 923761 : if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
5992 : nSrcYSize == 1)
5993 : {
5994 : // FALSE is returned if dfBandDensity == 0, which is
5995 : // checked below.
5996 1012 : CPL_IGNORE_RET_VAL(GWKGetPixelValueReal(
5997 : poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal));
5998 : }
5999 922749 : else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
6000 : {
6001 1326 : double dfValueImagIgnored = 0.0;
6002 1326 : GWKBilinearResample4Sample(
6003 1326 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6004 1326 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
6005 1326 : &dfValueReal, &dfValueImagIgnored);
6006 : }
6007 921423 : else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
6008 : {
6009 299992 : if (bSrcMaskIsDensity)
6010 : {
6011 361 : if (poWK->eWorkingDataType == GDT_Byte)
6012 : {
6013 361 : GWKCubicResampleSrcMaskIsDensity4SampleRealT<GByte>(
6014 361 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6015 361 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
6016 : &dfValueReal);
6017 : }
6018 0 : else if (poWK->eWorkingDataType == GDT_UInt16)
6019 : {
6020 : GWKCubicResampleSrcMaskIsDensity4SampleRealT<
6021 0 : GUInt16>(poWK, iBand,
6022 0 : padfX[iDstX] - poWK->nSrcXOff,
6023 0 : padfY[iDstX] - poWK->nSrcYOff,
6024 : &dfBandDensity, &dfValueReal);
6025 : }
6026 : else
6027 : {
6028 0 : GWKCubicResampleSrcMaskIsDensity4SampleReal(
6029 0 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6030 0 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
6031 : &dfValueReal);
6032 : }
6033 : }
6034 : else
6035 : {
6036 299631 : double dfValueImagIgnored = 0.0;
6037 299631 : GWKCubicResample4Sample(
6038 299631 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6039 299631 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
6040 : &dfValueReal, &dfValueImagIgnored);
6041 299992 : }
6042 : }
6043 : else
6044 : #ifdef DEBUG
6045 : // Only useful for clang static analyzer.
6046 621431 : if (psWrkStruct != nullptr)
6047 : #endif
6048 : {
6049 621431 : double dfValueImagIgnored = 0.0;
6050 621431 : psWrkStruct->pfnGWKResample(
6051 621431 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6052 621431 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
6053 : &dfValueReal, &dfValueImagIgnored, psWrkStruct);
6054 : }
6055 :
6056 : // If we didn't find any valid inputs skip to next band.
6057 923761 : if (dfBandDensity < BAND_DENSITY_THRESHOLD)
6058 0 : continue;
6059 :
6060 923761 : if (poWK->bApplyVerticalShift)
6061 : {
6062 0 : if (!std::isfinite(padfZ[iDstX]))
6063 0 : continue;
6064 : // Subtract padfZ[] since the coordinate transformation is
6065 : // from target to source
6066 0 : dfValueReal =
6067 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
6068 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
6069 : }
6070 :
6071 923761 : bHasFoundDensity = true;
6072 :
6073 : /* --------------------------------------------------------------------
6074 : */
6075 : /* We have a computed value from the source. Now apply it
6076 : * to */
6077 : /* the destination pixel. */
6078 : /* --------------------------------------------------------------------
6079 : */
6080 923761 : GWKSetPixelValueReal(poWK, iBand, iDstOffset, dfBandDensity,
6081 : dfValueReal);
6082 : }
6083 :
6084 592300 : if (!bHasFoundDensity)
6085 0 : continue;
6086 :
6087 : /* --------------------------------------------------------------------
6088 : */
6089 : /* Update destination density/validity masks. */
6090 : /* --------------------------------------------------------------------
6091 : */
6092 592300 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
6093 :
6094 592300 : if (poWK->panDstValid != nullptr)
6095 : {
6096 101460 : CPLMaskSet(poWK->panDstValid, iDstOffset);
6097 : }
6098 : } // Next iDstX.
6099 :
6100 : /* --------------------------------------------------------------------
6101 : */
6102 : /* Report progress to the user, and optionally cancel out. */
6103 : /* --------------------------------------------------------------------
6104 : */
6105 21446 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
6106 0 : break;
6107 : }
6108 :
6109 : /* -------------------------------------------------------------------- */
6110 : /* Cleanup and return. */
6111 : /* -------------------------------------------------------------------- */
6112 134 : CPLFree(padfX);
6113 134 : CPLFree(padfY);
6114 134 : CPLFree(padfZ);
6115 134 : CPLFree(pabSuccess);
6116 134 : if (psWrkStruct)
6117 118 : GWKResampleDeleteWrkStruct(psWrkStruct);
6118 134 : }
6119 :
// Entry point that passes the kernel to GWKRun() with GWKRealCaseThread as
// the worker function and returns the CPLErr from GWKRun() unchanged.
// The string literal is the name given to GWKRun() for this kernel.
6120 134 : static CPLErr GWKRealCase(GDALWarpKernel *poWK)
6121 : {
6122 134 : return GWKRun(poWK, "GWKRealCase", GWKRealCaseThread);
6123 : }
6124 :
6125 : /************************************************************************/
6126 : /* GWKCubicResampleNoMasks4MultiBandT() */
6127 : /************************************************************************/
6128 :
6129 : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
6130 : /* and enough SSE registers */
6131 : #if defined(USE_SSE2)
6132 :
6133 238596 : static inline float Convolute4x4(const __m128 row0, const __m128 row1,
6134 : const __m128 row2, const __m128 row3,
6135 : const __m128 weightsXY0,
6136 : const __m128 weightsXY1,
6137 : const __m128 weightsXY2,
6138 : const __m128 weightsXY3)
6139 : {
6140 1670170 : return XMMHorizontalAdd(_mm_add_ps(
6141 : _mm_add_ps(_mm_mul_ps(row0, weightsXY0), _mm_mul_ps(row1, weightsXY1)),
6142 : _mm_add_ps(_mm_mul_ps(row2, weightsXY2),
6143 238596 : _mm_mul_ps(row3, weightsXY3))));
6144 : }
6145 :
6146 : template <class T>
6147 81323 : static void GWKCubicResampleNoMasks4MultiBandT(const GDALWarpKernel *poWK,
6148 : double dfSrcX, double dfSrcY,
6149 : const GPtrDiff_t iDstOffset)
6150 : {
6151 81323 : const double dfSrcXShifted = dfSrcX - 0.5;
6152 81323 : const int iSrcX = static_cast<int>(dfSrcXShifted);
6153 81323 : const double dfSrcYShifted = dfSrcY - 0.5;
6154 81323 : const int iSrcY = static_cast<int>(dfSrcYShifted);
6155 81323 : const GPtrDiff_t iSrcOffset =
6156 81323 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
6157 :
6158 : // Get the bilinear interpolation at the image borders.
6159 81323 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
6160 80326 : iSrcY + 2 >= poWK->nSrcYSize)
6161 : {
6162 7164 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6163 : {
6164 : T value;
6165 5373 : GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
6166 : &value);
6167 5373 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
6168 : value;
6169 1791 : }
6170 : }
6171 : else
6172 : {
6173 79532 : const float fDeltaX = static_cast<float>(dfSrcXShifted) - iSrcX;
6174 79532 : const float fDeltaY = static_cast<float>(dfSrcYShifted) - iSrcY;
6175 :
6176 : float afCoeffsX[4];
6177 : float afCoeffsY[4];
6178 79532 : GWKCubicComputeWeights(fDeltaX, afCoeffsX);
6179 79532 : GWKCubicComputeWeights(fDeltaY, afCoeffsY);
6180 79532 : const auto weightsX = _mm_loadu_ps(afCoeffsX);
6181 : const auto weightsXY0 =
6182 159064 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[0]), weightsX);
6183 : const auto weightsXY1 =
6184 159064 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[1]), weightsX);
6185 : const auto weightsXY2 =
6186 159064 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[2]), weightsX);
6187 : const auto weightsXY3 =
6188 79532 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[3]), weightsX);
6189 :
6190 79532 : const GPtrDiff_t iOffset = iSrcOffset - poWK->nSrcXSize - 1;
6191 :
6192 79532 : int iBand = 0;
6193 : // Process 2 bands at a time
6194 159064 : for (; iBand + 1 < poWK->nBands; iBand += 2)
6195 : {
6196 79532 : const T *CPL_RESTRICT pBand0 =
6197 79532 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
6198 79532 : const auto row0_0 = XMMLoad4Values(pBand0 + iOffset);
6199 : const auto row1_0 =
6200 79532 : XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
6201 : const auto row2_0 =
6202 79532 : XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
6203 : const auto row3_0 =
6204 79532 : XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
6205 :
6206 79532 : const T *CPL_RESTRICT pBand1 =
6207 79532 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand + 1]);
6208 79532 : const auto row0_1 = XMMLoad4Values(pBand1 + iOffset);
6209 : const auto row1_1 =
6210 79532 : XMMLoad4Values(pBand1 + iOffset + poWK->nSrcXSize);
6211 : const auto row2_1 =
6212 79532 : XMMLoad4Values(pBand1 + iOffset + 2 * poWK->nSrcXSize);
6213 : const auto row3_1 =
6214 79532 : XMMLoad4Values(pBand1 + iOffset + 3 * poWK->nSrcXSize);
6215 :
6216 : const float fValue_0 =
6217 79532 : Convolute4x4(row0_0, row1_0, row2_0, row3_0, weightsXY0,
6218 : weightsXY1, weightsXY2, weightsXY3);
6219 :
6220 : const float fValue_1 =
6221 79532 : Convolute4x4(row0_1, row1_1, row2_1, row3_1, weightsXY0,
6222 : weightsXY1, weightsXY2, weightsXY3);
6223 :
6224 79532 : T *CPL_RESTRICT pDstBand0 =
6225 79532 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
6226 79532 : pDstBand0[iDstOffset] = GWKClampValueT<T>(fValue_0);
6227 :
6228 79532 : T *CPL_RESTRICT pDstBand1 =
6229 79532 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand + 1]);
6230 79532 : pDstBand1[iDstOffset] = GWKClampValueT<T>(fValue_1);
6231 : }
6232 79532 : if (iBand < poWK->nBands)
6233 : {
6234 79532 : const T *CPL_RESTRICT pBand0 =
6235 79532 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
6236 79532 : const auto row0 = XMMLoad4Values(pBand0 + iOffset);
6237 : const auto row1 =
6238 79532 : XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
6239 : const auto row2 =
6240 79532 : XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
6241 : const auto row3 =
6242 79532 : XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
6243 :
6244 : const float fValue =
6245 79532 : Convolute4x4(row0, row1, row2, row3, weightsXY0, weightsXY1,
6246 : weightsXY2, weightsXY3);
6247 :
6248 79532 : T *CPL_RESTRICT pDstBand =
6249 79532 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
6250 79532 : pDstBand[iDstOffset] = GWKClampValueT<T>(fValue);
6251 : }
6252 : }
6253 :
6254 81323 : if (poWK->pafDstDensity)
6255 441 : poWK->pafDstDensity[iDstOffset] = 1.0f;
6256 81323 : }
6257 :
6258 : #endif // defined(USE_SSE2)
6259 :
6260 : /************************************************************************/
6261 : /* GWKResampleNoMasksOrDstDensityOnlyThreadInternal() */
6262 : /************************************************************************/
6263 :
// Worker body shared by the "NoMasksOrDstDensityOnly" resampling wrappers.
//
// Template parameters:
//   T                   - pixel type the source/destination band buffers are
//                         reinterpreted as.
//   eResample           - resampling algorithm, selected at compile time.
//   bUse4SamplesFormula - when non-zero (and eResample is not nearest
//                         neighbour) the *4SampleT bilinear/cubic helpers are
//                         used instead of GWKResampleNoMasksT().
//
// pData is cast to a GWKJobStruct. Destination lines [iYMin, iYMax) of that
// job are processed one scanline at a time: the destination pixel centres are
// run through poWK->pfnTransformer, and for every pixel accepted by
// GWKCheckAndComputeSrcOffsets() each band is resampled and written to
// papabyDstImage. When pafDstDensity is non-null it is set to 1.0f for the
// written pixels. After each line the job's progress callback is invoked; a
// non-zero return stops the line loop early.
6264 : template <class T, GDALResampleAlg eResample, int bUse4SamplesFormula>
6265 1174 : static void GWKResampleNoMasksOrDstDensityOnlyThreadInternal(void *pData)
6266 :
6267 : {
6268 1174 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6269 1174 : GDALWarpKernel *poWK = psJob->poWK;
6270 1174 : const int iYMin = psJob->iYMin;
6271 1174 : const int iYMax = psJob->iYMax;
// The warp option is only parsed when a vertical shift is applied; the value
// is not read otherwise.
6272 1156 : const double dfMultFactorVerticalShiftPipeline =
6273 1174 : poWK->bApplyVerticalShift
6274 18 : ? CPLAtof(CSLFetchNameValueDef(
6275 18 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
6276 : "1.0"))
6277 : : 0.0;
6278 :
6279 1174 : const int nDstXSize = poWK->nDstXSize;
6280 1174 : const int nSrcXSize = poWK->nSrcXSize;
6281 1174 : const int nSrcYSize = poWK->nSrcYSize;
6282 :
6283 : /* -------------------------------------------------------------------- */
6284 : /* Allocate x,y,z coordinate arrays for transformation ... one */
6285 : /* scanlines worth of positions. */
6286 : /* -------------------------------------------------------------------- */
6287 :
6288 : // For x, 2 *, because we cache the precomputed values at the end.
6289 : double *padfX =
6290 1174 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
6291 : double *padfY =
6292 1174 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6293 : double *padfZ =
6294 1174 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6295 1174 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
6296 :
// Zero-initialised scratch buffers of 2 * radius + 1 doubles each; they are
// only handed to GWKResampleNoMasksT() below.
6297 1174 : const int nXRadius = poWK->nXRadius;
6298 : double *padfWeightsX =
6299 1174 : static_cast<double *>(CPLCalloc(1 + nXRadius * 2, sizeof(double)));
6300 : double *padfWeightsY = static_cast<double *>(
6301 1174 : CPLCalloc(1 + poWK->nYRadius * 2, sizeof(double)));
6302 1174 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
6303 1174 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
6304 1174 : const double dfErrorThreshold = CPLAtof(
6305 1174 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
6306 :
6307 : // Precompute values.
6308 254688 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6309 253514 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
6310 :
6311 : /* ==================================================================== */
6312 : /* Loop over output lines. */
6313 : /* ==================================================================== */
6314 129896 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
6315 : {
6316 : /* --------------------------------------------------------------------
6317 : */
6318 : /* Setup points to transform to source image space. */
6319 : /* --------------------------------------------------------------------
6320 : */
// Reload the cached destination X centres from the second half of padfX:
// the first half is passed to the transformer and read back afterwards as
// source coordinates.
6321 128723 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
6322 128723 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
6323 58231394 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6324 58102666 : padfY[iDstX] = dfY;
6325 128723 : memset(padfZ, 0, sizeof(double) * nDstXSize);
6326 :
6327 : /* --------------------------------------------------------------------
6328 : */
6329 : /* Transform the points from destination pixel/line coordinates */
6330 : /* to source pixel/line coordinates. */
6331 : /* --------------------------------------------------------------------
6332 : */
6333 128723 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
6334 : padfY, padfZ, pabSuccess);
6335 128723 : if (dfSrcCoordPrecision > 0.0)
6336 : {
6337 1000 : GWKRoundSourceCoordinates(
6338 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
6339 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6340 1000 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
6341 : }
6342 :
6343 : /* ====================================================================
6344 : */
6345 : /* Loop over pixels in output scanline. */
6346 : /* ====================================================================
6347 : */
6348 58374314 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6349 : {
6350 58245586 : GPtrDiff_t iSrcOffset = 0;
6351 58245586 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
6352 : padfX, padfY, nSrcXSize,
6353 : nSrcYSize, iSrcOffset))
6354 6540838 : continue;
6355 :
6356 : /* ====================================================================
6357 : */
6358 : /* Loop processing each band. */
6359 : /* ====================================================================
6360 : */
6361 51703812 : const GPtrDiff_t iDstOffset =
6362 51703812 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
6363 :
// Fast path (cubic, 4-sample formula, GByte/GUInt16, more than one band, no
// vertical shift): GWKCubicResampleNoMasks4MultiBandT() writes all bands and
// the destination density for this pixel, so the per-band loop is skipped.
6364 : #if defined(USE_SSE2)
6365 : if constexpr (bUse4SamplesFormula && eResample == GRA_Cubic &&
6366 : (std::is_same<T, GByte>::value ||
6367 : std::is_same<T, GUInt16>::value))
6368 : {
6369 752574 : if (poWK->nBands > 1 && !poWK->bApplyVerticalShift)
6370 : {
6371 81323 : GWKCubicResampleNoMasks4MultiBandT<T>(
6372 81323 : poWK, padfX[iDstX] - poWK->nSrcXOff,
6373 81323 : padfY[iDstX] - poWK->nSrcYOff, iDstOffset);
6374 :
6375 81323 : continue;
6376 : }
6377 : }
6378 : #endif // defined(USE_SSE2)
6379 :
// Reset for every pixel and passed to GWKResampleNoMasksT() for each band.
// NOTE(review): presumably lets the helper reuse the weights computed for the
// first band of a pixel - confirm against GWKResampleNoMasksT().
6380 51622489 : [[maybe_unused]] double dfInvWeights = 0;
6381 144496798 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6382 : {
6383 92792487 : T value = 0;
6384 : if constexpr (eResample == GRA_NearestNeighbour)
6385 : {
6386 76917549 : value = reinterpret_cast<T *>(
6387 76917549 : poWK->papabySrcImage[iBand])[iSrcOffset];
6388 : }
6389 : else if constexpr (bUse4SamplesFormula)
6390 : {
6391 : if constexpr (eResample == GRA_Bilinear)
6392 4806886 : GWKBilinearResampleNoMasks4SampleT(
6393 4806886 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6394 4806886 : padfY[iDstX] - poWK->nSrcYOff, &value);
6395 : else
6396 1906603 : GWKCubicResampleNoMasks4SampleT(
6397 1906603 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6398 1906603 : padfY[iDstX] - poWK->nSrcYOff, &value);
6399 : }
6400 : else
6401 : {
6402 9161449 : GWKResampleNoMasksT(
6403 9161449 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6404 9161449 : padfY[iDstX] - poWK->nSrcYOff, &value, padfWeightsX,
6405 : padfWeightsY, dfInvWeights);
6406 : }
6407 :
6408 92791297 : if (poWK->bApplyVerticalShift)
6409 : {
// A non-finite Z skips this band: neither the destination value nor the
// destination density is written for it.
6410 818 : if (!std::isfinite(padfZ[iDstX]))
6411 0 : continue;
6412 : // Subtract padfZ[] since the coordinate transformation is
6413 : // from target to source
6414 86023 : value = GWKClampValueT<T>(
6415 818 : value * poWK->dfMultFactorVerticalShift -
6416 818 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
6417 : }
6418 :
// Executed once per band; the store is the same for each band of a pixel.
6419 92874787 : if (poWK->pafDstDensity)
6420 11712299 : poWK->pafDstDensity[iDstOffset] = 1.0f;
6421 :
6422 92874787 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
6423 : value;
6424 : }
6425 : }
6426 :
6427 : /* --------------------------------------------------------------------
6428 : */
6429 : /* Report progress to the user, and optionally cancel out. */
6430 : /* --------------------------------------------------------------------
6431 : */
6432 128723 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
6433 1 : break;
6434 : }
6435 :
6436 : /* -------------------------------------------------------------------- */
6437 : /* Cleanup and return. */
6438 : /* -------------------------------------------------------------------- */
6439 1174 : CPLFree(padfX);
6440 1174 : CPLFree(padfY);
6441 1174 : CPLFree(padfZ);
6442 1174 : CPLFree(pabSuccess);
6443 1174 : CPLFree(padfWeightsX);
6444 1174 : CPLFree(padfWeightsY);
6445 1174 : }
6446 :
// Worker function that always uses the general (non 4-sample) code path:
// forwards pData to GWKResampleNoMasksOrDstDensityOnlyThreadInternal with
// bUse4SamplesFormula fixed to FALSE.
6447 : template <class T, GDALResampleAlg eResample>
6448 918 : static void GWKResampleNoMasksOrDstDensityOnlyThread(void *pData)
6449 : {
6450 918 : GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
6451 : pData);
6452 918 : }
6453 :
6454 : template <class T, GDALResampleAlg eResample>
6455 256 : static void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread(void *pData)
6456 :
6457 : {
6458 256 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6459 256 : GDALWarpKernel *poWK = psJob->poWK;
6460 : static_assert(eResample == GRA_Bilinear || eResample == GRA_Cubic);
6461 256 : const bool bUse4SamplesFormula =
6462 256 : poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
6463 256 : if (bUse4SamplesFormula)
6464 156 : GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, TRUE>(
6465 : pData);
6466 : else
6467 100 : GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
6468 : pData);
6469 256 : }
6470 :
// Nearest-neighbour, GByte pixels, no-masks path: runs
// GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_NearestNeighbour>
// through GWKRun() and returns the CPLErr from GWKRun().
6471 863 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6472 : {
6473 863 : return GWKRun(
6474 : poWK, "GWKNearestNoMasksOrDstDensityOnlyByte",
6475 863 : GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_NearestNeighbour>);
6476 : }
6477 :
// Bilinear, GByte pixels, no-masks path: runs
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte, GRA_Bilinear>
// through GWKRun() and returns the CPLErr from GWKRun().
6478 126 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6479 : {
6480 126 : return GWKRun(
6481 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyByte",
6482 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte,
6483 126 : GRA_Bilinear>);
6484 : }
6485 :
// Cubic, GByte pixels, no-masks path: runs
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte, GRA_Cubic>
// through GWKRun() and returns the CPLErr from GWKRun().
6486 72 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6487 : {
6488 72 : return GWKRun(
6489 : poWK, "GWKCubicNoMasksOrDstDensityOnlyByte",
6490 72 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte, GRA_Cubic>);
6491 : }
6492 :
// Cubic, float pixels, no-masks path: runs
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, GRA_Cubic>
// through GWKRun() and returns the CPLErr from GWKRun().
6493 9 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
6494 : {
6495 9 : return GWKRun(
6496 : poWK, "GWKCubicNoMasksOrDstDensityOnlyFloat",
6497 9 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, GRA_Cubic>);
6498 : }
6499 :
6500 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
6501 :
// Cubic, double pixels, no-masks path; only compiled when
// INSTANTIATE_FLOAT64_SSE2_IMPL is defined. Runs
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double, GRA_Cubic>
// through GWKRun() and returns the CPLErr from GWKRun().
6502 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
6503 : {
6504 : return GWKRun(
6505 : poWK, "GWKCubicNoMasksOrDstDensityOnlyDouble",
6506 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double, GRA_Cubic>);
6507 : }
6508 : #endif
6509 :
// Cubic spline, GByte pixels, no-masks path: runs
// GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_CubicSpline> (general
// formula, no 4-sample variant) through GWKRun() and returns its CPLErr.
6510 12 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6511 : {
6512 12 : return GWKRun(
6513 : poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyByte",
6514 12 : GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_CubicSpline>);
6515 : }
6516 :
6517 : /************************************************************************/
6518 : /* GWKNearestThread() */
6519 : /* */
6520 : /* Case for nearest neighbour resampling using valid flags, for the */
6521 : /* pixel type given by the template argument (GByte, GInt16, float). */
6522 : /* Should be as fast as possible for this particular transformation type. */
6523 : /************************************************************************/
6524 :
// Nearest-neighbour worker that honours the source validity and density
// masks, templated on the pixel type T.
//
// pData is cast to a GWKJobStruct; destination lines [iYMin, iYMax) of the
// job are processed. For each destination pixel accepted by
// GWKCheckAndComputeSrcOffsets():
//   - the pixel is skipped when panUnifiedSrcValid marks the source pixel
//     invalid (unless GWKAdjustSrcOffsetOnEdge() succeeds, see below);
//   - the pixel is skipped when pafUnifiedSrcDensity is below
//     SRC_DENSITY_THRESHOLD;
//   - otherwise each band is fetched with GWKGetPixelT() and either stored
//     directly (band density >= 1) or blended via GWKSetPixelValueRealT();
//   - finally the destination density is updated with GWKOverlayDensity() and
//     the pixel is flagged in panDstValid when that mask exists.
// After each line the job's progress callback is invoked; a non-zero return
// stops the line loop early.
6525 343 : template <class T> static void GWKNearestThread(void *pData)
6526 :
6527 : {
6528 343 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6529 343 : GDALWarpKernel *poWK = psJob->poWK;
6530 343 : const int iYMin = psJob->iYMin;
6531 343 : const int iYMax = psJob->iYMax;
6532 343 : const double dfMultFactorVerticalShiftPipeline =
6533 343 : poWK->bApplyVerticalShift
6534 0 : ? CPLAtof(CSLFetchNameValueDef(
6535 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
6536 : "1.0"))
6537 : : 0.0;
6538 :
6539 343 : const int nDstXSize = poWK->nDstXSize;
6540 343 : const int nSrcXSize = poWK->nSrcXSize;
6541 343 : const int nSrcYSize = poWK->nSrcYSize;
6542 :
6543 : /* -------------------------------------------------------------------- */
6544 : /* Allocate x,y,z coordinate arrays for transformation ... one */
6545 : /* scanlines worth of positions. */
6546 : /* -------------------------------------------------------------------- */
6547 :
6548 : // For x, 2 *, because we cache the precomputed values at the end.
6549 : double *padfX =
6550 343 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
6551 : double *padfY =
6552 343 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6553 : double *padfZ =
6554 343 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6555 343 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
6556 :
6557 343 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
6558 343 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
6559 343 : const double dfErrorThreshold = CPLAtof(
6560 343 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
6561 :
// Evaluated once per job. When true, a source pixel flagged invalid is not
// rejected outright: GWKAdjustSrcOffsetOnEdge() is tried first (see the
// validity test in the pixel loop).
6562 : const bool bOneSourceCornerFailsToReproject =
6563 343 : GWKOneSourceCornerFailsToReproject(psJob);
6564 :
6565 : // Precompute values.
6566 49707 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6567 49364 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
6568 :
6569 : /* ==================================================================== */
6570 : /* Loop over output lines. */
6571 : /* ==================================================================== */
6572 37157 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
6573 : {
6574 :
6575 : /* --------------------------------------------------------------------
6576 : */
6577 : /* Setup points to transform to source image space. */
6578 : /* --------------------------------------------------------------------
6579 : */
// Reload the cached destination X centres from the second half of padfX:
// the first half is passed to the transformer on every line.
6580 36814 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
6581 36814 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
6582 7743095 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6583 7706282 : padfY[iDstX] = dfY;
6584 36814 : memset(padfZ, 0, sizeof(double) * nDstXSize);
6585 :
6586 : /* --------------------------------------------------------------------
6587 : */
6588 : /* Transform the points from destination pixel/line coordinates */
6589 : /* to source pixel/line coordinates. */
6590 : /* --------------------------------------------------------------------
6591 : */
6592 36814 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
6593 : padfY, padfZ, pabSuccess);
6594 36814 : if (dfSrcCoordPrecision > 0.0)
6595 : {
6596 0 : GWKRoundSourceCoordinates(
6597 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
6598 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6599 0 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
6600 : }
6601 : /* ====================================================================
6602 : */
6603 : /* Loop over pixels in output scanline. */
6604 : /* ====================================================================
6605 : */
6606 7743095 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6607 : {
6608 7706282 : GPtrDiff_t iSrcOffset = 0;
6609 7706282 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
6610 : padfX, padfY, nSrcXSize,
6611 : nSrcYSize, iSrcOffset))
6612 2164638 : continue;
6613 :
6614 : /* --------------------------------------------------------------------
6615 : */
6616 : /* Do not try to apply invalid source pixels to the dest. */
6617 : /* --------------------------------------------------------------------
6618 : */
6619 7524668 : if (poWK->panUnifiedSrcValid != nullptr &&
6620 931241 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
6621 : {
6622 49670 : if (!bOneSourceCornerFailsToReproject)
6623 : {
6624 42185 : continue;
6625 : }
// NOTE(review): iSrcOffset is passed as an lvalue; presumably the helper
// moves it to a neighbouring valid pixel on success - confirm.
6626 7485 : else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
6627 : {
6628 5224 : continue;
6629 : }
6630 : }
6631 :
6632 : /* --------------------------------------------------------------------
6633 : */
6634 : /* Do not try to apply transparent source pixels to the
6635 : * destination.*/
6636 : /* --------------------------------------------------------------------
6637 : */
6638 6546016 : double dfDensity = 1.0;
6639 :
6640 6546016 : if (poWK->pafUnifiedSrcDensity != nullptr)
6641 : {
6642 1162245 : dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
6643 1162245 : if (dfDensity < SRC_DENSITY_THRESHOLD)
6644 1004371 : continue;
6645 : }
6646 :
6647 : /* ====================================================================
6648 : */
6649 : /* Loop processing each band. */
6650 : /* ====================================================================
6651 : */
6652 :
6653 5541654 : const GPtrDiff_t iDstOffset =
6654 5541654 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
6655 :
6656 12873738 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6657 : {
6658 7332114 : T value = 0;
6659 7332114 : double dfBandDensity = 0.0;
6660 :
6661 : /* --------------------------------------------------------------------
6662 : */
6663 : /* Collect the source value. */
6664 : /* --------------------------------------------------------------------
6665 : */
// A false return leaves the destination value of this band untouched.
6666 7332114 : if (GWKGetPixelT(poWK, iBand, iSrcOffset, &dfBandDensity,
6667 : &value))
6668 : {
6669 :
6670 7332104 : if (poWK->bApplyVerticalShift)
6671 : {
6672 0 : if (!std::isfinite(padfZ[iDstX]))
6673 0 : continue;
6674 : // Subtract padfZ[] since the coordinate transformation
6675 : // is from target to source
6676 0 : value = GWKClampValueT<T>(
6677 0 : value * poWK->dfMultFactorVerticalShift -
6678 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
6679 : }
6680 :
// Three cases on the band density: exactly 0 writes nothing, a partial
// density goes through GWKSetPixelValueRealT(), and a density of 1 or more
// is stored directly.
6681 7332104 : if (dfBandDensity < 1.0)
6682 : {
6683 159076 : if (dfBandDensity == 0.0)
6684 : {
6685 : // Do nothing.
6686 : }
6687 : else
6688 : {
6689 : // Let the general code take care of mixing.
6690 159076 : GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
6691 : dfBandDensity, value);
6692 : }
6693 : }
6694 : else
6695 : {
6696 7173023 : reinterpret_cast<T *>(
6697 7173023 : poWK->papabyDstImage[iBand])[iDstOffset] = value;
6698 : }
6699 : }
6700 : }
6701 :
6702 : /* --------------------------------------------------------------------
6703 : */
6704 : /* Mark this pixel valid/opaque in the output. */
6705 : /* --------------------------------------------------------------------
6706 : */
// NOTE(review): this runs for every pixel that reaches the band loop, even
// when no band value was written above - confirm that this is intended.
6707 5541654 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
6708 :
6709 5541654 : if (poWK->panDstValid != nullptr)
6710 : {
6711 4862206 : CPLMaskSet(poWK->panDstValid, iDstOffset);
6712 : }
6713 : } /* Next iDstX */
6714 :
6715 : /* --------------------------------------------------------------------
6716 : */
6717 : /* Report progress to the user, and optionally cancel out. */
6718 : /* --------------------------------------------------------------------
6719 : */
6720 36814 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
6721 0 : break;
6722 : }
6723 :
6724 : /* -------------------------------------------------------------------- */
6725 : /* Cleanup and return. */
6726 : /* -------------------------------------------------------------------- */
6727 343 : CPLFree(padfX);
6728 343 : CPLFree(padfY);
6729 343 : CPLFree(padfZ);
6730 343 : CPLFree(pabSuccess);
6731 343 : }
6732 :
// Nearest-neighbour with mask handling, GByte pixels: runs
// GWKNearestThread<GByte> through GWKRun() and returns its CPLErr.
6733 276 : static CPLErr GWKNearestByte(GDALWarpKernel *poWK)
6734 : {
6735 276 : return GWKRun(poWK, "GWKNearestByte", GWKNearestThread<GByte>);
6736 : }
6737 :
// Nearest-neighbour, GInt16 pixels, no-masks path: runs
// GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_NearestNeighbour>
// through GWKRun() and returns the CPLErr from GWKRun().
6738 18 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6739 : {
6740 18 : return GWKRun(
6741 : poWK, "GWKNearestNoMasksOrDstDensityOnlyShort",
6742 18 : GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_NearestNeighbour>);
6743 : }
6744 :
// Bilinear, GInt16 pixels, no-masks path: runs
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16, GRA_Bilinear>
// through GWKRun() and returns the CPLErr from GWKRun().
6745 18 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6746 : {
6747 18 : return GWKRun(
6748 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyShort",
6749 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16,
6750 18 : GRA_Bilinear>);
6751 : }
6752 :
// Bilinear, GUInt16 pixels, no-masks path: runs
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16, GRA_Bilinear>
// through GWKRun() and returns the CPLErr from GWKRun().
6753 6 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6754 : {
6755 6 : return GWKRun(
6756 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyUShort",
6757 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16,
6758 6 : GRA_Bilinear>);
6759 : }
6760 :
// Bilinear, float pixels, no-masks path: runs
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, GRA_Bilinear>
// through GWKRun() and returns the CPLErr from GWKRun().
6761 5 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
6762 : {
6763 5 : return GWKRun(
6764 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyFloat",
6765 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float,
6766 5 : GRA_Bilinear>);
6767 : }
6768 :
6769 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
6770 :
// Bilinear, double pixels, no-masks path; only compiled when
// INSTANTIATE_FLOAT64_SSE2_IMPL is defined. Runs
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double, GRA_Bilinear>
// through GWKRun() and returns the CPLErr from GWKRun().
6771 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
6772 : {
6773 : return GWKRun(
6774 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyDouble",
6775 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double,
6776 : GRA_Bilinear>);
6777 : }
6778 : #endif
6779 :
// Cubic, GInt16 pixels, no-masks path: runs
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16, GRA_Cubic>
// through GWKRun() and returns the CPLErr from GWKRun().
6780 5 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6781 : {
6782 5 : return GWKRun(
6783 : poWK, "GWKCubicNoMasksOrDstDensityOnlyShort",
6784 5 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16, GRA_Cubic>);
6785 : }
6786 :
// Cubic, GUInt16 pixels, no-masks path: runs
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16, GRA_Cubic>
// through GWKRun() and returns the CPLErr from GWKRun().
6787 12 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6788 : {
6789 12 : return GWKRun(
6790 : poWK, "GWKCubicNoMasksOrDstDensityOnlyUShort",
6791 12 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16, GRA_Cubic>);
6792 : }
6793 :
// Cubic spline, GInt16 pixels, no-masks path: runs
// GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_CubicSpline>
// through GWKRun() and returns the CPLErr from GWKRun().
6794 6 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6795 : {
6796 6 : return GWKRun(
6797 : poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyShort",
6798 6 : GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_CubicSpline>);
6799 : }
6800 :
// Cubic spline, GUInt16 pixels, no-masks path: runs
// GWKResampleNoMasksOrDstDensityOnlyThread<GUInt16, GRA_CubicSpline>
// through GWKRun() and returns the CPLErr from GWKRun().
6801 5 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6802 : {
6803 5 : return GWKRun(
6804 : poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyUShort",
6805 5 : GWKResampleNoMasksOrDstDensityOnlyThread<GUInt16, GRA_CubicSpline>);
6806 : }
6807 :
// Nearest-neighbour with mask handling, GInt16 pixels: runs
// GWKNearestThread<GInt16> through GWKRun() and returns its CPLErr.
6808 27 : static CPLErr GWKNearestShort(GDALWarpKernel *poWK)
6809 : {
6810 27 : return GWKRun(poWK, "GWKNearestShort", GWKNearestThread<GInt16>);
6811 : }
6812 :
// Nearest-neighbour, float pixels, no-masks path: runs
// GWKResampleNoMasksOrDstDensityOnlyThread<float, GRA_NearestNeighbour>
// through GWKRun() and returns the CPLErr from GWKRun().
6813 11 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
6814 : {
6815 11 : return GWKRun(
6816 : poWK, "GWKNearestNoMasksOrDstDensityOnlyFloat",
6817 11 : GWKResampleNoMasksOrDstDensityOnlyThread<float, GRA_NearestNeighbour>);
6818 : }
6819 :
// Nearest-neighbour with mask handling, float pixels: runs
// GWKNearestThread<float> through GWKRun() and returns its CPLErr.
6820 36 : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK)
6821 : {
6822 36 : return GWKRun(poWK, "GWKNearestFloat", GWKNearestThread<float>);
6823 : }
6824 :
6825 : /************************************************************************/
6826 : /* GWKAverageOrMode() */
6827 : /* */
6828 : /************************************************************************/
6829 :
6830 : static void GWKAverageOrModeThread(void *pData);
6831 :
// Entry point for the resampling modes handled by GWKAverageOrModeThread
// (forward-declared just above): runs that worker through GWKRun() and
// returns the CPLErr from GWKRun().
6832 130 : static CPLErr GWKAverageOrMode(GDALWarpKernel *poWK)
6833 : {
6834 130 : return GWKRun(poWK, "GWKAverageOrMode", GWKAverageOrModeThread);
6835 : }
6836 :
6837 : // Overall logic based on GWKGeneralCaseThread().
6838 130 : static void GWKAverageOrModeThread(void *pData)
6839 : {
6840 130 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6841 130 : GDALWarpKernel *poWK = psJob->poWK;
6842 130 : const int iYMin = psJob->iYMin;
6843 130 : const int iYMax = psJob->iYMax;
6844 : const double dfMultFactorVerticalShiftPipeline =
6845 130 : poWK->bApplyVerticalShift
6846 130 : ? CPLAtof(CSLFetchNameValueDef(
6847 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
6848 : "1.0"))
6849 130 : : 0.0;
6850 :
6851 130 : const int nDstXSize = poWK->nDstXSize;
6852 130 : const int nSrcXSize = poWK->nSrcXSize;
6853 130 : const int nSrcYSize = poWK->nSrcYSize;
6854 :
6855 : /* -------------------------------------------------------------------- */
6856 : /* Find out which algorithm to use (small optim.) */
6857 : /* -------------------------------------------------------------------- */
6858 130 : int nAlgo = 0;
6859 :
6860 : // Only used for GRA_Mode
6861 130 : float *pafRealVals = nullptr;
6862 130 : float *pafCounts = nullptr;
6863 130 : int nBins = 0;
6864 130 : int nBinsOffset = 0;
6865 130 : const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
6866 :
6867 : // Only used with nAlgo = 6.
6868 130 : float quant = 0.5;
6869 :
6870 : // To control array allocation only when data type is complex
6871 130 : const bool bIsComplex = GDALDataTypeIsComplex(poWK->eWorkingDataType) != 0;
6872 :
6873 130 : if (poWK->eResample == GRA_Average)
6874 : {
6875 71 : nAlgo = GWKAOM_Average;
6876 : }
6877 59 : else if (poWK->eResample == GRA_RMS)
6878 : {
6879 9 : nAlgo = GWKAOM_RMS;
6880 : }
6881 50 : else if (poWK->eResample == GRA_Mode)
6882 : {
6883 : // TODO check color table count > 256.
6884 23 : if (poWK->eWorkingDataType == GDT_Byte ||
6885 17 : poWK->eWorkingDataType == GDT_UInt16 ||
6886 17 : poWK->eWorkingDataType == GDT_Int16)
6887 : {
6888 14 : nAlgo = GWKAOM_Imode;
6889 :
6890 : // In the case of a paletted or non-paletted byte band,
6891 : // Input values are between 0 and 255.
6892 14 : if (poWK->eWorkingDataType == GDT_Byte)
6893 : {
6894 6 : nBins = 256;
6895 : }
6896 : // In the case of Int8, input values are between -128 and 127.
6897 8 : else if (poWK->eWorkingDataType == GDT_Int8)
6898 : {
6899 0 : nBins = 256;
6900 0 : nBinsOffset = 128;
6901 : }
6902 : // In the case of Int16, input values are between -32768 and 32767.
6903 8 : else if (poWK->eWorkingDataType == GDT_Int16)
6904 : {
6905 8 : nBins = 65536;
6906 8 : nBinsOffset = 32768;
6907 : }
6908 : // In the case of UInt16, input values are between 0 and 65535.
6909 0 : else if (poWK->eWorkingDataType == GDT_UInt16)
6910 : {
6911 0 : nBins = 65536;
6912 : }
6913 : pafCounts =
6914 14 : static_cast<float *>(VSI_MALLOC_VERBOSE(nBins * sizeof(float)));
6915 14 : if (pafCounts == nullptr)
6916 0 : return;
6917 : }
6918 : else
6919 : {
6920 9 : nAlgo = GWKAOM_Fmode;
6921 :
6922 9 : if (nSrcXSize > 0 && nSrcYSize > 0)
6923 : {
6924 : pafRealVals = static_cast<float *>(
6925 9 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
6926 : pafCounts = static_cast<float *>(
6927 9 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
6928 9 : if (pafRealVals == nullptr || pafCounts == nullptr)
6929 : {
6930 0 : VSIFree(pafRealVals);
6931 0 : VSIFree(pafCounts);
6932 0 : return;
6933 : }
6934 : }
6935 : }
6936 : }
6937 27 : else if (poWK->eResample == GRA_Max)
6938 : {
6939 6 : nAlgo = GWKAOM_Max;
6940 : }
6941 21 : else if (poWK->eResample == GRA_Min)
6942 : {
6943 5 : nAlgo = GWKAOM_Min;
6944 : }
6945 16 : else if (poWK->eResample == GRA_Med)
6946 : {
6947 6 : nAlgo = GWKAOM_Quant;
6948 6 : quant = 0.5;
6949 : }
6950 10 : else if (poWK->eResample == GRA_Q1)
6951 : {
6952 5 : nAlgo = GWKAOM_Quant;
6953 5 : quant = 0.25;
6954 : }
6955 5 : else if (poWK->eResample == GRA_Q3)
6956 : {
6957 5 : nAlgo = GWKAOM_Quant;
6958 5 : quant = 0.75;
6959 : }
6960 : #ifdef disabled
6961 : else if (poWK->eResample == GRA_Sum)
6962 : {
6963 : nAlgo = GWKAOM_Sum;
6964 : }
6965 : #endif
6966 : else
6967 : {
6968 : // Other resample algorithms not permitted here.
6969 0 : CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread() ERROR, "
6970 : "illegal resample");
6971 0 : return;
6972 : }
6973 :
6974 130 : CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread() using algo %d",
6975 : nAlgo);
6976 :
6977 : /* -------------------------------------------------------------------- */
6978 : /* Allocate x,y,z coordinate arrays for transformation ... two */
6979 : /* scanlines worth of positions. */
6980 : /* -------------------------------------------------------------------- */
6981 :
6982 : double *padfX =
6983 130 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6984 : double *padfY =
6985 130 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6986 : double *padfZ =
6987 130 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6988 : double *padfX2 =
6989 130 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6990 : double *padfY2 =
6991 130 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6992 : double *padfZ2 =
6993 130 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6994 130 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
6995 130 : int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
6996 :
6997 130 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
6998 130 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
6999 130 : const double dfErrorThreshold = CPLAtof(
7000 130 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
7001 :
7002 : const double dfExcludedValuesThreshold =
7003 130 : CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
7004 : "EXCLUDED_VALUES_PCT_THRESHOLD", "50")) /
7005 130 : 100.0;
7006 : const double dfNodataValuesThreshold =
7007 130 : CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
7008 : "NODATA_VALUES_PCT_THRESHOLD", "100")) /
7009 130 : 100.0;
7010 :
7011 : const int nXMargin =
7012 130 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
7013 : const int nYMargin =
7014 130 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
7015 :
7016 : /* ==================================================================== */
7017 : /* Loop over output lines. */
7018 : /* ==================================================================== */
7019 6627 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
7020 : {
7021 :
7022 : /* --------------------------------------------------------------------
7023 : */
7024 : /* Setup points to transform to source image space. */
7025 : /* --------------------------------------------------------------------
7026 : */
7027 1669840 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
7028 : {
7029 1663340 : padfX[iDstX] = iDstX + poWK->nDstXOff;
7030 1663340 : padfY[iDstX] = iDstY + poWK->nDstYOff;
7031 1663340 : padfZ[iDstX] = 0.0;
7032 1663340 : padfX2[iDstX] = iDstX + 1.0 + poWK->nDstXOff;
7033 1663340 : padfY2[iDstX] = iDstY + 1.0 + poWK->nDstYOff;
7034 1663340 : padfZ2[iDstX] = 0.0;
7035 : }
7036 :
7037 : /* --------------------------------------------------------------------
7038 : */
7039 : /* Transform the points from destination pixel/line coordinates */
7040 : /* to source pixel/line coordinates. */
7041 : /* --------------------------------------------------------------------
7042 : */
7043 6497 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
7044 : padfY, padfZ, pabSuccess);
7045 6497 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX2,
7046 : padfY2, padfZ2, pabSuccess2);
7047 :
7048 6497 : if (dfSrcCoordPrecision > 0.0)
7049 : {
7050 0 : GWKRoundSourceCoordinates(
7051 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
7052 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
7053 0 : poWK->nDstXOff, iDstY + poWK->nDstYOff);
7054 0 : GWKRoundSourceCoordinates(
7055 : nDstXSize, padfX2, padfY2, padfZ2, pabSuccess2,
7056 : dfSrcCoordPrecision, dfErrorThreshold, poWK->pfnTransformer,
7057 0 : psJob->pTransformerArg, 1.0 + poWK->nDstXOff,
7058 0 : iDstY + 1.0 + poWK->nDstYOff);
7059 : }
7060 :
7061 : /* ====================================================================
7062 : */
7063 : /* Loop over pixels in output scanline. */
7064 : /* ====================================================================
7065 : */
7066 1669840 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
7067 : {
7068 1663340 : GPtrDiff_t iSrcOffset = 0;
7069 1663340 : double dfDensity = 1.0;
7070 1663340 : bool bHasFoundDensity = false;
7071 :
7072 1663340 : if (!pabSuccess[iDstX] || !pabSuccess2[iDstX])
7073 311460 : continue;
7074 :
7075 : // Add some checks so that padfX[iDstX] - poWK->nSrcXOff is in
7076 : // reasonable range (https://github.com/OSGeo/gdal/issues/2365)
7077 1663340 : if (!(padfX[iDstX] - poWK->nSrcXOff >= -nXMargin &&
7078 1663320 : padfX2[iDstX] - poWK->nSrcXOff >= -nXMargin &&
7079 1663320 : padfY[iDstX] - poWK->nSrcYOff >= -nYMargin &&
7080 1663300 : padfY2[iDstX] - poWK->nSrcYOff >= -nYMargin &&
7081 1663300 : padfX[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
7082 1663300 : padfX2[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
7083 1663290 : padfY[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin &&
7084 1663290 : padfY2[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin))
7085 : {
7086 62 : continue;
7087 : }
7088 :
7089 1663280 : const GPtrDiff_t iDstOffset =
7090 1663280 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
7091 :
7092 : // Compute corners in source crs.
7093 :
7094 : // The transformation might not have preserved ordering of
7095 : // coordinates so do the necessary swapping (#5433).
7096 : // NOTE: this is really an approximative fix. To do something
7097 : // more precise we would for example need to compute the
7098 : // transformation of coordinates in the
7099 : // [iDstX,iDstY]x[iDstX+1,iDstY+1] square back to source
7100 : // coordinates, and take the bounding box of the got source
7101 : // coordinates.
7102 :
7103 1663280 : if (padfX[iDstX] > padfX2[iDstX])
7104 268744 : std::swap(padfX[iDstX], padfX2[iDstX]);
7105 :
7106 : // Detect situations where the target pixel is close to the
7107 : // antimeridian and when padfX[iDstX] and padfX2[iDstX] are very
7108 : // close to the left-most and right-most columns of the source
7109 : // raster. The 2 value below was experimentally determined to
7110 : // avoid false-positives and false-negatives.
7111 : // Addresses https://github.com/OSGeo/gdal/issues/6478
7112 1663280 : bool bWrapOverX = false;
7113 1663280 : const int nThresholdWrapOverX = std::min(2, nSrcXSize / 10);
7114 1663280 : if (poWK->nSrcXOff == 0 &&
7115 1663280 : padfX[iDstX] * poWK->dfXScale < nThresholdWrapOverX &&
7116 14495 : (nSrcXSize - padfX2[iDstX]) * poWK->dfXScale <
7117 : nThresholdWrapOverX)
7118 : {
7119 : // Check there is a discontinuity by checking at mid-pixel.
7120 : // NOTE: all this remains fragile. To confidently
7121 : // detect antimeridian warping we should probably try to access
7122 : // georeferenced coordinates, and not rely only on tests on
7123 : // image space coordinates. But accessing georeferenced
7124 : // coordinates from here is not trivial, and we would for example
7125 : // have to handle both geographic, Mercator, etc.
7126 : // Let's hope this heuristic is good enough for now.
7127 1041 : double x = iDstX + 0.5 + poWK->nDstXOff;
7128 1041 : double y = iDstY + poWK->nDstYOff;
7129 1041 : double z = 0;
7130 1041 : int bSuccess = FALSE;
7131 1041 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1, &x, &y,
7132 : &z, &bSuccess);
7133 1041 : if (bSuccess && x < padfX[iDstX])
7134 : {
7135 1008 : bWrapOverX = true;
7136 1008 : std::swap(padfX[iDstX], padfX2[iDstX]);
7137 1008 : padfX2[iDstX] += nSrcXSize;
7138 : }
7139 : }
7140 :
7141 1663280 : const double dfXMin = padfX[iDstX] - poWK->nSrcXOff;
7142 1663280 : const double dfXMax = padfX2[iDstX] - poWK->nSrcXOff;
7143 1663280 : constexpr double EPS = 1e-10;
7144 : // Check that [dfXMin, dfXMax] intersect with [0,nSrcXSize] with a tolerance
7145 1663280 : if (!(dfXMax > -EPS && dfXMin < nSrcXSize + EPS))
7146 72 : continue;
7147 1663200 : int iSrcXMin = static_cast<int>(std::max(floor(dfXMin + EPS), 0.0));
7148 1663200 : int iSrcXMax = static_cast<int>(
7149 1663200 : std::min(ceil(dfXMax - EPS), static_cast<double>(INT_MAX)));
7150 1663200 : if (!bWrapOverX)
7151 1662200 : iSrcXMax = std::min(iSrcXMax, nSrcXSize);
7152 1663200 : if (iSrcXMin == iSrcXMax && iSrcXMax < nSrcXSize)
7153 472 : iSrcXMax++;
7154 :
7155 1663200 : if (padfY[iDstX] > padfY2[iDstX])
7156 270117 : std::swap(padfY[iDstX], padfY2[iDstX]);
7157 1663200 : const double dfYMin = padfY[iDstX] - poWK->nSrcYOff;
7158 1663200 : const double dfYMax = padfY2[iDstX] - poWK->nSrcYOff;
7159 : // Check that [dfYMin, dfYMax] intersect with [0,nSrcYSize] with a tolerance
7160 1663200 : if (!(dfYMax > -EPS && dfYMin < nSrcYSize + EPS))
7161 36 : continue;
7162 1663170 : int iSrcYMin = static_cast<int>(std::max(floor(dfYMin + EPS), 0.0));
7163 : int iSrcYMax =
7164 1663170 : std::min(static_cast<int>(ceil(dfYMax - EPS)), nSrcYSize);
7165 1663170 : if (iSrcYMin == iSrcYMax && iSrcYMax < nSrcYSize)
7166 0 : iSrcYMax++;
7167 :
7168 : #define COMPUTE_WEIGHT_Y(iSrcY) \
7169 : ((iSrcY == iSrcYMin) \
7170 : ? ((iSrcYMin + 1 == iSrcYMax) ? 1.0 : 1 - (dfYMin - iSrcYMin)) \
7171 : : (iSrcY + 1 == iSrcYMax) ? 1 - (iSrcYMax - dfYMax) \
7172 : : 1.0)
7173 :
7174 : #define COMPUTE_WEIGHT(iSrcX, dfWeightY) \
7175 : ((iSrcX == iSrcXMin) ? ((iSrcXMin + 1 == iSrcXMax) \
7176 : ? dfWeightY \
7177 : : dfWeightY * (1 - (dfXMin - iSrcXMin))) \
7178 : : (iSrcX + 1 == iSrcXMax) ? dfWeightY * (1 - (iSrcXMax - dfXMax)) \
7179 : : dfWeightY)
7180 :
7181 1663170 : bool bDone = false;
7182 :
7183 : // Special Average mode where we process all bands together,
7184 : // to avoid averaging tuples that match an entry of m_aadfExcludedValues
7185 2267240 : if (nAlgo == GWKAOM_Average &&
7186 604073 : (!poWK->m_aadfExcludedValues.empty() ||
7187 393224 : dfNodataValuesThreshold < 1 - EPS) &&
7188 2267240 : !poWK->bApplyVerticalShift && !bIsComplex)
7189 : {
7190 393224 : double dfTotalWeightInvalid = 0.0;
7191 393224 : double dfTotalWeightExcluded = 0.0;
7192 393224 : double dfTotalWeightRegular = 0.0;
7193 786448 : std::vector<double> adfValueReal(poWK->nBands, 0);
7194 786448 : std::vector<double> adfValueAveraged(poWK->nBands, 0);
7195 : std::vector<int> anCountExcludedValues(
7196 393224 : poWK->m_aadfExcludedValues.size(), 0);
7197 :
7198 1572890 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7199 : {
7200 1179660 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7201 1179660 : iSrcOffset =
7202 1179660 : iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7203 5111860 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7204 : iSrcX++, iSrcOffset++)
7205 : {
7206 3932190 : if (bWrapOverX)
7207 0 : iSrcOffset =
7208 0 : (iSrcX % nSrcXSize) +
7209 0 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7210 :
7211 3932190 : const double dfWeight =
7212 3932190 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7213 3932190 : if (dfWeight <= 0)
7214 0 : continue;
7215 :
7216 3932200 : if (poWK->panUnifiedSrcValid != nullptr &&
7217 12 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
7218 : {
7219 3 : dfTotalWeightInvalid += dfWeight;
7220 3 : continue;
7221 : }
7222 :
7223 3932190 : bool bAllValid = true;
7224 7274900 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7225 : {
7226 6160660 : double dfBandDensity = 0;
7227 6160660 : double dfValueImagTmp = 0;
7228 9503370 : if (!(GWKGetPixelValue(
7229 : poWK, iBand, iSrcOffset, &dfBandDensity,
7230 6160660 : &adfValueReal[iBand], &dfValueImagTmp) &&
7231 3342710 : dfBandDensity > BAND_DENSITY_THRESHOLD))
7232 : {
7233 2817950 : bAllValid = false;
7234 2817950 : break;
7235 : }
7236 : }
7237 :
7238 3932190 : if (!bAllValid)
7239 : {
7240 2817950 : dfTotalWeightInvalid += dfWeight;
7241 2817950 : continue;
7242 : }
7243 :
7244 1114240 : bool bExcludedValueFound = false;
7245 2228350 : for (size_t i = 0;
7246 2228350 : i < poWK->m_aadfExcludedValues.size(); ++i)
7247 : {
7248 1114130 : if (poWK->m_aadfExcludedValues[i] == adfValueReal)
7249 : {
7250 21 : bExcludedValueFound = true;
7251 21 : ++anCountExcludedValues[i];
7252 21 : dfTotalWeightExcluded += dfWeight;
7253 21 : break;
7254 : }
7255 : }
7256 1114240 : if (!bExcludedValueFound)
7257 : {
7258 : // Weighted incremental algorithm mean
7259 : // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
7260 1114220 : dfTotalWeightRegular += dfWeight;
7261 4456870 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7262 : {
7263 3342650 : adfValueAveraged[iBand] +=
7264 6685300 : (dfWeight / dfTotalWeightRegular) *
7265 6685300 : (adfValueReal[iBand] -
7266 3342650 : adfValueAveraged[iBand]);
7267 : }
7268 : }
7269 : }
7270 : }
7271 :
7272 393224 : const double dfTotalWeight = dfTotalWeightInvalid +
7273 : dfTotalWeightExcluded +
7274 : dfTotalWeightRegular;
7275 393224 : if (dfTotalWeightInvalid > 0 &&
7276 : dfTotalWeightInvalid >=
7277 311293 : dfNodataValuesThreshold * dfTotalWeight)
7278 : {
7279 : // Do nothing. Leave bHasFoundDensity set to false.
7280 : }
7281 81934 : else if (dfTotalWeightExcluded > 0 &&
7282 : dfTotalWeightExcluded >=
7283 6 : dfExcludedValuesThreshold * dfTotalWeight)
7284 : {
7285 : // Find the most represented excluded value tuple
7286 3 : size_t iExcludedValue = 0;
7287 3 : int nExcludedValueCount = 0;
7288 6 : for (size_t i = 0; i < poWK->m_aadfExcludedValues.size();
7289 : ++i)
7290 : {
7291 3 : if (anCountExcludedValues[i] > nExcludedValueCount)
7292 : {
7293 3 : iExcludedValue = i;
7294 3 : nExcludedValueCount = anCountExcludedValues[i];
7295 : }
7296 : }
7297 :
7298 3 : bHasFoundDensity = true;
7299 :
7300 12 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7301 : {
7302 9 : GWKSetPixelValue(
7303 : poWK, iBand, iDstOffset, /* dfBandDensity = */ 1.0,
7304 9 : poWK->m_aadfExcludedValues[iExcludedValue][iBand],
7305 : 0);
7306 3 : }
7307 : }
7308 81931 : else if (dfTotalWeightRegular > 0)
7309 : {
7310 81931 : bHasFoundDensity = true;
7311 :
7312 327720 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7313 : {
7314 245789 : GWKSetPixelValue(poWK, iBand, iDstOffset,
7315 : /* dfBandDensity = */ 1.0,
7316 245789 : adfValueAveraged[iBand], 0);
7317 : }
7318 : }
7319 :
7320 : // Skip below loop on bands
7321 393224 : bDone = true;
7322 : }
7323 :
7324 : /* ====================================================================
7325 : */
7326 : /* Loop processing each band. */
7327 : /* ====================================================================
7328 : */
7329 :
7330 4439540 : for (int iBand = 0; !bDone && iBand < poWK->nBands; iBand++)
7331 : {
7332 2776380 : double dfBandDensity = 0.0;
7333 2776380 : double dfValueReal = 0.0;
7334 2776380 : double dfValueImag = 0.0;
7335 2776380 : double dfValueRealTmp = 0.0;
7336 2776380 : double dfValueImagTmp = 0.0;
7337 :
7338 : /* --------------------------------------------------------------------
7339 : */
7340 : /* Collect the source value. */
7341 : /* --------------------------------------------------------------------
7342 : */
7343 :
7344 : // Loop over source lines and pixels - 3 possible algorithms.
7345 :
7346 : // poWK->eResample == GRA_Average.
7347 2776380 : if (nAlgo == GWKAOM_Average)
7348 : {
7349 300849 : double dfTotalWeight = 0.0;
7350 :
7351 : // This code adapted from GDALDownsampleChunk32R_AverageT()
7352 : // in gcore/overview.cpp.
7353 631308 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7354 : {
7355 330459 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7356 330459 : iSrcOffset = iSrcXMin +
7357 330459 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7358 803200 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7359 : iSrcX++, iSrcOffset++)
7360 : {
7361 472741 : if (bWrapOverX)
7362 630 : iSrcOffset =
7363 630 : (iSrcX % nSrcXSize) +
7364 630 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7365 :
7366 472745 : if (poWK->panUnifiedSrcValid != nullptr &&
7367 4 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7368 : iSrcOffset))
7369 : {
7370 1 : continue;
7371 : }
7372 :
7373 472740 : if (GWKGetPixelValue(
7374 : poWK, iBand, iSrcOffset, &dfBandDensity,
7375 945480 : &dfValueRealTmp, &dfValueImagTmp) &&
7376 472740 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7377 : {
7378 472740 : const double dfWeight =
7379 472740 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7380 472740 : if (dfWeight > 0)
7381 : {
7382 : // Weighted incremental algorithm mean
7383 : // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
7384 472740 : dfTotalWeight += dfWeight;
7385 472740 : dfValueReal +=
7386 472740 : (dfWeight / dfTotalWeight) *
7387 472740 : (dfValueRealTmp - dfValueReal);
7388 472740 : if (bIsComplex)
7389 : {
7390 252 : dfValueImag +=
7391 252 : (dfWeight / dfTotalWeight) *
7392 252 : (dfValueImagTmp - dfValueImag);
7393 : }
7394 : }
7395 : }
7396 : }
7397 : }
7398 :
7399 300849 : if (dfTotalWeight > 0)
7400 : {
7401 300849 : if (poWK->bApplyVerticalShift)
7402 : {
7403 0 : if (!std::isfinite(padfZ[iDstX]))
7404 0 : continue;
7405 : // Subtract padfZ[] since the coordinate
7406 : // transformation is from target to source
7407 0 : dfValueReal =
7408 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
7409 0 : padfZ[iDstX] *
7410 : dfMultFactorVerticalShiftPipeline;
7411 : }
7412 :
7413 300849 : dfBandDensity = 1;
7414 300849 : bHasFoundDensity = true;
7415 : }
7416 : } // GRA_Average.
7417 : // poWK->eResample == GRA_RMS.
7418 2776380 : if (nAlgo == GWKAOM_RMS)
7419 : {
7420 300416 : double dfTotalReal = 0.0;
7421 300416 : double dfTotalImag = 0.0;
7422 300416 : double dfTotalWeight = 0.0;
7423 : // This code adapted from GDALDownsampleChunk32R_AverageT()
7424 : // in gcore/overview.cpp.
7425 630578 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7426 : {
7427 330162 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7428 330162 : iSrcOffset = iSrcXMin +
7429 330162 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7430 802723 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7431 : iSrcX++, iSrcOffset++)
7432 : {
7433 472561 : if (bWrapOverX)
7434 630 : iSrcOffset =
7435 630 : (iSrcX % nSrcXSize) +
7436 630 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7437 :
7438 472561 : if (poWK->panUnifiedSrcValid != nullptr &&
7439 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7440 : iSrcOffset))
7441 : {
7442 0 : continue;
7443 : }
7444 :
7445 472561 : if (GWKGetPixelValue(
7446 : poWK, iBand, iSrcOffset, &dfBandDensity,
7447 945122 : &dfValueRealTmp, &dfValueImagTmp) &&
7448 472561 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7449 : {
7450 472561 : const double dfWeight =
7451 472561 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7452 472561 : dfTotalWeight += dfWeight;
7453 472561 : dfTotalReal +=
7454 472561 : dfValueRealTmp * dfValueRealTmp * dfWeight;
7455 472561 : if (bIsComplex)
7456 48 : dfTotalImag += dfValueImagTmp *
7457 48 : dfValueImagTmp * dfWeight;
7458 : }
7459 : }
7460 : }
7461 :
7462 300416 : if (dfTotalWeight > 0)
7463 : {
7464 300416 : dfValueReal = sqrt(dfTotalReal / dfTotalWeight);
7465 :
7466 300416 : if (poWK->bApplyVerticalShift)
7467 : {
7468 0 : if (!std::isfinite(padfZ[iDstX]))
7469 0 : continue;
7470 : // Subtract padfZ[] since the coordinate
7471 : // transformation is from target to source
7472 0 : dfValueReal =
7473 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
7474 0 : padfZ[iDstX] *
7475 : dfMultFactorVerticalShiftPipeline;
7476 : }
7477 :
7478 300416 : if (bIsComplex)
7479 12 : dfValueImag = sqrt(dfTotalImag / dfTotalWeight);
7480 :
7481 300416 : dfBandDensity = 1;
7482 300416 : bHasFoundDensity = true;
7483 : }
7484 : } // GRA_RMS.
7485 : #ifdef disabled
7486 : else if (nAlgo == GWKAOM_Sum)
7487 : // poWK->eResample == GRA_Sum
7488 : {
7489 : double dfTotalReal = 0.0;
7490 : double dfTotalImag = 0.0;
7491 : bool bFoundValid = false;
7492 :
7493 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7494 : {
7495 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7496 : iSrcOffset = iSrcXMin +
7497 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7498 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7499 : iSrcX++, iSrcOffset++)
7500 : {
7501 : if (bWrapOverX)
7502 : iSrcOffset =
7503 : (iSrcX % nSrcXSize) +
7504 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7505 :
7506 : if (poWK->panUnifiedSrcValid != nullptr &&
7507 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7508 : iSrcOffset))
7509 : {
7510 : continue;
7511 : }
7512 :
7513 : if (GWKGetPixelValue(
7514 : poWK, iBand, iSrcOffset, &dfBandDensity,
7515 : &dfValueRealTmp, &dfValueImagTmp) &&
7516 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7517 : {
7518 : const double dfWeight =
7519 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7520 : bFoundValid = true;
7521 : dfTotalReal += dfValueRealTmp * dfWeight;
7522 : if (bIsComplex)
7523 : {
7524 : dfTotalImag += dfValueImagTmp * dfWeight;
7525 : }
7526 : }
7527 : }
7528 : }
7529 :
7530 : if (bFoundValid)
7531 : {
7532 : dfValueReal = dfTotalReal;
7533 :
7534 : if (poWK->bApplyVerticalShift)
7535 : {
7536 : if (!std::isfinite(padfZ[iDstX]))
7537 : continue;
7538 : // Subtract padfZ[] since the coordinate
7539 : // transformation is from target to source
7540 : dfValueReal =
7541 : dfValueReal * poWK->dfMultFactorVerticalShift -
7542 : padfZ[iDstX] *
7543 : dfMultFactorVerticalShiftPipeline;
7544 : }
7545 :
7546 : if (bIsComplex)
7547 : {
7548 : dfValueImag = dfTotalImag;
7549 : }
7550 : dfBandDensity = 1;
7551 : bHasFoundDensity = true;
7552 : }
7553 : } // GRA_Sum.
7554 : #endif
7555 2475960 : else if (nAlgo == GWKAOM_Imode || nAlgo == GWKAOM_Fmode)
7556 : // poWK->eResample == GRA_Mode
7557 : {
7558 : // This code adapted from GDALDownsampleChunk32R_Mode() in
7559 : // gcore/overview.cpp.
7560 500026 : if (nAlgo == GWKAOM_Fmode) // int32 or float.
7561 : {
7562 : // Does it make sense to run a
7563 : // majority filter on floating point data? But, here it
7564 : // is for the sake of compatibility. It won't look
7565 : // right on RGB images by the nature of the filter.
7566 3407 : nBins = 0;
7567 3407 : int iModeIndex = -1;
7568 :
7569 10228 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7570 : {
7571 6821 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7572 6821 : iSrcOffset =
7573 6821 : iSrcXMin +
7574 6821 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7575 20484 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7576 : iSrcX++, iSrcOffset++)
7577 : {
7578 13663 : if (bWrapOverX)
7579 0 : iSrcOffset =
7580 0 : (iSrcX % nSrcXSize) +
7581 0 : static_cast<GPtrDiff_t>(iSrcY) *
7582 0 : nSrcXSize;
7583 :
7584 13663 : if (poWK->panUnifiedSrcValid != nullptr &&
7585 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7586 : iSrcOffset))
7587 0 : continue;
7588 :
7589 13663 : if (GWKGetPixelValue(
7590 : poWK, iBand, iSrcOffset, &dfBandDensity,
7591 27326 : &dfValueRealTmp, &dfValueImagTmp) &&
7592 13663 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7593 : {
7594 13663 : const float fVal =
7595 13663 : static_cast<float>(dfValueRealTmp);
7596 13663 : const double dfWeight =
7597 13663 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7598 :
7599 : // Check array for existing entry.
7600 13663 : int i = 0;
7601 29135 : for (i = 0; i < nBins; ++i)
7602 : {
7603 17768 : if (pafRealVals[i] == fVal)
7604 : {
7605 :
7606 2296 : pafCounts[i] +=
7607 2296 : static_cast<float>(dfWeight);
7608 2296 : bool bValIsMaxCount =
7609 2296 : (pafCounts[i] >
7610 2296 : pafCounts[iModeIndex]);
7611 :
7612 2296 : if (!bValIsMaxCount &&
7613 1492 : pafCounts[i] ==
7614 1492 : pafCounts[iModeIndex])
7615 : {
7616 1487 : switch (eTieStrategy)
7617 : {
7618 1474 : case GWKTS_First:
7619 1474 : break;
7620 6 : case GWKTS_Min:
7621 6 : bValIsMaxCount =
7622 : fVal <
7623 : pafRealVals
7624 6 : [iModeIndex];
7625 6 : break;
7626 7 : case GWKTS_Max:
7627 7 : bValIsMaxCount =
7628 : fVal >
7629 : pafRealVals
7630 7 : [iModeIndex];
7631 7 : break;
7632 : }
7633 : }
7634 :
7635 2296 : if (bValIsMaxCount)
7636 : {
7637 807 : iModeIndex = i;
7638 : }
7639 :
7640 2296 : break;
7641 : }
7642 : }
7643 :
7644 : // Add to arr if entry not already there.
7645 13663 : if (i == nBins)
7646 : {
7647 11367 : pafRealVals[i] = fVal;
7648 11367 : pafCounts[i] =
7649 11367 : static_cast<float>(dfWeight);
7650 :
7651 11367 : if (iModeIndex < 0)
7652 3407 : iModeIndex = i;
7653 :
7654 11367 : ++nBins;
7655 : }
7656 : }
7657 : }
7658 : }
7659 :
7660 3407 : if (iModeIndex != -1)
7661 : {
7662 3407 : dfValueReal = pafRealVals[iModeIndex];
7663 :
7664 3407 : if (poWK->bApplyVerticalShift)
7665 : {
7666 0 : if (!std::isfinite(padfZ[iDstX]))
7667 0 : continue;
7668 : // Subtract padfZ[] since the coordinate
7669 : // transformation is from target to source
7670 0 : dfValueReal =
7671 0 : dfValueReal *
7672 0 : poWK->dfMultFactorVerticalShift -
7673 0 : padfZ[iDstX] *
7674 : dfMultFactorVerticalShiftPipeline;
7675 : }
7676 :
7677 3407 : dfBandDensity = 1;
7678 3407 : bHasFoundDensity = true;
7679 : }
7680 : }
7681 : else // byte or int16.
7682 : {
7683 496619 : float fMaxCount = 0.0f;
7684 496619 : int nMode = -1;
7685 496619 : bool bHasSourceValues = false;
7686 :
7687 496619 : memset(pafCounts, 0, nBins * sizeof(float));
7688 :
7689 1612550 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7690 : {
7691 1115930 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7692 1115930 : iSrcOffset =
7693 1115930 : iSrcXMin +
7694 1115930 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7695 4733150 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7696 : iSrcX++, iSrcOffset++)
7697 : {
7698 3617220 : if (bWrapOverX)
7699 630 : iSrcOffset =
7700 630 : (iSrcX % nSrcXSize) +
7701 630 : static_cast<GPtrDiff_t>(iSrcY) *
7702 630 : nSrcXSize;
7703 :
7704 3617220 : if (poWK->panUnifiedSrcValid != nullptr &&
7705 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7706 : iSrcOffset))
7707 0 : continue;
7708 :
7709 3617220 : if (GWKGetPixelValue(
7710 : poWK, iBand, iSrcOffset, &dfBandDensity,
7711 7234430 : &dfValueRealTmp, &dfValueImagTmp) &&
7712 3617220 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7713 : {
7714 3617220 : bHasSourceValues = true;
7715 3617220 : const int nVal =
7716 3617220 : static_cast<int>(dfValueRealTmp);
7717 3617220 : const int iBin = nVal + nBinsOffset;
7718 3617220 : const double dfWeight =
7719 3617220 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7720 :
7721 : // Sum the density.
7722 3617220 : pafCounts[iBin] +=
7723 3617220 : static_cast<float>(dfWeight);
7724 : // Is it the most common value so far?
7725 3617220 : bool bUpdateMode =
7726 3617220 : pafCounts[iBin] > fMaxCount;
7727 3617220 : if (!bUpdateMode &&
7728 778312 : pafCounts[iBin] == fMaxCount)
7729 : {
7730 218624 : switch (eTieStrategy)
7731 : {
7732 218616 : case GWKTS_First:
7733 218616 : break;
7734 4 : case GWKTS_Min:
7735 4 : bUpdateMode = nVal < nMode;
7736 4 : break;
7737 4 : case GWKTS_Max:
7738 4 : bUpdateMode = nVal > nMode;
7739 4 : break;
7740 : }
7741 : }
7742 3617220 : if (bUpdateMode)
7743 : {
7744 2838910 : nMode = nVal;
7745 2838910 : fMaxCount = pafCounts[iBin];
7746 : }
7747 : }
7748 : }
7749 : }
7750 :
7751 496619 : if (bHasSourceValues)
7752 : {
7753 496619 : dfValueReal = nMode;
7754 :
7755 496619 : if (poWK->bApplyVerticalShift)
7756 : {
7757 0 : if (!std::isfinite(padfZ[iDstX]))
7758 0 : continue;
7759 : // Subtract padfZ[] since the coordinate
7760 : // transformation is from target to source
7761 0 : dfValueReal =
7762 0 : dfValueReal *
7763 0 : poWK->dfMultFactorVerticalShift -
7764 0 : padfZ[iDstX] *
7765 : dfMultFactorVerticalShiftPipeline;
7766 : }
7767 :
7768 496619 : dfBandDensity = 1;
7769 496619 : bHasFoundDensity = true;
7770 : }
7771 500026 : }
7772 : } // GRA_Mode.
7773 1975930 : else if (nAlgo == GWKAOM_Max)
7774 : // poWK->eResample == GRA_Max.
7775 : {
7776 335037 : bool bFoundValid = false;
7777 335037 : double dfTotalReal = std::numeric_limits<double>::lowest();
7778 : // This code adapted from nAlgo 1 method, GRA_Average.
7779 1288010 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7780 : {
7781 952975 : iSrcOffset = iSrcXMin +
7782 952975 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7783 4406540 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7784 : iSrcX++, iSrcOffset++)
7785 : {
7786 3453560 : if (bWrapOverX)
7787 630 : iSrcOffset =
7788 630 : (iSrcX % nSrcXSize) +
7789 630 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7790 :
7791 3456370 : if (poWK->panUnifiedSrcValid != nullptr &&
7792 2809 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7793 : iSrcOffset))
7794 : {
7795 2446 : continue;
7796 : }
7797 :
7798 : // Returns pixel value if it is not no data.
7799 3451120 : if (GWKGetPixelValue(
7800 : poWK, iBand, iSrcOffset, &dfBandDensity,
7801 6902230 : &dfValueRealTmp, &dfValueImagTmp) &&
7802 3451120 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7803 : {
7804 3451120 : bFoundValid = true;
7805 3451120 : if (dfTotalReal < dfValueRealTmp)
7806 : {
7807 442642 : dfTotalReal = dfValueRealTmp;
7808 : }
7809 : }
7810 : }
7811 : }
7812 :
7813 335037 : if (bFoundValid)
7814 : {
7815 335037 : dfValueReal = dfTotalReal;
7816 :
7817 335037 : if (poWK->bApplyVerticalShift)
7818 : {
7819 0 : if (!std::isfinite(padfZ[iDstX]))
7820 0 : continue;
7821 : // Subtract padfZ[] since the coordinate
7822 : // transformation is from target to source
7823 0 : dfValueReal =
7824 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
7825 0 : padfZ[iDstX] *
7826 : dfMultFactorVerticalShiftPipeline;
7827 : }
7828 :
7829 335037 : dfBandDensity = 1;
7830 335037 : bHasFoundDensity = true;
7831 : }
7832 : } // GRA_Max.
7833 1640900 : else if (nAlgo == GWKAOM_Min)
7834 : // poWK->eResample == GRA_Min.
7835 : {
7836 335012 : bool bFoundValid = false;
7837 335012 : double dfTotalReal = std::numeric_limits<double>::max();
7838 : // This code adapted from nAlgo 1 method, GRA_Average.
7839 1287720 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7840 : {
7841 952710 : iSrcOffset = iSrcXMin +
7842 952710 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7843 4403460 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7844 : iSrcX++, iSrcOffset++)
7845 : {
7846 3450750 : if (bWrapOverX)
7847 630 : iSrcOffset =
7848 630 : (iSrcX % nSrcXSize) +
7849 630 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7850 :
7851 3450750 : if (poWK->panUnifiedSrcValid != nullptr &&
7852 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7853 : iSrcOffset))
7854 : {
7855 0 : continue;
7856 : }
7857 :
7858 : // Returns pixel value if it is not no data.
7859 3450750 : if (GWKGetPixelValue(
7860 : poWK, iBand, iSrcOffset, &dfBandDensity,
7861 6901500 : &dfValueRealTmp, &dfValueImagTmp) &&
7862 3450750 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7863 : {
7864 3450750 : bFoundValid = true;
7865 3450750 : if (dfTotalReal > dfValueRealTmp)
7866 : {
7867 443069 : dfTotalReal = dfValueRealTmp;
7868 : }
7869 : }
7870 : }
7871 : }
7872 :
7873 335012 : if (bFoundValid)
7874 : {
7875 335012 : dfValueReal = dfTotalReal;
7876 :
7877 335012 : if (poWK->bApplyVerticalShift)
7878 : {
7879 0 : if (!std::isfinite(padfZ[iDstX]))
7880 0 : continue;
7881 : // Subtract padfZ[] since the coordinate
7882 : // transformation is from target to source
7883 0 : dfValueReal =
7884 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
7885 0 : padfZ[iDstX] *
7886 : dfMultFactorVerticalShiftPipeline;
7887 : }
7888 :
7889 335012 : dfBandDensity = 1;
7890 335012 : bHasFoundDensity = true;
7891 : }
7892 : } // GRA_Min.
7893 1305880 : else if (nAlgo == GWKAOM_Quant)
7894 : // poWK->eResample == GRA_Med | GRA_Q1 | GRA_Q3.
7895 : {
7896 1005040 : bool bFoundValid = false;
7897 1005040 : std::vector<double> dfRealValuesTmp;
7898 :
7899 : // This code adapted from nAlgo 1 method, GRA_Average.
7900 3863170 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7901 : {
7902 2858130 : iSrcOffset = iSrcXMin +
7903 2858130 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7904 13210400 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7905 : iSrcX++, iSrcOffset++)
7906 : {
7907 10352300 : if (bWrapOverX)
7908 1890 : iSrcOffset =
7909 1890 : (iSrcX % nSrcXSize) +
7910 1890 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7911 :
7912 10352300 : if (poWK->panUnifiedSrcValid != nullptr &&
7913 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7914 : iSrcOffset))
7915 : {
7916 0 : continue;
7917 : }
7918 :
7919 : // Returns pixel value if it is not no data.
7920 10352300 : if (GWKGetPixelValue(
7921 : poWK, iBand, iSrcOffset, &dfBandDensity,
7922 20704500 : &dfValueRealTmp, &dfValueImagTmp) &&
7923 10352300 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7924 : {
7925 10352300 : bFoundValid = true;
7926 10352300 : dfRealValuesTmp.push_back(dfValueRealTmp);
7927 : }
7928 : }
7929 : }
7930 :
7931 1005040 : if (bFoundValid)
7932 : {
7933 1005040 : std::sort(dfRealValuesTmp.begin(),
7934 : dfRealValuesTmp.end());
7935 : int quantIdx = static_cast<int>(
7936 1005040 : std::ceil(quant * dfRealValuesTmp.size() - 1));
7937 1005040 : dfValueReal = dfRealValuesTmp[quantIdx];
7938 :
7939 1005040 : if (poWK->bApplyVerticalShift)
7940 : {
7941 0 : if (!std::isfinite(padfZ[iDstX]))
7942 0 : continue;
7943 : // Subtract padfZ[] since the coordinate
7944 : // transformation is from target to source
7945 0 : dfValueReal =
7946 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
7947 0 : padfZ[iDstX] *
7948 : dfMultFactorVerticalShiftPipeline;
7949 : }
7950 :
7951 1005040 : dfBandDensity = 1;
7952 1005040 : bHasFoundDensity = true;
7953 1005040 : dfRealValuesTmp.clear();
7954 : }
7955 : } // Quantile.
7956 :
7957 : /* --------------------------------------------------------------------
7958 : */
7959 : /* We have a computed value from the source. Now apply it
7960 : * to */
7961 : /* the destination pixel. */
7962 : /* --------------------------------------------------------------------
7963 : */
7964 2776380 : if (bHasFoundDensity)
7965 : {
7966 : // TODO: Should we compute dfBandDensity in fct of
7967 : // nCount/nCount2, or use as a threshold to set the dest
7968 : // value?
7969 : // dfBandDensity = (float) nCount / nCount2;
7970 : // if( (float) nCount / nCount2 > 0.1 )
7971 : // or fix gdalwarp crop_to_cutline to crop partially
7972 : // overlapping pixels.
7973 2776380 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
7974 : dfValueReal, dfValueImag);
7975 : }
7976 : }
7977 :
7978 1663170 : if (!bHasFoundDensity)
7979 311290 : continue;
7980 :
7981 : /* --------------------------------------------------------------------
7982 : */
7983 : /* Update destination density/validity masks. */
7984 : /* --------------------------------------------------------------------
7985 : */
7986 1351880 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
7987 :
7988 1351880 : if (poWK->panDstValid != nullptr)
7989 : {
7990 74 : CPLMaskSet(poWK->panDstValid, iDstOffset);
7991 : }
7992 : } /* Next iDstX */
7993 :
7994 : /* --------------------------------------------------------------------
7995 : */
7996 : /* Report progress to the user, and optionally cancel out. */
7997 : /* --------------------------------------------------------------------
7998 : */
7999 6497 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
8000 0 : break;
8001 : }
8002 :
8003 : /* -------------------------------------------------------------------- */
8004 : /* Cleanup and return. */
8005 : /* -------------------------------------------------------------------- */
8006 130 : CPLFree(padfX);
8007 130 : CPLFree(padfY);
8008 130 : CPLFree(padfZ);
8009 130 : CPLFree(padfX2);
8010 130 : CPLFree(padfY2);
8011 130 : CPLFree(padfZ2);
8012 130 : CPLFree(pabSuccess);
8013 130 : CPLFree(pabSuccess2);
8014 130 : VSIFree(pafCounts);
8015 130 : VSIFree(pafRealVals);
8016 : }
8017 :
8018 : /************************************************************************/
8019 : /* getOrientation() */
8020 : /************************************************************************/
8021 :
typedef std::pair<double, double> XYPair;

// Orientation of the turn made at p2 when walking p1 -> p2 -> p3.
// Returns 1 whether (p1,p2,p3) is clockwise oriented,
// -1 if it is counter-clockwise oriented,
// or 0 if it is colinear (absolute cross product below 1e-20).
static int getOrientation(const XYPair &p1, const XYPair &p2, const XYPair &p3)
{
    const double p1x = p1.first;
    const double p1y = p1.second;
    const double p2x = p2.first;
    const double p2y = p2.second;
    const double p3x = p3.first;
    const double p3y = p3.second;
    // Cross product of the two consecutive edges (p1,p2) and (p2,p3).
    const double val = (p2y - p1y) * (p3x - p2x) - (p2x - p1x) * (p3y - p2y);
    if (std::abs(val) < 1e-20)
        return 0;
    else if (val > 0)
        return 1;
    else
        return -1;
}

/************************************************************************/
/*                              isConvex()                              */
/************************************************************************/

typedef std::vector<XYPair> XYPoly;

// Returns whether poly is convex, i.e. whether all its non-degenerate turns
// have the same orientation.
//
// poly must be closed (poly.back() == poly.front()).
//
// A ring with fewer than 4 points (less than 3 distinct vertices plus the
// closing point) is degenerate and reported as convex.
static bool isConvex(const XYPoly &poly)
{
    const size_t n = poly.size();
    if (n < 4)
    {
        // Nothing that can be concave, and indexing poly[i + 2] below would
        // be out of bounds for the smallest sizes.
        return true;
    }

    int last_orientation = 0;

    // Turns at vertices 1 .. n-2, using triples (i, i+1, i+2).
    for (size_t i = 0; i + 2 < n; ++i)
    {
        const int orientation =
            getOrientation(poly[i], poly[i + 1], poly[i + 2]);
        if (orientation != 0)
        {
            if (last_orientation == 0)
                last_orientation = orientation;
            else if (orientation != last_orientation)
                return false;
        }
    }

    // Turn at vertex 0. Because the ring is closed, poly[n - 1] is the same
    // point as poly[0], so the vertex preceding it is poly[n - 2]. Without
    // this check a reflex angle located at the first vertex goes unnoticed.
    const int closing_orientation =
        getOrientation(poly[n - 2], poly[0], poly[1]);
    if (closing_orientation != 0 && last_orientation != 0 &&
        closing_orientation != last_orientation)
    {
        return false;
    }

    return true;
}
8071 :
8072 : /************************************************************************/
8073 : /* pointIntersectsConvexPoly() */
8074 : /************************************************************************/
8075 :
8076 : // Returns whether xy intersects poly, that must be closed and convex.
8077 6049100 : static bool pointIntersectsConvexPoly(const XYPair &xy, const XYPoly &poly)
8078 : {
8079 6049100 : const size_t n = poly.size();
8080 6049100 : double dx1 = xy.first - poly[0].first;
8081 6049100 : double dy1 = xy.second - poly[0].second;
8082 6049100 : double dx2 = poly[1].first - poly[0].first;
8083 6049100 : double dy2 = poly[1].second - poly[0].second;
8084 6049100 : double prevCrossProduct = dx1 * dy2 - dx2 * dy1;
8085 :
8086 : // Check if the point remains on the same side (left/right) of all edges
8087 14556400 : for (size_t i = 2; i < n; i++)
8088 : {
8089 12793100 : dx1 = xy.first - poly[i - 1].first;
8090 12793100 : dy1 = xy.second - poly[i - 1].second;
8091 :
8092 12793100 : dx2 = poly[i].first - poly[i - 1].first;
8093 12793100 : dy2 = poly[i].second - poly[i - 1].second;
8094 :
8095 12793100 : double crossProduct = dx1 * dy2 - dx2 * dy1;
8096 12793100 : if (std::abs(prevCrossProduct) < 1e-20)
8097 725558 : prevCrossProduct = crossProduct;
8098 12067500 : else if (prevCrossProduct * crossProduct < 0)
8099 4285760 : return false;
8100 : }
8101 :
8102 1763340 : return true;
8103 : }
8104 :
8105 : /************************************************************************/
8106 : /* getIntersection() */
8107 : /************************************************************************/
8108 :
8109 : /* Returns intersection of [p1,p2] with [p3,p4], if
8110 : * it is a single point, and the 2 segments are not colinear.
8111 : */
8112 11811000 : static bool getIntersection(const XYPair &p1, const XYPair &p2,
8113 : const XYPair &p3, const XYPair &p4, XYPair &xy)
8114 : {
8115 11811000 : const double x1 = p1.first;
8116 11811000 : const double y1 = p1.second;
8117 11811000 : const double x2 = p2.first;
8118 11811000 : const double y2 = p2.second;
8119 11811000 : const double x3 = p3.first;
8120 11811000 : const double y3 = p3.second;
8121 11811000 : const double x4 = p4.first;
8122 11811000 : const double y4 = p4.second;
8123 11811000 : const double t_num = (x1 - x3) * (y3 - y4) - (y1 - y3) * (x3 - x4);
8124 11811000 : const double denom = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4);
8125 11811000 : if (t_num * denom < 0 || std::abs(t_num) > std::abs(denom) || denom == 0)
8126 9260780 : return false;
8127 :
8128 2550260 : const double u_num = (x1 - x3) * (y1 - y2) - (y1 - y3) * (x1 - x2);
8129 2550260 : if (u_num * denom < 0 || std::abs(u_num) > std::abs(denom))
8130 973924 : return false;
8131 :
8132 1576340 : const double t = t_num / denom;
8133 1576340 : xy.first = x1 + t * (x2 - x1);
8134 1576340 : xy.second = y1 + t * (y2 - y1);
8135 1576340 : return true;
8136 : }
8137 :
8138 : /************************************************************************/
8139 : /* getConvexPolyIntersection() */
8140 : /************************************************************************/
8141 :
8142 : // poly1 and poly2 must be closed and convex.
8143 : // The returned intersection will not necessary be closed.
8144 785302 : static void getConvexPolyIntersection(const XYPoly &poly1, const XYPoly &poly2,
8145 : XYPoly &intersection)
8146 : {
8147 785302 : intersection.clear();
8148 :
8149 : // Add all points of poly1 inside poly2
8150 3926510 : for (size_t i = 0; i < poly1.size() - 1; ++i)
8151 : {
8152 3141210 : if (pointIntersectsConvexPoly(poly1[i], poly2))
8153 1187430 : intersection.push_back(poly1[i]);
8154 : }
8155 785302 : if (intersection.size() == poly1.size() - 1)
8156 : {
8157 : // poly1 is inside poly2
8158 119100 : return;
8159 : }
8160 :
8161 : // Add all points of poly2 inside poly1
8162 3634860 : for (size_t i = 0; i < poly2.size() - 1; ++i)
8163 : {
8164 2907890 : if (pointIntersectsConvexPoly(poly2[i], poly1))
8165 575904 : intersection.push_back(poly2[i]);
8166 : }
8167 :
8168 : // Compute the intersection of all edges of both polygons
8169 726972 : XYPair xy;
8170 3634860 : for (size_t i1 = 0; i1 < poly1.size() - 1; ++i1)
8171 : {
8172 14539400 : for (size_t i2 = 0; i2 < poly2.size() - 1; ++i2)
8173 : {
8174 11631600 : if (getIntersection(poly1[i1], poly1[i1 + 1], poly2[i2],
8175 11631600 : poly2[i2 + 1], xy))
8176 : {
8177 1576230 : intersection.push_back(xy);
8178 : }
8179 : }
8180 : }
8181 :
8182 726972 : if (intersection.empty())
8183 60770 : return;
8184 :
8185 : // Find lowest-left point in intersection set
8186 666202 : double lowest_x = std::numeric_limits<double>::max();
8187 666202 : double lowest_y = std::numeric_limits<double>::max();
8188 3772450 : for (const auto &pair : intersection)
8189 : {
8190 3106240 : const double x = pair.first;
8191 3106240 : const double y = pair.second;
8192 3106240 : if (y < lowest_y || (y == lowest_y && x < lowest_x))
8193 : {
8194 1096040 : lowest_x = x;
8195 1096040 : lowest_y = y;
8196 : }
8197 : }
8198 :
8199 5737980 : const auto sortFunc = [&](const XYPair &p1, const XYPair &p2)
8200 : {
8201 5737980 : const double p1x_diff = p1.first - lowest_x;
8202 5737980 : const double p1y_diff = p1.second - lowest_y;
8203 5737980 : const double p2x_diff = p2.first - lowest_x;
8204 5737980 : const double p2y_diff = p2.second - lowest_y;
8205 5737980 : if (p2y_diff == 0.0 && p1y_diff == 0.0)
8206 : {
8207 2655420 : if (p1x_diff >= 0)
8208 : {
8209 2655420 : if (p2x_diff >= 0)
8210 2655420 : return p1.first < p2.first;
8211 0 : return true;
8212 : }
8213 : else
8214 : {
8215 0 : if (p2x_diff >= 0)
8216 0 : return false;
8217 0 : return p1.first < p2.first;
8218 : }
8219 : }
8220 :
8221 3082560 : if (p2x_diff == 0.0 && p1x_diff == 0.0)
8222 1046960 : return p1.second < p2.second;
8223 :
8224 : double tan_p1;
8225 2035600 : if (p1x_diff == 0.0)
8226 464622 : tan_p1 = p1y_diff == 0.0 ? 0.0 : std::numeric_limits<double>::max();
8227 : else
8228 1570980 : tan_p1 = p1y_diff / p1x_diff;
8229 :
8230 : double tan_p2;
8231 2035600 : if (p2x_diff == 0.0)
8232 839515 : tan_p2 = p2y_diff == 0.0 ? 0.0 : std::numeric_limits<double>::max();
8233 : else
8234 1196080 : tan_p2 = p2y_diff / p2x_diff;
8235 :
8236 2035600 : if (tan_p1 >= 0)
8237 : {
8238 1904790 : if (tan_p2 >= 0)
8239 1881590 : return tan_p1 < tan_p2;
8240 : else
8241 23199 : return true;
8242 : }
8243 : else
8244 : {
8245 130806 : if (tan_p2 >= 0)
8246 103900 : return false;
8247 : else
8248 26906 : return tan_p1 < tan_p2;
8249 : }
8250 666202 : };
8251 :
8252 : // Sort points by increasing atan2(y-lowest_y, x-lowest_x) to form a convex
8253 : // hull
8254 666202 : std::sort(intersection.begin(), intersection.end(), sortFunc);
8255 :
8256 : // Remove duplicated points
8257 666202 : size_t j = 1;
8258 3106240 : for (size_t i = 1; i < intersection.size(); ++i)
8259 : {
8260 2440040 : if (intersection[i] != intersection[i - 1])
8261 : {
8262 1452560 : if (j < i)
8263 545275 : intersection[j] = intersection[i];
8264 1452560 : ++j;
8265 : }
8266 : }
8267 666202 : intersection.resize(j);
8268 : }
8269 :
8270 : /************************************************************************/
8271 : /* getArea() */
8272 : /************************************************************************/
8273 :
8274 : // poly may or may not be closed.
8275 558521 : static double getArea(const XYPoly &poly)
8276 : {
8277 : // CPLAssert(poly.size() >= 2);
8278 558521 : const size_t nPointCount = poly.size();
8279 : double dfAreaSum =
8280 558521 : poly[0].first * (poly[1].second - poly[nPointCount - 1].second);
8281 :
8282 1765140 : for (size_t i = 1; i < nPointCount - 1; i++)
8283 : {
8284 1206610 : dfAreaSum += poly[i].first * (poly[i + 1].second - poly[i - 1].second);
8285 : }
8286 :
8287 558521 : dfAreaSum += poly[nPointCount - 1].first *
8288 558521 : (poly[0].second - poly[nPointCount - 2].second);
8289 :
8290 558521 : return 0.5 * std::fabs(dfAreaSum);
8291 : }
8292 :
8293 : /************************************************************************/
8294 : /* GWKSumPreserving() */
8295 : /************************************************************************/
8296 :
8297 : static void GWKSumPreservingThread(void *pData);
8298 :
8299 18 : static CPLErr GWKSumPreserving(GDALWarpKernel *poWK)
8300 : {
8301 18 : return GWKRun(poWK, "GWKSumPreserving", GWKSumPreservingThread);
8302 : }
8303 :
8304 18 : static void GWKSumPreservingThread(void *pData)
8305 : {
8306 18 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
8307 18 : GDALWarpKernel *poWK = psJob->poWK;
8308 18 : const int iYMin = psJob->iYMin;
8309 18 : const int iYMax = psJob->iYMax;
8310 : const bool bIsAffineNoRotation =
8311 18 : GDALTransformIsAffineNoRotation(poWK->pfnTransformer,
8312 26 : poWK->pTransformerArg) &&
8313 : // for debug/testing purposes
8314 8 : CPLTestBool(
8315 18 : CPLGetConfigOption("GDAL_WARP_USE_AFFINE_OPTIMIZATION", "YES"));
8316 :
8317 18 : const int nDstXSize = poWK->nDstXSize;
8318 18 : const int nSrcXSize = poWK->nSrcXSize;
8319 18 : const int nSrcYSize = poWK->nSrcYSize;
8320 :
8321 36 : std::vector<double> adfX0(nSrcXSize + 1);
8322 36 : std::vector<double> adfY0(nSrcXSize + 1);
8323 36 : std::vector<double> adfZ0(nSrcXSize + 1);
8324 36 : std::vector<double> adfX1(nSrcXSize + 1);
8325 36 : std::vector<double> adfY1(nSrcXSize + 1);
8326 36 : std::vector<double> adfZ1(nSrcXSize + 1);
8327 36 : std::vector<int> abSuccess0(nSrcXSize + 1);
8328 36 : std::vector<int> abSuccess1(nSrcXSize + 1);
8329 :
8330 : CPLRectObj sGlobalBounds;
8331 18 : sGlobalBounds.minx = -2 * poWK->dfXScale;
8332 18 : sGlobalBounds.miny = iYMin - 2 * poWK->dfYScale;
8333 18 : sGlobalBounds.maxx = nDstXSize + 2 * poWK->dfXScale;
8334 18 : sGlobalBounds.maxy = iYMax + 2 * poWK->dfYScale;
8335 18 : CPLQuadTree *hQuadTree = CPLQuadTreeCreate(&sGlobalBounds, nullptr);
8336 :
8337 : struct SourcePixel
8338 : {
8339 : int iSrcX;
8340 : int iSrcY;
8341 :
8342 : // Coordinates of source pixel in target pixel coordinates
8343 : double dfDstX0;
8344 : double dfDstY0;
8345 : double dfDstX1;
8346 : double dfDstY1;
8347 : double dfDstX2;
8348 : double dfDstY2;
8349 : double dfDstX3;
8350 : double dfDstY3;
8351 :
8352 : // Source pixel total area (might be larger than the one described
8353 : // by above coordinates, if the pixel was crossing the antimeridian
8354 : // and split)
8355 : double dfArea;
8356 : };
8357 :
8358 36 : std::vector<SourcePixel> sourcePixels;
8359 :
8360 36 : XYPoly discontinuityLeft(5);
8361 36 : XYPoly discontinuityRight(5);
8362 :
8363 : /* ==================================================================== */
8364 : /* First pass: transform the 4 corners of each potential */
8365 : /* contributing source pixel to target pixel coordinates. */
8366 : /* ==================================================================== */
8367 :
8368 : // Special case for top line
8369 : {
8370 18 : int iY = 0;
8371 1130 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8372 : {
8373 1112 : adfX1[iX] = iX + poWK->nSrcXOff;
8374 1112 : adfY1[iX] = iY + poWK->nSrcYOff;
8375 1112 : adfZ1[iX] = 0;
8376 : }
8377 :
8378 18 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
8379 : adfX1.data(), adfY1.data(), adfZ1.data(),
8380 : abSuccess1.data());
8381 :
8382 1130 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8383 : {
8384 1112 : if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
8385 0 : abSuccess1[iX] = FALSE;
8386 : else
8387 : {
8388 1112 : adfX1[iX] -= poWK->nDstXOff;
8389 1112 : adfY1[iX] -= poWK->nDstYOff;
8390 : }
8391 : }
8392 : }
8393 :
8394 413412 : const auto getInsideXSign = [poWK, nDstXSize](double dfX)
8395 : {
8396 413412 : return dfX - poWK->nDstXOff >= -2 * poWK->dfXScale &&
8397 205344 : dfX - poWK->nDstXOff <= nDstXSize + 2 * poWK->dfXScale
8398 413412 : ? 1
8399 208068 : : -1;
8400 18 : };
8401 :
8402 : const auto FindDiscontinuity =
8403 80 : [poWK, psJob, getInsideXSign](
8404 : double dfXLeft, double dfXRight, double dfY,
8405 : int XLeftReprojectedInsideSign, double &dfXMidReprojectedLeft,
8406 800 : double &dfXMidReprojectedRight, double &dfYMidReprojected)
8407 : {
8408 880 : for (int i = 0; i < 10 && dfXRight - dfXLeft > 1e-8; ++i)
8409 : {
8410 800 : double dfXMid = (dfXLeft + dfXRight) / 2;
8411 800 : double dfXMidReprojected = dfXMid;
8412 800 : dfYMidReprojected = dfY;
8413 800 : double dfZ = 0;
8414 800 : int nSuccess = 0;
8415 800 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1,
8416 : &dfXMidReprojected, &dfYMidReprojected, &dfZ,
8417 : &nSuccess);
8418 800 : if (XLeftReprojectedInsideSign != getInsideXSign(dfXMidReprojected))
8419 : {
8420 456 : dfXRight = dfXMid;
8421 456 : dfXMidReprojectedRight = dfXMidReprojected;
8422 : }
8423 : else
8424 : {
8425 344 : dfXLeft = dfXMid;
8426 344 : dfXMidReprojectedLeft = dfXMidReprojected;
8427 : }
8428 : }
8429 80 : };
8430 :
8431 566 : for (int iY = 0; iY < nSrcYSize; ++iY)
8432 : {
8433 548 : std::swap(adfX0, adfX1);
8434 548 : std::swap(adfY0, adfY1);
8435 548 : std::swap(adfZ0, adfZ1);
8436 548 : std::swap(abSuccess0, abSuccess1);
8437 :
8438 104512 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8439 : {
8440 103964 : adfX1[iX] = iX + poWK->nSrcXOff;
8441 103964 : adfY1[iX] = iY + 1 + poWK->nSrcYOff;
8442 103964 : adfZ1[iX] = 0;
8443 : }
8444 :
8445 548 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
8446 : adfX1.data(), adfY1.data(), adfZ1.data(),
8447 : abSuccess1.data());
8448 :
8449 104512 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8450 : {
8451 103964 : if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
8452 0 : abSuccess1[iX] = FALSE;
8453 : else
8454 : {
8455 103964 : adfX1[iX] -= poWK->nDstXOff;
8456 103964 : adfY1[iX] -= poWK->nDstYOff;
8457 : }
8458 : }
8459 :
8460 103964 : for (int iX = 0; iX < nSrcXSize; ++iX)
8461 : {
8462 206832 : if (abSuccess0[iX] && abSuccess0[iX + 1] && abSuccess1[iX] &&
8463 103416 : abSuccess1[iX + 1])
8464 : {
8465 : /* --------------------------------------------------------------------
8466 : */
8467 : /* Do not try to apply transparent source pixels to the
8468 : * destination.*/
8469 : /* --------------------------------------------------------------------
8470 : */
8471 103416 : const auto iSrcOffset =
8472 103416 : iX + static_cast<GPtrDiff_t>(iY) * nSrcXSize;
8473 105816 : if (poWK->panUnifiedSrcValid != nullptr &&
8474 2400 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
8475 : {
8476 10971 : continue;
8477 : }
8478 :
8479 103410 : if (poWK->pafUnifiedSrcDensity != nullptr)
8480 : {
8481 0 : if (poWK->pafUnifiedSrcDensity[iSrcOffset] <
8482 : SRC_DENSITY_THRESHOLD)
8483 0 : continue;
8484 : }
8485 :
8486 : SourcePixel sp;
8487 103410 : sp.dfArea = 0;
8488 103410 : sp.dfDstX0 = adfX0[iX];
8489 103410 : sp.dfDstY0 = adfY0[iX];
8490 103410 : sp.dfDstX1 = adfX0[iX + 1];
8491 103410 : sp.dfDstY1 = adfY0[iX + 1];
8492 103410 : sp.dfDstX2 = adfX1[iX + 1];
8493 103410 : sp.dfDstY2 = adfY1[iX + 1];
8494 103410 : sp.dfDstX3 = adfX1[iX];
8495 103410 : sp.dfDstY3 = adfY1[iX];
8496 :
8497 : // Detect pixel that likely cross the anti-meridian and
8498 : // introduce a discontinuity when reprojected.
8499 :
8500 103410 : if (getInsideXSign(adfX0[iX]) !=
8501 103506 : getInsideXSign(adfX0[iX + 1]) &&
8502 164 : getInsideXSign(adfX0[iX]) == getInsideXSign(adfX1[iX]) &&
8503 68 : getInsideXSign(adfX0[iX + 1]) ==
8504 103574 : getInsideXSign(adfX1[iX + 1]) &&
8505 40 : (adfY1[iX] - adfY0[iX]) * (adfY1[iX + 1] - adfY0[iX + 1]) >
8506 : 0)
8507 : {
8508 40 : double dfXMidReprojectedLeftTop = 0;
8509 40 : double dfXMidReprojectedRightTop = 0;
8510 40 : double dfYMidReprojectedTop = 0;
8511 40 : FindDiscontinuity(
8512 40 : iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
8513 80 : iY + poWK->nSrcYOff, getInsideXSign(adfX0[iX]),
8514 : dfXMidReprojectedLeftTop, dfXMidReprojectedRightTop,
8515 : dfYMidReprojectedTop);
8516 40 : double dfXMidReprojectedLeftBottom = 0;
8517 40 : double dfXMidReprojectedRightBottom = 0;
8518 40 : double dfYMidReprojectedBottom = 0;
8519 40 : FindDiscontinuity(
8520 40 : iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
8521 80 : iY + poWK->nSrcYOff + 1, getInsideXSign(adfX1[iX]),
8522 : dfXMidReprojectedLeftBottom,
8523 : dfXMidReprojectedRightBottom, dfYMidReprojectedBottom);
8524 :
8525 40 : discontinuityLeft[0] = XYPair(adfX0[iX], adfY0[iX]);
8526 40 : discontinuityLeft[1] =
8527 80 : XYPair(dfXMidReprojectedLeftTop, dfYMidReprojectedTop);
8528 40 : discontinuityLeft[2] = XYPair(dfXMidReprojectedLeftBottom,
8529 40 : dfYMidReprojectedBottom);
8530 40 : discontinuityLeft[3] = XYPair(adfX1[iX], adfY1[iX]);
8531 40 : discontinuityLeft[4] = XYPair(adfX0[iX], adfY0[iX]);
8532 :
8533 40 : discontinuityRight[0] =
8534 80 : XYPair(adfX0[iX + 1], adfY0[iX + 1]);
8535 40 : discontinuityRight[1] =
8536 80 : XYPair(dfXMidReprojectedRightTop, dfYMidReprojectedTop);
8537 40 : discontinuityRight[2] = XYPair(dfXMidReprojectedRightBottom,
8538 40 : dfYMidReprojectedBottom);
8539 40 : discontinuityRight[3] =
8540 80 : XYPair(adfX1[iX + 1], adfY1[iX + 1]);
8541 40 : discontinuityRight[4] =
8542 80 : XYPair(adfX0[iX + 1], adfY0[iX + 1]);
8543 :
8544 40 : sp.dfArea = getArea(discontinuityLeft) +
8545 40 : getArea(discontinuityRight);
8546 40 : if (getInsideXSign(adfX0[iX]) >= 1)
8547 : {
8548 20 : sp.dfDstX1 = dfXMidReprojectedLeftTop;
8549 20 : sp.dfDstY1 = dfYMidReprojectedTop;
8550 20 : sp.dfDstX2 = dfXMidReprojectedLeftBottom;
8551 20 : sp.dfDstY2 = dfYMidReprojectedBottom;
8552 : }
8553 : else
8554 : {
8555 20 : sp.dfDstX0 = dfXMidReprojectedRightTop;
8556 20 : sp.dfDstY0 = dfYMidReprojectedTop;
8557 20 : sp.dfDstX3 = dfXMidReprojectedRightBottom;
8558 20 : sp.dfDstY3 = dfYMidReprojectedBottom;
8559 : }
8560 : }
8561 :
8562 : // Bounding box of source pixel (expressed in target pixel
8563 : // coordinates)
8564 : CPLRectObj sRect;
8565 103410 : sRect.minx = std::min(std::min(sp.dfDstX0, sp.dfDstX1),
8566 103410 : std::min(sp.dfDstX2, sp.dfDstX3));
8567 103410 : sRect.miny = std::min(std::min(sp.dfDstY0, sp.dfDstY1),
8568 103410 : std::min(sp.dfDstY2, sp.dfDstY3));
8569 103410 : sRect.maxx = std::max(std::max(sp.dfDstX0, sp.dfDstX1),
8570 103410 : std::max(sp.dfDstX2, sp.dfDstX3));
8571 103410 : sRect.maxy = std::max(std::max(sp.dfDstY0, sp.dfDstY1),
8572 103410 : std::max(sp.dfDstY2, sp.dfDstY3));
8573 103410 : if (!(sRect.minx < nDstXSize && sRect.maxx > 0 &&
8574 101350 : sRect.miny < iYMax && sRect.maxy > iYMin))
8575 : {
8576 10852 : continue;
8577 : }
8578 :
8579 92558 : sp.iSrcX = iX;
8580 92558 : sp.iSrcY = iY;
8581 :
8582 92558 : if (!bIsAffineNoRotation)
8583 : {
8584 : // Check polygon validity (no self-crossing)
8585 89745 : XYPair xy;
8586 89745 : if (getIntersection(XYPair(sp.dfDstX0, sp.dfDstY0),
8587 89745 : XYPair(sp.dfDstX1, sp.dfDstY1),
8588 89745 : XYPair(sp.dfDstX2, sp.dfDstY2),
8589 269235 : XYPair(sp.dfDstX3, sp.dfDstY3), xy) ||
8590 89745 : getIntersection(XYPair(sp.dfDstX1, sp.dfDstY1),
8591 89745 : XYPair(sp.dfDstX2, sp.dfDstY2),
8592 89745 : XYPair(sp.dfDstX0, sp.dfDstY0),
8593 179490 : XYPair(sp.dfDstX3, sp.dfDstY3), xy))
8594 : {
8595 113 : continue;
8596 : }
8597 : }
8598 :
8599 92445 : CPLQuadTreeInsertWithBounds(
8600 : hQuadTree,
8601 : reinterpret_cast<void *>(
8602 92445 : static_cast<uintptr_t>(sourcePixels.size())),
8603 : &sRect);
8604 :
8605 92445 : sourcePixels.push_back(sp);
8606 : }
8607 : }
8608 : }
8609 :
8610 36 : std::vector<double> adfRealValue(poWK->nBands);
8611 36 : std::vector<double> adfImagValue(poWK->nBands);
8612 36 : std::vector<double> adfBandDensity(poWK->nBands);
8613 36 : std::vector<double> adfWeight(poWK->nBands);
8614 :
8615 : #ifdef CHECK_SUM_WITH_GEOS
8616 : auto hGEOSContext = OGRGeometry::createGEOSContext();
8617 : auto seq1 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
8618 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 0, 0.0, 0.0);
8619 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 1, 1.0, 0.0);
8620 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 2, 1.0, 1.0);
8621 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 3, 0.0, 1.0);
8622 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 4, 0.0, 0.0);
8623 : auto hLR1 = GEOSGeom_createLinearRing_r(hGEOSContext, seq1);
8624 : auto hP1 = GEOSGeom_createPolygon_r(hGEOSContext, hLR1, nullptr, 0);
8625 :
8626 : auto seq2 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
8627 : auto hLR2 = GEOSGeom_createLinearRing_r(hGEOSContext, seq2);
8628 : auto hP2 = GEOSGeom_createPolygon_r(hGEOSContext, hLR2, nullptr, 0);
8629 : #endif
8630 :
8631 : const XYPoly xy1{
8632 36 : {0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}};
8633 36 : XYPoly xy2(5);
8634 36 : XYPoly xy2_triangle(4);
8635 36 : XYPoly intersection;
8636 :
8637 : /* ==================================================================== */
8638 : /* Loop over output lines. */
8639 : /* ==================================================================== */
8640 891 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
8641 : {
8642 : CPLRectObj sRect;
8643 873 : sRect.miny = iDstY;
8644 873 : sRect.maxy = iDstY + 1;
8645 :
8646 : /* ====================================================================
8647 : */
8648 : /* Loop over pixels in output scanline. */
8649 : /* ====================================================================
8650 : */
8651 221042 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
8652 : {
8653 220169 : sRect.minx = iDstX;
8654 220169 : sRect.maxx = iDstX + 1;
8655 220169 : int nSourcePixels = 0;
8656 : void **pahSourcePixel =
8657 220169 : CPLQuadTreeSearch(hQuadTree, &sRect, &nSourcePixels);
8658 220169 : if (nSourcePixels == 0)
8659 : {
8660 1258 : CPLFree(pahSourcePixel);
8661 1262 : continue;
8662 : }
8663 :
8664 218911 : std::fill(adfRealValue.begin(), adfRealValue.end(), 0);
8665 218911 : std::fill(adfImagValue.begin(), adfImagValue.end(), 0);
8666 218911 : std::fill(adfBandDensity.begin(), adfBandDensity.end(), 0);
8667 218911 : std::fill(adfWeight.begin(), adfWeight.end(), 0);
8668 218911 : double dfDensity = 0;
8669 218911 : double dfTotalWeight = 0;
8670 :
8671 : /* ====================================================================
8672 : */
8673 : /* Iterate over each contributing source pixel to add its
8674 : */
8675 : /* value weighed by the ratio of the area of its
8676 : * intersection */
8677 : /* with the target pixel divided by the area of the source
8678 : */
8679 : /* pixel. */
8680 : /* ====================================================================
8681 : */
8682 1020520 : for (int i = 0; i < nSourcePixels; ++i)
8683 : {
8684 801614 : const int iSourcePixel = static_cast<int>(
8685 801614 : reinterpret_cast<uintptr_t>(pahSourcePixel[i]));
8686 801614 : auto &sp = sourcePixels[iSourcePixel];
8687 :
8688 801614 : double dfWeight = 0.0;
8689 801614 : if (bIsAffineNoRotation)
8690 : {
8691 : // Optimization since the source pixel is a rectangle in
8692 : // target pixel coordinates
8693 16312 : double dfSrcMinX = std::min(sp.dfDstX0, sp.dfDstX2);
8694 16312 : double dfSrcMaxX = std::max(sp.dfDstX0, sp.dfDstX2);
8695 16312 : double dfSrcMinY = std::min(sp.dfDstY0, sp.dfDstY2);
8696 16312 : double dfSrcMaxY = std::max(sp.dfDstY0, sp.dfDstY2);
8697 16312 : double dfIntersMinX = std::max<double>(dfSrcMinX, iDstX);
8698 16312 : double dfIntersMaxX = std::min(dfSrcMaxX, iDstX + 1.0);
8699 16312 : double dfIntersMinY = std::max<double>(dfSrcMinY, iDstY);
8700 16312 : double dfIntersMaxY = std::min(dfSrcMaxY, iDstY + 1.0);
8701 16312 : dfWeight =
8702 16312 : ((dfIntersMaxX - dfIntersMinX) *
8703 16312 : (dfIntersMaxY - dfIntersMinY)) /
8704 16312 : ((dfSrcMaxX - dfSrcMinX) * (dfSrcMaxY - dfSrcMinY));
8705 : }
8706 : else
8707 : {
8708 : // Compute the polygon of the source pixel in target pixel
8709 : // coordinates, and shifted to the target pixel (unit square
8710 : // coordinates)
8711 :
8712 785302 : xy2[0] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
8713 785302 : xy2[1] = {sp.dfDstX1 - iDstX, sp.dfDstY1 - iDstY};
8714 785302 : xy2[2] = {sp.dfDstX2 - iDstX, sp.dfDstY2 - iDstY};
8715 785302 : xy2[3] = {sp.dfDstX3 - iDstX, sp.dfDstY3 - iDstY};
8716 785302 : xy2[4] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
8717 :
8718 785302 : if (isConvex(xy2))
8719 : {
8720 785302 : getConvexPolyIntersection(xy1, xy2, intersection);
8721 785302 : if (intersection.size() >= 3)
8722 : {
8723 468849 : dfWeight = getArea(intersection);
8724 : }
8725 : }
8726 : else
8727 : {
8728 : // Split xy2 into 2 triangles.
8729 0 : xy2_triangle[0] = xy2[0];
8730 0 : xy2_triangle[1] = xy2[1];
8731 0 : xy2_triangle[2] = xy2[2];
8732 0 : xy2_triangle[3] = xy2[0];
8733 0 : getConvexPolyIntersection(xy1, xy2_triangle,
8734 : intersection);
8735 0 : if (intersection.size() >= 3)
8736 : {
8737 0 : dfWeight = getArea(intersection);
8738 : }
8739 :
8740 0 : xy2_triangle[1] = xy2[2];
8741 0 : xy2_triangle[2] = xy2[3];
8742 0 : getConvexPolyIntersection(xy1, xy2_triangle,
8743 : intersection);
8744 0 : if (intersection.size() >= 3)
8745 : {
8746 0 : dfWeight += getArea(intersection);
8747 : }
8748 : }
8749 785302 : if (dfWeight > 0.0)
8750 : {
8751 468828 : if (sp.dfArea == 0)
8752 89592 : sp.dfArea = getArea(xy2);
8753 468828 : dfWeight /= sp.dfArea;
8754 : }
8755 :
8756 : #ifdef CHECK_SUM_WITH_GEOS
8757 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 0,
8758 : sp.dfDstX0 - iDstX,
8759 : sp.dfDstY0 - iDstY);
8760 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 1,
8761 : sp.dfDstX1 - iDstX,
8762 : sp.dfDstY1 - iDstY);
8763 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 2,
8764 : sp.dfDstX2 - iDstX,
8765 : sp.dfDstY2 - iDstY);
8766 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 3,
8767 : sp.dfDstX3 - iDstX,
8768 : sp.dfDstY3 - iDstY);
8769 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 4,
8770 : sp.dfDstX0 - iDstX,
8771 : sp.dfDstY0 - iDstY);
8772 :
8773 : double dfWeightGEOS = 0.0;
8774 : auto hIntersection =
8775 : GEOSIntersection_r(hGEOSContext, hP1, hP2);
8776 : if (hIntersection)
8777 : {
8778 : double dfIntersArea = 0.0;
8779 : if (GEOSArea_r(hGEOSContext, hIntersection,
8780 : &dfIntersArea) &&
8781 : dfIntersArea > 0)
8782 : {
8783 : double dfSourceArea = 0.0;
8784 : if (GEOSArea_r(hGEOSContext, hP2, &dfSourceArea))
8785 : {
8786 : dfWeightGEOS = dfIntersArea / dfSourceArea;
8787 : }
8788 : }
8789 : GEOSGeom_destroy_r(hGEOSContext, hIntersection);
8790 : }
8791 : if (fabs(dfWeight - dfWeightGEOS) > 1e-5 * dfWeightGEOS)
8792 : {
8793 : /* ok */ printf("dfWeight=%f dfWeightGEOS=%f\n",
8794 : dfWeight, dfWeightGEOS);
8795 : printf("xy2: "); // ok
8796 : for (const auto &xy : xy2)
8797 : printf("[%f, %f], ", xy.first, xy.second); // ok
8798 : printf("\n"); // ok
8799 : printf("intersection: "); // ok
8800 : for (const auto &xy : intersection)
8801 : printf("[%f, %f], ", xy.first, xy.second); // ok
8802 : printf("\n"); // ok
8803 : }
8804 : #endif
8805 : }
8806 801614 : if (dfWeight > 0.0)
8807 : {
8808 474099 : const GPtrDiff_t iSrcOffset =
8809 474099 : sp.iSrcX +
8810 474099 : static_cast<GPtrDiff_t>(sp.iSrcY) * nSrcXSize;
8811 474099 : dfTotalWeight += dfWeight;
8812 :
8813 474099 : if (poWK->pafUnifiedSrcDensity != nullptr)
8814 : {
8815 0 : dfDensity +=
8816 0 : dfWeight * poWK->pafUnifiedSrcDensity[iSrcOffset];
8817 : }
8818 : else
8819 : {
8820 474099 : dfDensity += dfWeight;
8821 : }
8822 :
8823 1818720 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
8824 : {
8825 : // Returns pixel value if it is not no data.
8826 : double dfBandDensity;
8827 : double dfRealValue;
8828 : double dfImagValue;
8829 2689240 : if (!(GWKGetPixelValue(poWK, iBand, iSrcOffset,
8830 : &dfBandDensity, &dfRealValue,
8831 : &dfImagValue) &&
8832 1344620 : dfBandDensity > BAND_DENSITY_THRESHOLD))
8833 : {
8834 0 : continue;
8835 : }
8836 :
8837 1344620 : adfRealValue[iBand] += dfRealValue * dfWeight;
8838 1344620 : adfImagValue[iBand] += dfImagValue * dfWeight;
8839 1344620 : adfBandDensity[iBand] += dfBandDensity * dfWeight;
8840 1344620 : adfWeight[iBand] += dfWeight;
8841 : }
8842 : }
8843 : }
8844 :
8845 218911 : CPLFree(pahSourcePixel);
8846 :
8847 : /* --------------------------------------------------------------------
8848 : */
8849 : /* Update destination pixel value. */
8850 : /* --------------------------------------------------------------------
8851 : */
8852 218911 : bool bHasFoundDensity = false;
8853 218911 : const GPtrDiff_t iDstOffset =
8854 218911 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
8855 827822 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
8856 : {
8857 608911 : if (adfWeight[iBand] > 0)
8858 : {
8859 : const double dfBandDensity =
8860 608907 : adfBandDensity[iBand] / adfWeight[iBand];
8861 608907 : if (dfBandDensity > BAND_DENSITY_THRESHOLD)
8862 : {
8863 608907 : bHasFoundDensity = true;
8864 608907 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
8865 608907 : adfRealValue[iBand],
8866 608907 : adfImagValue[iBand]);
8867 : }
8868 : }
8869 : }
8870 :
8871 218911 : if (!bHasFoundDensity)
8872 4 : continue;
8873 :
8874 : /* --------------------------------------------------------------------
8875 : */
8876 : /* Update destination density/validity masks. */
8877 : /* --------------------------------------------------------------------
8878 : */
8879 218907 : GWKOverlayDensity(poWK, iDstOffset, dfDensity / dfTotalWeight);
8880 :
8881 218907 : if (poWK->panDstValid != nullptr)
8882 : {
8883 11750 : CPLMaskSet(poWK->panDstValid, iDstOffset);
8884 : }
8885 : }
8886 :
8887 : /* --------------------------------------------------------------------
8888 : */
8889 : /* Report progress to the user, and optionally cancel out. */
8890 : /* --------------------------------------------------------------------
8891 : */
8892 873 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
8893 0 : break;
8894 : }
8895 :
8896 : #ifdef CHECK_SUM_WITH_GEOS
8897 : GEOSGeom_destroy_r(hGEOSContext, hP1);
8898 : GEOSGeom_destroy_r(hGEOSContext, hP2);
8899 : OGRGeometry::freeGEOSContext(hGEOSContext);
8900 : #endif
8901 18 : CPLQuadTreeDestroy(hQuadTree);
8902 18 : }
|