Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: High Performance Image Reprojector
4 : * Purpose: Implementation of the GDALWarpKernel class. Implements the actual
5 : * image warping for a "chunk" of input and output imagery already
6 : * loaded into memory.
7 : * Author: Frank Warmerdam, warmerdam@pobox.com
8 : *
9 : ******************************************************************************
10 : * Copyright (c) 2003, Frank Warmerdam <warmerdam@pobox.com>
11 : * Copyright (c) 2008-2013, Even Rouault <even dot rouault at spatialys.com>
12 : *
13 : * SPDX-License-Identifier: MIT
14 : ****************************************************************************/
15 :
16 : #include "cpl_port.h"
17 : #include "gdalwarper.h"
18 :
19 : #include <cfloat>
20 : #include <cmath>
21 : #include <cstddef>
22 : #include <cstdlib>
23 : #include <cstring>
24 :
25 : #include <algorithm>
26 : #include <limits>
27 : #include <mutex>
28 : #include <new>
29 : #include <utility>
30 : #include <vector>
31 :
32 : #include "cpl_atomic_ops.h"
33 : #include "cpl_conv.h"
34 : #include "cpl_error.h"
35 : #include "cpl_float.h"
36 : #include "cpl_mask.h"
37 : #include "cpl_multiproc.h"
38 : #include "cpl_progress.h"
39 : #include "cpl_string.h"
40 : #include "cpl_vsi.h"
41 : #include "cpl_worker_thread_pool.h"
42 : #include "cpl_quad_tree.h"
43 : #include "gdal.h"
44 : #include "gdal_alg.h"
45 : #include "gdal_alg_priv.h"
46 : #include "gdal_thread_pool.h"
47 : #include "gdalresamplingkernels.h"
48 :
49 : // #define CHECK_SUM_WITH_GEOS
50 : #ifdef CHECK_SUM_WITH_GEOS
51 : #include "ogr_geometry.h"
52 : #include "ogr_geos.h"
53 : #endif
54 :
55 : #ifdef USE_NEON_OPTIMIZATIONS
56 : #include "include_sse2neon.h"
57 : #define USE_SSE2
58 :
59 : #include "gdalsse_priv.h"
60 :
61 : // We restrict to 64bit processors because they are guaranteed to have SSE2.
62 : // Could possibly be used too on 32bit, but we would need to check at runtime.
63 : #elif defined(__x86_64) || defined(_M_X64)
64 : #define USE_SSE2
65 :
66 : #include "gdalsse_priv.h"
67 :
68 : #if __SSE4_1__
69 : #include <smmintrin.h>
70 : #endif
71 :
72 : #if __SSE3__
73 : #include <pmmintrin.h>
74 : #endif
75 :
76 : #endif
77 :
78 : constexpr double BAND_DENSITY_THRESHOLD = 0.0000000001; // 1e-10
79 : constexpr float SRC_DENSITY_THRESHOLD_FLOAT = 0.000000001f; // 1e-9, single-precision variant
80 : constexpr double SRC_DENSITY_THRESHOLD_DOUBLE = 0.000000001; // 1e-9, double-precision variant
81 :
82 : // #define INSTANTIATE_FLOAT64_SSE2_IMPL
83 :
84 : static const int anGWKFilterRadius[] = { // One entry per GDALResampleAlg value; indexed directly by the enum in GWKGetFilterRadius().
85 : 0, // Nearest neighbour
86 : 1, // Bilinear
87 : 2, // Cubic Convolution (Catmull-Rom)
88 : 2, // Cubic B-Spline
89 : 3, // Lanczos windowed sinc
90 : 0, // Average
91 : 0, // Mode
92 : 0, // Reserved GRA_Gauss=7
93 : 0, // Max
94 : 0, // Min
95 : 0, // Med
96 : 0, // Q1
97 : 0, // Q3
98 : 0, // Sum
99 : 0, // RMS
100 : };
101 :
102 : static double GWKBilinear(double dfX); // Forward declarations of the single-value kernel functions stored in apfGWKFilter.
103 : static double GWKCubic(double dfX);
104 : static double GWKBSpline(double dfX);
105 : static double GWKLanczosSinc(double dfX);
106 :
107 : static const FilterFuncType apfGWKFilter[] = { // One entry per GDALResampleAlg value; nullptr where no kernel function is registered.
108 : nullptr, // Nearest neighbour
109 : GWKBilinear, // Bilinear
110 : GWKCubic, // Cubic Convolution (Catmull-Rom)
111 : GWKBSpline, // Cubic B-Spline
112 : GWKLanczosSinc, // Lanczos windowed sinc
113 : nullptr, // Average
114 : nullptr, // Mode
115 : nullptr, // Reserved GRA_Gauss=7
116 : nullptr, // Max
117 : nullptr, // Min
118 : nullptr, // Med
119 : nullptr, // Q1
120 : nullptr, // Q3
121 : nullptr, // Sum
122 : nullptr, // RMS
123 : };
124 :
125 : // TODO(schwehr): Can we make these functions have a const * const arg?
126 : static double GWKBilinear4Values(double *padfVals); // Forward declarations of the array-argument kernel variants stored in apfGWKFilter4Values.
127 : static double GWKCubic4Values(double *padfVals);
128 : static double GWKBSpline4Values(double *padfVals);
129 : static double GWKLanczosSinc4Values(double *padfVals);
130 :
131 : static const FilterFunc4ValuesType apfGWKFilter4Values[] = { // One entry per GDALResampleAlg value; nullptr where no kernel function is registered.
132 : nullptr, // Nearest neighbour
133 : GWKBilinear4Values, // Bilinear
134 : GWKCubic4Values, // Cubic Convolution (Catmull-Rom)
135 : GWKBSpline4Values, // Cubic B-Spline
136 : GWKLanczosSinc4Values, // Lanczos windowed sinc
137 : nullptr, // Average
138 : nullptr, // Mode
139 : nullptr, // Reserved GRA_Gauss=7
140 : nullptr, // Max
141 : nullptr, // Min
142 : nullptr, // Med
143 : nullptr, // Q1
144 : nullptr, // Q3
145 : nullptr, // Sum
146 : nullptr, // RMS
147 : };
148 :
149 13701 : int GWKGetFilterRadius(GDALResampleAlg eResampleAlg) // Returns the anGWKFilterRadius entry for eResampleAlg.
150 : {
151 : static_assert(CPL_ARRAYSIZE(anGWKFilterRadius) == GRA_LAST_VALUE + 1, // Compile-time check that the table has one entry per enum value.
152 : "Bad size of anGWKFilterRadius");
153 13701 : return anGWKFilterRadius[eResampleAlg]; // NOTE(review): no runtime range check; the enum value is used directly as the index.
154 : }
155 :
156 5114 : FilterFuncType GWKGetFilterFunc(GDALResampleAlg eResampleAlg) // Returns the apfGWKFilter entry for eResampleAlg; may be nullptr.
157 : {
158 : static_assert(CPL_ARRAYSIZE(apfGWKFilter) == GRA_LAST_VALUE + 1, // Compile-time check that the table has one entry per enum value.
159 : "Bad size of apfGWKFilter");
160 5114 : return apfGWKFilter[eResampleAlg]; // NOTE(review): no runtime range check; the enum value is used directly as the index.
161 : }
162 :
163 5114 : FilterFunc4ValuesType GWKGetFilterFunc4Values(GDALResampleAlg eResampleAlg) // Returns the apfGWKFilter4Values entry for eResampleAlg; may be nullptr.
164 : {
165 : static_assert(CPL_ARRAYSIZE(apfGWKFilter4Values) == GRA_LAST_VALUE + 1, // Compile-time check that the table has one entry per enum value.
166 : "Bad size of apfGWKFilter4Values");
167 5114 : return apfGWKFilter4Values[eResampleAlg]; // NOTE(review): no runtime range check; the enum value is used directly as the index.
168 : }
169 :
170 : static CPLErr GWKGeneralCase(GDALWarpKernel *); // Forward declarations of the file-local warping implementations; each takes the kernel and returns a CPLErr.
171 : static CPLErr GWKRealCase(GDALWarpKernel *poWK);
172 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
173 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
174 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
175 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
176 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
177 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
178 : #endif
179 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
180 : static CPLErr GWKNearestByte(GDALWarpKernel *poWK);
181 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
182 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
183 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
184 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
185 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
186 : #endif
187 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
188 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
189 : static CPLErr GWKNearestInt8(GDALWarpKernel *poWK);
190 : static CPLErr GWKNearestShort(GDALWarpKernel *poWK);
191 : static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK);
192 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
193 : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK);
194 : static CPLErr GWKAverageOrMode(GDALWarpKernel *);
195 : static CPLErr GWKSumPreserving(GDALWarpKernel *);
196 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
197 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
198 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
199 :
200 : /************************************************************************/
201 : /* GWKJobStruct */
202 : /************************************************************************/
203 :
204 : struct GWKJobStruct // Per-job state handed (as void *) to a warping function; holds references to state shared with the owner.
205 : {
206 : std::mutex &mutex; // Shared mutex guarding counter and stopFlag (see GWKProgressThread()).
207 : std::condition_variable &cv; // Notified by GWKProgressThread() after the counter is incremented.
208 : int counterSingleThreaded = 0; // Progress counter used only by GWKProgressMonoThread().
209 : int &counter; // Shared progress counter incremented by GWKProgressThread().
210 : bool &stopFlag; // Shared flag; set to true to request interruption.
211 : GDALWarpKernel *poWK = nullptr; // Kernel this job works on.
212 : int iYMin = 0; // First destination row assigned to the job.
213 : int iYMax = 0; // End of the job's destination row range; presumably exclusive, TODO confirm against the worker functions.
214 : int (*pfnProgress)(GWKJobStruct *psJob) = nullptr; // Progress callback; its implementations here return TRUE to request a stop.
215 : void *pTransformerArg = nullptr; // Transformer argument to be used by this job.
216 : // used by GWKRun() to assign the proper pTransformerArg
217 : void (*pfnFunc)(void *) = nullptr; // Worker function invoked by ThreadFuncAdapter() with this job as argument.
218 :
219 3180 : GWKJobStruct(std::mutex &mutex_, std::condition_variable &cv_, // Binds the four reference members; all other members keep their defaults.
220 : int &counter_, bool &stopFlag_)
221 3180 : : mutex(mutex_), cv(cv_), counter(counter_), stopFlag(stopFlag_)
222 : {
223 3180 : }
224 : };
225 :
226 : struct GWKThreadData // State shared between GWKRun() and its worker jobs; created by GWKThreadsCreate(), freed by GWKThreadsEnd().
227 : {
228 : std::unique_ptr<CPLJobQueue> poJobQueue{}; // Set only when a thread pool was obtained; null selects the mono-thread path in GWKRun().
229 : std::unique_ptr<std::vector<GWKJobStruct>> threadJobs{}; // One reusable job slot per potential thread.
230 : int nMaxThreads{0}; // Upper bound on the number of jobs GWKRun() may submit.
231 : int counter{0}; // Progress counter shared with the jobs.
232 : bool stopFlag{false}; // Interruption flag shared with the jobs.
233 : std::mutex mutex{}; // Guards counter, stopFlag, the thread counts and the transformer map.
234 : std::condition_variable cv{}; // Signalled by jobs reporting progress.
235 : bool bTransformerArgInputAssignedToThread{false}; // True while a job is borrowing pTransformerArgInput.
236 : void *pTransformerArgInput{
237 : nullptr}; // owned by calling layer. Not to be destroyed
238 : std::map<GIntBig, void *> mapThreadToTransformerArg{}; // Transformer argument in use per thread id (key is CPLGetPID()).
239 : int nTotalThreadCountForThisRun = 0; // Number of jobs submitted by the current GWKRun() call.
240 : int nCurThreadCountForThisRun = 0; // Number of jobs that have entered ThreadFuncAdapter() in the current run.
241 : };
242 :
243 : /************************************************************************/
244 : /* GWKProgressThread() */
245 : /************************************************************************/
246 :
247 : // Progress callback for multi-threaded runs: bumps the shared counter, wakes one waiter, and returns TRUE if the computation must be interrupted.
248 23 : static int GWKProgressThread(GWKJobStruct *psJob)
249 : {
250 23 : bool stop = false;
251 : {
252 23 : std::lock_guard<std::mutex> lock(psJob->mutex); // Counter and stop flag are only touched under the shared mutex.
253 23 : psJob->counter++;
254 23 : stop = psJob->stopFlag;
255 : }
256 23 : psJob->cv.notify_one(); // Notify after releasing the lock.
257 :
258 23 : return stop;
259 : }
260 :
261 : /************************************************************************/
262 : /* GWKProgressMonoThread() */
263 : /************************************************************************/
264 :
265 : // Progress callback for single-threaded runs: calls the kernel's progress function directly and returns TRUE if the computation must be interrupted.
266 435807 : static int GWKProgressMonoThread(GWKJobStruct *psJob)
267 : {
268 435807 : GDALWarpKernel *poWK = psJob->poWK;
269 435807 : if (!poWK->pfnProgress(poWK->dfProgressBase +
270 435807 : poWK->dfProgressScale *
271 435807 : (++psJob->counterSingleThreaded / // Fraction done = calls so far / iYMax (iYMax is nDstYSize on this path).
272 435807 : static_cast<double>(psJob->iYMax)),
273 : "", poWK->pProgress))
274 : {
275 1 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
276 1 : psJob->stopFlag = true; // No lock taken here; this path is the single-threaded one.
277 1 : return TRUE;
278 : }
279 435806 : return FALSE;
280 : }
281 :
282 : /************************************************************************/
283 : /* GWKGenericMonoThread() */
284 : /************************************************************************/
285 :
286 3156 : static CPLErr GWKGenericMonoThread(GDALWarpKernel *poWK, // Runs pfnFunc synchronously over all destination rows; returns CE_Failure if the stop flag was raised.
287 : void (*pfnFunc)(void *pUserData))
288 : {
289 3156 : GWKThreadData td; // Local shared state; only provides storage for the job's reference members.
290 :
291 : // NOTE: the mutex is not used.
292 3156 : GWKJobStruct job(td.mutex, td.cv, td.counter, td.stopFlag);
293 3156 : job.poWK = poWK;
294 3156 : job.iYMin = 0;
295 3156 : job.iYMax = poWK->nDstYSize; // A single job covers the whole destination height.
296 3156 : job.pfnProgress = GWKProgressMonoThread;
297 3156 : job.pTransformerArg = poWK->pTransformerArg; // Uses the kernel's own transformer argument; nothing is cloned.
298 3156 : job.counterSingleThreaded = td.counter;
299 3156 : pfnFunc(&job);
300 3156 : td.counter = job.counterSingleThreaded;
301 :
302 6312 : return td.stopFlag ? CE_Failure : CE_None; // stopFlag is set by GWKProgressMonoThread() through the job's reference.
303 : }
304 :
305 : /************************************************************************/
306 : /* GWKThreadsCreate() */
307 : /************************************************************************/
308 :
309 1781 : void *GWKThreadsCreate(char **papszWarpOptions, // Allocates a GWKThreadData and returns it as an opaque pointer; release it with GWKThreadsEnd().
310 : GDALTransformerFunc /* pfnTransformer */,
311 : void *pTransformerArg)
312 : {
313 1781 : const int nThreads = GDALGetNumThreads(papszWarpOptions, "NUM_THREADS", // Thread count is read from the NUM_THREADS warp option.
314 : GDAL_DEFAULT_MAX_THREAD_COUNT,
315 : /* bDefaultAllCPUs = */ false);
316 1781 : GWKThreadData *psThreadData = new GWKThreadData();
317 : auto poThreadPool =
318 1781 : nThreads > 1 ? GDALGetGlobalThreadPool(nThreads) : nullptr; // No pool is requested for a single thread.
319 1781 : if (poThreadPool)
320 : {
321 24 : psThreadData->nMaxThreads = nThreads;
322 24 : psThreadData->threadJobs.reset(new std::vector<GWKJobStruct>( // nThreads job slots, all bound to the shared mutex/cv/counter/stopFlag.
323 : nThreads,
324 24 : GWKJobStruct(psThreadData->mutex, psThreadData->cv,
325 48 : psThreadData->counter, psThreadData->stopFlag)));
326 :
327 24 : psThreadData->poJobQueue = poThreadPool->CreateJobQueue();
328 24 : psThreadData->pTransformerArgInput = pTransformerArg; // Only recorded on the multi-threaded path.
329 : }
330 :
331 1781 : return psThreadData; // Without a pool, poJobQueue stays null and GWKRun() falls back to GWKGenericMonoThread().
332 : }
333 :
334 : /************************************************************************/
335 : /* GWKThreadsEnd() */
336 : /************************************************************************/
337 :
338 1781 : void GWKThreadsEnd(void *psThreadDataIn) // Destroys the per-thread transformers and frees the GWKThreadData; a null argument is a no-op.
339 : {
340 1781 : if (psThreadDataIn == nullptr)
341 0 : return;
342 :
343 1781 : GWKThreadData *psThreadData = static_cast<GWKThreadData *>(psThreadDataIn);
344 1781 : if (psThreadData->poJobQueue)
345 : {
346 : // cppcheck-suppress constVariableReference
347 34 : for (auto &pair : psThreadData->mapThreadToTransformerArg)
348 : {
349 10 : CPLAssert(pair.second != psThreadData->pTransformerArgInput); // The caller-owned transformer must already have been removed from the map.
350 10 : GDALDestroyTransformer(pair.second); // Remaining entries are the clones made in ThreadFuncAdapter().
351 : }
352 24 : psThreadData->poJobQueue.reset();
353 : }
354 1781 : delete psThreadData;
355 : }
356 :
357 : /************************************************************************/
358 : /* ThreadFuncAdapter() */
359 : /************************************************************************/
360 :
361 33 : static void ThreadFuncAdapter(void *pData) // Job entry point: picks or clones a transformer for the calling thread, then runs the job's pfnFunc.
362 : {
363 33 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
364 33 : GWKThreadData *psThreadData =
365 33 : static_cast<GWKThreadData *>(psJob->poWK->psThreadData);
366 :
367 : // Look if we have already a per-thread transformer
368 33 : void *pTransformerArg = nullptr;
369 33 : const GIntBig nThreadId = CPLGetPID(); // Key into mapThreadToTransformerArg.
370 :
371 : {
372 66 : std::lock_guard<std::mutex> lock(psThreadData->mutex);
373 33 : ++psThreadData->nCurThreadCountForThisRun; // Counts jobs that have started in this run.
374 :
375 33 : auto oIter = psThreadData->mapThreadToTransformerArg.find(nThreadId);
376 33 : if (oIter != psThreadData->mapThreadToTransformerArg.end())
377 : {
378 1 : pTransformerArg = oIter->second; // Reuse the transformer already registered for this thread id.
379 : }
380 32 : else if (!psThreadData->bTransformerArgInputAssignedToThread &&
381 32 : psThreadData->nCurThreadCountForThisRun ==
382 32 : psThreadData->nTotalThreadCountForThisRun)
383 : {
384 : // If we are the last thread to be started, temporarily borrow the
385 : // original transformer
386 22 : psThreadData->bTransformerArgInputAssignedToThread = true;
387 22 : pTransformerArg = psThreadData->pTransformerArgInput;
388 22 : psThreadData->mapThreadToTransformerArg[nThreadId] =
389 : pTransformerArg;
390 : }
391 :
392 33 : if (pTransformerArg == nullptr)
393 : {
394 10 : CPLAssert(psThreadData->pTransformerArgInput != nullptr);
395 10 : CPLAssert(!psThreadData->bTransformerArgInputAssignedToThread);
396 : }
397 : }
398 :
399 : // If no transformer assigned to current thread, instantiate one
400 33 : if (pTransformerArg == nullptr)
401 : {
402 : // This somehow assumes that GDALCloneTransformer() is thread-safe
403 : // which should normally be the case.
404 : pTransformerArg =
405 10 : GDALCloneTransformer(psThreadData->pTransformerArgInput); // Called outside the mutex.
406 :
407 : // Lock for the stop flag and the transformer map.
408 10 : std::lock_guard<std::mutex> lock(psThreadData->mutex);
409 10 : if (!pTransformerArg)
410 : {
411 0 : psJob->stopFlag = true; // NOTE(review): returns without notifying cv or advancing counter; check that GWKRun()'s progress wait cannot block on this path.
412 0 : return;
413 : }
414 10 : psThreadData->mapThreadToTransformerArg[nThreadId] = pTransformerArg; // The clone stays registered until GWKThreadsEnd() destroys it.
415 : }
416 :
417 33 : psJob->pTransformerArg = pTransformerArg;
418 33 : psJob->pfnFunc(pData); // Run the actual warping function with this job.
419 :
420 : // Give back original transformer, if borrowed.
421 : {
422 66 : std::lock_guard<std::mutex> lock(psThreadData->mutex);
423 33 : if (psThreadData->bTransformerArgInputAssignedToThread &&
424 24 : pTransformerArg == psThreadData->pTransformerArgInput)
425 : {
426 : psThreadData->mapThreadToTransformerArg.erase(
427 22 : psThreadData->mapThreadToTransformerArg.find(nThreadId)); // Unregister so GWKThreadsEnd() never destroys the caller-owned transformer.
428 22 : psThreadData->bTransformerArgInputAssignedToThread = false;
429 : }
430 : }
431 : }
432 :
433 : /************************************************************************/
434 : /* GWKRun() */
435 : /************************************************************************/
436 :
437 3179 : static CPLErr GWKRun(GDALWarpKernel *poWK, const char *pszFuncName, // Runs pfnFunc over the destination rows, split across jobs when a job queue exists; pszFuncName is only used for the debug message.
438 : void (*pfnFunc)(void *pUserData))
439 :
440 : {
441 3179 : const int nDstYSize = poWK->nDstYSize;
442 :
443 3179 : CPLDebug("GDAL",
444 : "GDALWarpKernel()::%s() "
445 : "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d",
446 : pszFuncName, poWK->nSrcXOff, poWK->nSrcYOff, poWK->nSrcXSize,
447 : poWK->nSrcYSize, poWK->nDstXOff, poWK->nDstYOff, poWK->nDstXSize,
448 : poWK->nDstYSize);
449 :
450 3179 : if (!poWK->pfnProgress(poWK->dfProgressBase, "", poWK->pProgress)) // Initial progress report; a FALSE return aborts before any work starts.
451 : {
452 0 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
453 0 : return CE_Failure;
454 : }
455 :
456 3179 : GWKThreadData *psThreadData =
457 : static_cast<GWKThreadData *>(poWK->psThreadData);
458 3179 : if (psThreadData == nullptr || psThreadData->poJobQueue == nullptr)
459 : {
460 3156 : return GWKGenericMonoThread(poWK, pfnFunc); // No job queue: run synchronously in the calling thread.
461 : }
462 :
463 23 : int nThreads = std::min(psThreadData->nMaxThreads, nDstYSize / 2); // At most one job per two destination rows.
464 : // Config option mostly useful for tests to be able to test multithreading
465 : // with small rasters
466 : const int nWarpChunkSize =
467 23 : atoi(CPLGetConfigOption("WARP_THREAD_CHUNK_SIZE", "65536"));
468 23 : if (nWarpChunkSize > 0)
469 : {
470 21 : GIntBig nChunks =
471 21 : static_cast<GIntBig>(nDstYSize) * poWK->nDstXSize / nWarpChunkSize; // Destination pixel count divided by the chunk size; 64-bit to avoid overflow.
472 21 : if (nThreads > nChunks)
473 16 : nThreads = static_cast<int>(nChunks);
474 : }
475 23 : if (nThreads <= 0)
476 19 : nThreads = 1; // Always submit at least one job.
477 :
478 23 : CPLDebug("WARP", "Using %d threads", nThreads);
479 :
480 23 : auto &jobs = *psThreadData->threadJobs;
481 23 : CPLAssert(static_cast<int>(jobs.size()) >= nThreads);
482 : // Fill-in job structures.
483 56 : for (int i = 0; i < nThreads; ++i)
484 : {
485 33 : auto &job = jobs[i];
486 33 : job.poWK = poWK;
487 33 : job.iYMin =
488 33 : static_cast<int>(static_cast<int64_t>(i) * nDstYSize / nThreads); // Row ranges of consecutive jobs are contiguous; 64-bit product avoids int overflow.
489 33 : job.iYMax = static_cast<int>(static_cast<int64_t>(i + 1) * nDstYSize /
490 33 : nThreads);
491 33 : if (poWK->pfnProgress != GDALDummyProgress)
492 2 : job.pfnProgress = GWKProgressThread; // NOTE(review): not cleared otherwise, so a reused job slot keeps a callback set by an earlier run.
493 33 : job.pfnFunc = pfnFunc;
494 : }
495 :
496 : bool bStopFlag;
497 : {
498 : {
499 : // Important: do not run the SubmitJob() loop under the mutex
500 : // because in some cases (typically if the current thread has been
501 : // created by the GDAL global thread pool), the task will actually
502 : // be run synchronously by SubmitJob(), and as it tries to acquire
503 : // the mutex, that would result in a dead-lock
504 23 : std::unique_lock<std::mutex> lock(psThreadData->mutex);
505 :
506 23 : psThreadData->nTotalThreadCountForThisRun = nThreads;
507 23 : psThreadData->nCurThreadCountForThisRun = 0; // NOTE(review): counter and stopFlag are not reset here; presumably handled elsewhere, verify.
508 : }
509 :
510 : // Start jobs.
511 56 : for (int i = 0; i < nThreads; ++i)
512 : {
513 33 : auto &job = jobs[i];
514 33 : psThreadData->poJobQueue->SubmitJob(ThreadFuncAdapter,
515 : static_cast<void *>(&job));
516 : }
517 :
518 : /* --------------------------------------------------------------------
519 : */
520 : /* Report progress. */
521 : /* --------------------------------------------------------------------
522 : */
523 23 : std::unique_lock<std::mutex> lock(psThreadData->mutex);
524 23 : if (poWK->pfnProgress != GDALDummyProgress)
525 : {
526 16 : while (psThreadData->counter < nDstYSize) // Loop until the jobs have reported nDstYSize progress ticks or the user cancels.
527 : {
528 15 : psThreadData->cv.wait(lock); // Woken by GWKProgressThread(); NOTE(review): relies on jobs continuing to notify.
529 15 : if (!poWK->pfnProgress(poWK->dfProgressBase +
530 15 : poWK->dfProgressScale *
531 15 : (psThreadData->counter /
532 15 : static_cast<double>(nDstYSize)),
533 : "", poWK->pProgress))
534 : {
535 1 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
536 1 : psThreadData->stopFlag = true; // Jobs read this flag in GWKProgressThread().
537 1 : break;
538 : }
539 : }
540 :
541 2 : if (!psThreadData->stopFlag)
542 : {
543 1 : if (!poWK->pfnProgress(poWK->dfProgressBase + // Final report at full scale for this run.
544 1 : poWK->dfProgressScale,
545 : "", poWK->pProgress))
546 : {
547 0 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
548 0 : psThreadData->stopFlag = true;
549 : }
550 : }
551 : }
552 :
553 23 : bStopFlag = psThreadData->stopFlag; // Read while the mutex is still held.
554 : }
555 :
556 : /* -------------------------------------------------------------------- */
557 : /* Wait for all jobs to complete. */
558 : /* -------------------------------------------------------------------- */
559 23 : psThreadData->poJobQueue->WaitCompletion();
560 :
561 23 : return bStopFlag ? CE_Failure : CE_None; // NOTE(review): a stopFlag raised by a job after the read above is not reflected in the result.
562 : }
563 :
564 : /************************************************************************/
565 : /* ==================================================================== */
566 : /* GDALWarpKernel */
567 : /* ==================================================================== */
568 : /************************************************************************/
569 :
570 : /**
571 : * \class GDALWarpKernel "gdalwarper.h"
572 : *
573 : * Low level image warping class.
574 : *
575 : * This class is responsible for low level image warping for one
576 : * "chunk" of imagery. The class is essentially a structure with all
577 : * data members public - primarily so that new special-case functions
578 : * can be added without changing the class declaration.
579 : *
580 : * Applications are normally intended to interact with warping facilities
581 : * through the GDALWarpOperation class, though the GDALWarpKernel can in
582 : * theory be used directly if great care is taken in setting up the
583 : * control data.
584 : *
585 : * <h3>Design Issues</h3>
586 : *
587 : * The intention is that PerformWarp() would analyze the setup in terms
588 : * of the datatype, resampling type, and validity/density mask usage and
589 : * pick one of many specific implementations of the warping algorithm over
590 : * a continuum of optimization vs. generality. At one end there will be a
591 : * reference general purpose implementation of the algorithm that supports
592 : * any data type (working internally in double precision complex), all three
593 : * resampling types, and any or all of the validity/density masks. At the
594 : * other end would be highly optimized algorithms for common cases like
595 : * nearest neighbour resampling on GDT_UInt8 data with no masks.
596 : *
597 : * The full set of optimized versions have not been decided but we should
598 : * expect to have at least:
599 : * - One for each resampling algorithm for 8bit data with no masks.
600 : * - One for each resampling algorithm for float data with no masks.
601 : * - One for each resampling algorithm for float data with any/all masks
602 : * (essentially the generic case for just float data).
603 : * - One for each resampling algorithm for 8bit data with support for
604 : * input validity masks (per band or per pixel). This handles the common
605 : * case of nodata masking.
606 : * - One for each resampling algorithm for float data with support for
607 : * input validity masks (per band or per pixel). This handles the common
608 : * case of nodata masking.
609 : *
610 : * Some of the specializations would operate on all bands in one pass
611 : * (especially the ones without masking would do this), while others might
612 : * process each band individually to reduce code complexity.
613 : *
614 : * <h3>Masking Semantics</h3>
615 : *
616 : * A detailed explanation of the semantics of the validity and density masks,
617 : * and their effects on resampling kernels is needed here.
618 : */
619 :
620 : /************************************************************************/
621 : /* GDALWarpKernel Data Members */
622 : /************************************************************************/
623 :
624 : /**
625 : * \var GDALResampleAlg GDALWarpKernel::eResample;
626 : *
627 : * Resampling algorithm.
628 : *
629 : * The resampling algorithm to use. One of GRA_NearestNeighbour, GRA_Bilinear,
630 : * GRA_Cubic, GRA_CubicSpline, GRA_Lanczos, GRA_Average, GRA_RMS,
631 : * GRA_Mode or GRA_Sum.
632 : *
633 : * This field is required. GRA_NearestNeighbour may be used as a default
634 : * value.
635 : */
636 :
637 : /**
638 : * \var GDALDataType GDALWarpKernel::eWorkingDataType;
639 : *
640 : * Working pixel data type.
641 : *
642 : * The datatype of pixels in the source image (papabySrcImage) and
643 : * destination image (papabyDstImage) buffers. Note that operations on
644 : * some data types (such as GDT_UInt8) may be much better optimized than other
645 : * less common cases.
646 : *
647 : * This field is required. It may not be GDT_Unknown.
648 : */
649 :
650 : /**
651 : * \var int GDALWarpKernel::nBands;
652 : *
653 : * Number of bands.
654 : *
655 : * The number of bands (layers) of imagery being warped. Determines the
656 : * number of entries in the papabySrcImage, papanBandSrcValid,
657 : * and papabyDstImage arrays.
658 : *
659 : * This field is required.
660 : */
661 :
662 : /**
663 : * \var int GDALWarpKernel::nSrcXSize;
664 : *
665 : * Source image width in pixels.
666 : *
667 : * This field is required.
668 : */
669 :
670 : /**
671 : * \var int GDALWarpKernel::nSrcYSize;
672 : *
673 : * Source image height in pixels.
674 : *
675 : * This field is required.
676 : */
677 :
678 : /**
679 : * \var double GDALWarpKernel::dfSrcXExtraSize;
680 : *
681 : * Number of pixels included in nSrcXSize that are present on the edges of
682 : * the area of interest to take into account the width of the kernel.
683 : *
684 : * This field is required.
685 : */
686 :
687 : /**
688 : * \var double GDALWarpKernel::dfSrcYExtraSize;
689 : *
690 : * Number of pixels included in nSrcYSize that are present on the edges of
691 : * the area of interest to take into account the height of the kernel.
692 : *
693 : * This field is required.
694 : */
695 :
696 : /**
697 : * \var GByte **GDALWarpKernel::papabySrcImage;
698 : *
699 : * Array of source image band data.
700 : *
701 : * This is an array (of size GDALWarpKernel::nBands) of pointers
702 : * to image data. Each individual band of image data is organized as a single
703 : * block of image data in left to right, then bottom to top order. The actual
704 : * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
705 : *
706 : * To access the pixel value for the (x=3, y=4) pixel (zero based) of
707 : * the second band with eWorkingDataType set to GDT_Float32 use code like
708 : * this:
709 : *
710 : * \code
711 : * float dfPixelValue;
712 : * int nBand = 2-1; // Band indexes are zero based.
713 : * int nPixel = 3; // Zero based.
714 : * int nLine = 4; // Zero based.
715 : *
716 : * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
717 : * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
718 : * assert( nBand >= 0 && nBand < poKern->nBands );
719 : * dfPixelValue = ((float *) poKern->papabySrcImage[nBand])
720 : * [nPixel + nLine * poKern->nSrcXSize];
721 : * \endcode
722 : *
723 : * This field is required.
724 : */
725 :
726 : /**
727 : * \var GUInt32 **GDALWarpKernel::papanBandSrcValid;
728 : *
729 : * Per band validity mask for source pixels.
730 : *
731 : * Array of pixel validity mask layers for each source band. Each of
732 : * the mask layers is the same size (in pixels) as the source image with
733 : * one bit per pixel. Note that it is legal (and common) for this to be
734 : * NULL indicating that none of the pixels are invalidated, or for some
735 : * band validity masks to be NULL in which case all pixels of the band are
736 : * valid. The following code can be used to test the validity of a particular
737 : * pixel.
738 : *
739 : * \code
740 : * int bIsValid = TRUE;
741 : * int nBand = 2-1; // Band indexes are zero based.
742 : * int nPixel = 3; // Zero based.
743 : * int nLine = 4; // Zero based.
744 : *
745 : * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
746 : * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
747 : * assert( nBand >= 0 && nBand < poKern->nBands );
748 : *
749 : * if( poKern->papanBandSrcValid != NULL
750 : * && poKern->papanBandSrcValid[nBand] != NULL )
751 : * {
752 : * GUInt32 *panBandMask = poKern->papanBandSrcValid[nBand];
753 : * int iPixelOffset = nPixel + nLine * poKern->nSrcXSize;
754 : *
755 : * bIsValid = CPLMaskGet(panBandMask, iPixelOffset);
756 : * }
757 : * \endcode
758 : */
759 :
760 : /**
761 : * \var GUInt32 *GDALWarpKernel::panUnifiedSrcValid;
762 : *
763 : * Per pixel validity mask for source pixels.
764 : *
765 : * A single validity mask layer that applies to the pixels of all source
766 : * bands. It is accessed similarly to papanBandSrcValid, but without the
767 : * extra level of band indirection.
768 : *
769 : * This pointer may be NULL indicating that all pixels are valid.
770 : *
771 : * Note that if both panUnifiedSrcValid, and papanBandSrcValid are available,
772 : * the pixel isn't considered to be valid unless both arrays indicate it is
773 : * valid.
774 : */
775 :
776 : /**
777 : * \var float *GDALWarpKernel::pafUnifiedSrcDensity;
778 : *
779 : * Per pixel density mask for source pixels.
780 : *
781 : * A single density mask layer that applies to the pixels of all source
782 : * bands. It contains values between 0.0 and 1.0 indicating the degree to
783 : * which this pixel should be allowed to contribute to the output result.
784 : *
785 : * This pointer may be NULL indicating that all pixels have a density of 1.0.
786 : *
787 : * The density for a pixel may be accessed like this:
788 : *
789 : * \code
790 : * float fDensity = 1.0;
791 : * int nPixel = 3; // Zero based.
792 : * int nLine = 4; // Zero based.
793 : *
794 : * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
795 : * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
796 : * if( poKern->pafUnifiedSrcDensity != NULL )
797 : * fDensity = poKern->pafUnifiedSrcDensity
798 : * [nPixel + nLine * poKern->nSrcXSize];
799 : * \endcode
800 : */
801 :
802 : /**
803 : * \var int GDALWarpKernel::nDstXSize;
804 : *
805 : * Width of destination image in pixels.
806 : *
807 : * This field is required.
808 : */
809 :
810 : /**
811 : * \var int GDALWarpKernel::nDstYSize;
812 : *
813 : * Height of destination image in pixels.
814 : *
815 : * This field is required.
816 : */
817 :
818 : /**
819 : * \var GByte **GDALWarpKernel::papabyDstImage;
820 : *
821 : * Array of destination image band data.
822 : *
823 : * This is an array (of size GDALWarpKernel::nBands) of pointers
824 : * to image data. Each individual band of image data is organized as a single
825 : * block of image data in left to right, then bottom to top order. The actual
826 : * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
827 : *
828 : * To access the pixel value for the (x=3, y=4) pixel (zero based) of
829 : * the second band with eWorkingDataType set to GDT_Float32 use code like
830 : * this:
831 : *
832 : * \code
833 : * float dfPixelValue;
834 : * int nBand = 2-1; // Band indexes are zero based.
835 : * int nPixel = 3; // Zero based.
836 : * int nLine = 4; // Zero based.
837 : *
838 : * assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
839 : * assert( nLine >= 0 && nLine < poKern->nDstYSize );
840 : * assert( nBand >= 0 && nBand < poKern->nBands );
841 : * dfPixelValue = ((float *) poKern->papabyDstImage[nBand])
842 : * [nPixel + nLine * poKern->nDstXSize];
843 : * \endcode
844 : *
845 : * This field is required.
846 : */
847 :
848 : /**
849 : * \var GUInt32 *GDALWarpKernel::panDstValid;
850 : *
851 : * Per pixel validity mask for destination pixels.
852 : *
853 : * A single validity mask layer that applies to the pixels of all destination
854 : * bands. It is accessed similarly to panUnifiedSrcValid, but based
855 : * on the size of the destination image.
856 : *
857 : * This pointer may be NULL indicating that all pixels are valid.
858 : */
859 :
860 : /**
861 : * \var float *GDALWarpKernel::pafDstDensity;
862 : *
863 : * Per pixel density mask for destination pixels.
864 : *
865 : * A single density mask layer that applies to the pixels of all destination
866 : * bands. It contains values between 0.0 and 1.0.
867 : *
868 : * This pointer may be NULL indicating that all pixels have a density of 1.0.
869 : *
870 : * The density for a pixel may be accessed like this:
871 : *
872 : * \code
873 : * float fDensity = 1.0;
874 : * int nPixel = 3; // Zero based.
875 : * int nLine = 4; // Zero based.
876 : *
877 : * assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
878 : * assert( nLine >= 0 && nLine < poKern->nDstYSize );
879 : * if( poKern->pafDstDensity != NULL )
880 : * fDensity = poKern->pafDstDensity[nPixel + nLine * poKern->nDstXSize];
881 : * \endcode
882 : */
883 :
884 : /**
885 : * \var int GDALWarpKernel::nSrcXOff;
886 : *
887 : * X offset to source pixel coordinates for transformation.
888 : *
889 : * See pfnTransformer.
890 : *
891 : * This field is required.
892 : */
893 :
894 : /**
895 : * \var int GDALWarpKernel::nSrcYOff;
896 : *
897 : * Y offset to source pixel coordinates for transformation.
898 : *
899 : * See pfnTransformer.
900 : *
901 : * This field is required.
902 : */
903 :
904 : /**
905 : * \var int GDALWarpKernel::nDstXOff;
906 : *
907 : * X offset to destination pixel coordinates for transformation.
908 : *
909 : * See pfnTransformer.
910 : *
911 : * This field is required.
912 : */
913 :
914 : /**
915 : * \var int GDALWarpKernel::nDstYOff;
916 : *
917 : * Y offset to destination pixel coordinates for transformation.
918 : *
919 : * See pfnTransformer.
920 : *
921 : * This field is required.
922 : */
923 :
924 : /**
925 : * \var GDALTransformerFunc GDALWarpKernel::pfnTransformer;
926 : *
927 : * Source/destination location transformer.
928 : *
929 : * The function to call to transform coordinates between source image
930 : * pixel/line coordinates and destination image pixel/line coordinates.
931 : * See GDALTransformerFunc() for details of the semantics of this function.
932 : *
933 : * The GDALWarpKernel algorithm will only ever use this transformer in
934 : * "destination to source" mode (bDstToSrc=TRUE), and will always pass
935 : * partial or complete scanlines of points in the destination image as
936 : * input. This means, among other things, that it is safe to use the
937 : * approximating transform GDALApproxTransform() as the transformation
938 : * function.
939 : *
940 : * Source and destination images may be subsets of a larger overall image.
941 : * The transformation algorithms will expect and return pixel/line coordinates
942 : * in terms of this larger image, so coordinates need to be offset by
943 : * the offsets specified in nSrcXOff, nSrcYOff, nDstXOff, and nDstYOff before
944 : * passing to pfnTransformer, and after return from it.
945 : *
946 : * The GDALWarpKernel::pTransformerArg value will be passed as the callback
947 : * data to this function when it is called.
948 : *
949 : * This field is required.
950 : */
951 :
952 : /**
953 : * \var void *GDALWarpKernel::pTransformerArg;
954 : *
955 : * Callback data for pfnTransformer.
956 : *
957 : * This field may be NULL if not required for the pfnTransformer being used.
958 : */
959 :
960 : /**
961 : * \var GDALProgressFunc GDALWarpKernel::pfnProgress;
962 : *
963 : * The function to call to report progress of the algorithm, and to check
964 : * for a requested termination of the operation. It operates according to
965 : * GDALProgressFunc() semantics.
966 : *
967 : * Generally speaking the progress function will be invoked for each
968 : * scanline of the destination buffer that has been processed.
969 : *
970 : * This field may be NULL (internally set to GDALDummyProgress()).
971 : */
972 :
973 : /**
974 : * \var void *GDALWarpKernel::pProgress;
975 : *
976 : * Callback data for pfnProgress.
977 : *
978 : * This field may be NULL if not required for the pfnProgress being used.
979 : */
980 :
981 : /************************************************************************/
982 : /* GDALWarpKernel() */
983 : /************************************************************************/
984 :
985 3797 : GDALWarpKernel::GDALWarpKernel()
986 : : papszWarpOptions(nullptr), eResample(GRA_NearestNeighbour),
987 : eWorkingDataType(GDT_Unknown), nBands(0), nSrcXSize(0), nSrcYSize(0),
988 : dfSrcXExtraSize(0.0), dfSrcYExtraSize(0.0), papabySrcImage(nullptr),
989 : papanBandSrcValid(nullptr), panUnifiedSrcValid(nullptr),
990 : pafUnifiedSrcDensity(nullptr), nDstXSize(0), nDstYSize(0),
991 : papabyDstImage(nullptr), panDstValid(nullptr), pafDstDensity(nullptr),
992 : dfXScale(1.0), dfYScale(1.0), dfXFilter(0.0), dfYFilter(0.0), nXRadius(0),
993 : nYRadius(0), nFiltInitX(0), nFiltInitY(0), nSrcXOff(0), nSrcYOff(0),
994 : nDstXOff(0), nDstYOff(0), pfnTransformer(nullptr),
995 : pTransformerArg(nullptr), pfnProgress(GDALDummyProgress),
996 : pProgress(nullptr), dfProgressBase(0.0), dfProgressScale(1.0),
997 : padfDstNoDataReal(nullptr), psThreadData(nullptr),
998 3797 : eTieStrategy(GWKTS_First)
999 : {
1000 3797 : }
1001 :
1002 : /************************************************************************/
1003 : /* ~GDALWarpKernel() */
1004 : /************************************************************************/
1005 :
1006 3797 : GDALWarpKernel::~GDALWarpKernel()
1007 : {
1008 3797 : }
1009 :
1010 : /************************************************************************/
1011 : /* getArea() */
1012 : /************************************************************************/
1013 :
typedef std::pair<double, double> XYPair;

typedef std::vector<XYPair> XYPoly;

/**
 * Return the (unsigned) area enclosed by a polygon.
 *
 * Uses the "shoelace" formulation
 *   area = 0.5 * | sum_i x[i] * (y[i+1] - y[i-1]) |
 * with indices wrapping around, so the result does not depend on the
 * winding order.
 *
 * @param poly Vertices as (x, y) pairs. The ring may or may not be closed
 *             (i.e. the last point may or may not repeat the first one).
 * @return the area, or 0.0 when fewer than 3 points are supplied.
 */
static double getArea(const XYPoly &poly)
{
    const size_t nPointCount = poly.size();

    // Fewer than 3 vertices cannot enclose any area. This guard also keeps
    // the poly[1] and poly[nPointCount - 2] accesses below in bounds.
    if (nPointCount < 3)
        return 0.0;

    // First vertex: its "previous" neighbour is the last vertex.
    double dfAreaSum =
        poly[0].first * (poly[1].second - poly[nPointCount - 1].second);

    // Interior vertices.
    for (size_t i = 1; i < nPointCount - 1; i++)
    {
        dfAreaSum += poly[i].first * (poly[i + 1].second - poly[i - 1].second);
    }

    // Last vertex: its "next" neighbour is the first vertex.
    dfAreaSum += poly[nPointCount - 1].first *
                 (poly[0].second - poly[nPointCount - 2].second);

    return 0.5 * std::fabs(dfAreaSum);
}
1036 :
1037 : /************************************************************************/
1038 : /* CanUse4SamplesFormula() */
1039 : /************************************************************************/
1040 :
1041 4665 : static bool CanUse4SamplesFormula(const GDALWarpKernel *poWK)
1042 : {
1043 4665 : if (poWK->eResample == GRA_Bilinear || poWK->eResample == GRA_Cubic)
1044 : {
1045 : // Use 4-sample formula if we are not downsampling by more than a
1046 : // factor of 1:2
1047 2637 : if (poWK->dfXScale > 0.5 && poWK->dfYScale > 0.5)
1048 2201 : return true;
1049 436 : CPLDebugOnce("WARP",
1050 : "Not using 4-sample bilinear/bicubic formula because "
1051 : "XSCALE(=%f) and/or YSCALE(=%f) <= 0.5",
1052 : poWK->dfXScale, poWK->dfYScale);
1053 : }
1054 2464 : return false;
1055 : }
1056 :
1057 : /************************************************************************/
1058 : /* PerformWarp() */
1059 : /************************************************************************/
1060 :
1061 : /**
1062 : * \fn CPLErr GDALWarpKernel::PerformWarp();
1063 : *
1064 : * This method performs the warp described in the GDALWarpKernel.
1065 : *
1066 : * @return CE_None on success or CE_Failure if an error occurs.
1067 : */
1068 :
1069 3793 : CPLErr GDALWarpKernel::PerformWarp()
1070 :
1071 : {
1072 3793 : const CPLErr eErr = Validate();
1073 :
1074 3793 : if (eErr != CE_None)
1075 1 : return eErr;
1076 :
1077 : // See #2445 and #3079.
1078 3792 : if (nSrcXSize <= 0 || nSrcYSize <= 0)
1079 : {
1080 613 : if (!pfnProgress(dfProgressBase + dfProgressScale, "", pProgress))
1081 : {
1082 0 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
1083 0 : return CE_Failure;
1084 : }
1085 613 : return CE_None;
1086 : }
1087 :
1088 : /* -------------------------------------------------------------------- */
1089 : /* Pre-calculate resampling scales and window sizes for filtering. */
1090 : /* -------------------------------------------------------------------- */
1091 :
1092 3179 : dfXScale = 0.0;
1093 3179 : dfYScale = 0.0;
1094 :
1095 : // XSCALE and YSCALE per warping chunk is not necessarily ideal, in case of
1096 : // heterogeneous change in shapes.
1097 : // Best would probably be a per-pixel scale computation.
1098 3179 : const char *pszXScale = CSLFetchNameValue(papszWarpOptions, "XSCALE");
1099 3179 : const char *pszYScale = CSLFetchNameValue(papszWarpOptions, "YSCALE");
1100 3179 : if (!pszXScale || !pszYScale)
1101 : {
1102 : // Sample points along a grid in the destination space
1103 3178 : constexpr int MAX_POINTS_PER_DIM = 10;
1104 3178 : const int nPointsX = std::min(MAX_POINTS_PER_DIM, nDstXSize);
1105 3178 : const int nPointsY = std::min(MAX_POINTS_PER_DIM, nDstYSize);
1106 3178 : constexpr int CORNER_COUNT_PER_SQUARE = 4;
1107 3178 : const int nPoints = CORNER_COUNT_PER_SQUARE * nPointsX * nPointsY;
1108 6356 : std::vector<double> adfX;
1109 6356 : std::vector<double> adfY;
1110 3178 : adfX.reserve(CORNER_COUNT_PER_SQUARE * nPoints);
1111 3178 : adfY.reserve(CORNER_COUNT_PER_SQUARE * nPoints);
1112 6356 : std::vector<double> adfZ(CORNER_COUNT_PER_SQUARE * nPoints);
1113 6356 : std::vector<int> abSuccess(CORNER_COUNT_PER_SQUARE * nPoints);
1114 30643 : for (int iY = 0; iY < nPointsY; iY++)
1115 : {
1116 27465 : const double dfYShift = (iY > 0 && iY == nPointsY - 1) ? -1.0 : 0.0;
1117 27465 : const double dfY =
1118 27465 : dfYShift + (nPointsY == 1 ? 0.0
1119 27249 : : static_cast<double>(iY) *
1120 27249 : nDstYSize / (nPointsY - 1));
1121 :
1122 291770 : for (int iX = 0; iX < nPointsX; iX++)
1123 : {
1124 264305 : const double dfXShift =
1125 264305 : (iX > 0 && iX == nPointsX - 1) ? -1.0 : 0.0;
1126 :
1127 264305 : const double dfX =
1128 264305 : dfXShift + (nPointsX == 1 ? 0.0
1129 264103 : : static_cast<double>(iX) *
1130 264103 : nDstXSize / (nPointsX - 1));
1131 :
1132 : // Reproject a unit square at each sample point
1133 264305 : adfX.push_back(dfX);
1134 264305 : adfY.push_back(dfY);
1135 :
1136 264305 : adfX.push_back(dfX + 1);
1137 264305 : adfY.push_back(dfY);
1138 :
1139 264305 : adfX.push_back(dfX);
1140 264305 : adfY.push_back(dfY + 1);
1141 :
1142 264305 : adfX.push_back(dfX + 1);
1143 264305 : adfY.push_back(dfY + 1);
1144 : }
1145 : }
1146 3178 : pfnTransformer(pTransformerArg, TRUE, static_cast<int>(adfX.size()),
1147 : adfX.data(), adfY.data(), adfZ.data(), abSuccess.data());
1148 :
1149 6356 : std::vector<XYPair> adfXYScales;
1150 3178 : adfXYScales.reserve(nPoints);
1151 267483 : for (int i = 0; i < nPoints; i += CORNER_COUNT_PER_SQUARE)
1152 : {
1153 527494 : if (abSuccess[i + 0] && abSuccess[i + 1] && abSuccess[i + 2] &&
1154 263189 : abSuccess[i + 3])
1155 : {
1156 2105500 : const auto square = [](double x) { return x * x; };
1157 :
1158 263187 : const double vx01 = adfX[i + 1] - adfX[i + 0];
1159 263187 : const double vy01 = adfY[i + 1] - adfY[i + 0];
1160 263187 : const double len01_sq = square(vx01) + square(vy01);
1161 :
1162 263187 : const double vx23 = adfX[i + 3] - adfX[i + 2];
1163 263187 : const double vy23 = adfY[i + 3] - adfY[i + 2];
1164 263187 : const double len23_sq = square(vx23) + square(vy23);
1165 :
1166 263187 : const double vx02 = adfX[i + 2] - adfX[i + 0];
1167 263187 : const double vy02 = adfY[i + 2] - adfY[i + 0];
1168 263187 : const double len02_sq = square(vx02) + square(vy02);
1169 :
1170 263187 : const double vx13 = adfX[i + 3] - adfX[i + 1];
1171 263187 : const double vy13 = adfY[i + 3] - adfY[i + 1];
1172 263187 : const double len13_sq = square(vx13) + square(vy13);
1173 :
1174 : // ~ 20 degree, heuristic
1175 263187 : constexpr double TAN_MODEST_ANGLE = 0.35;
1176 :
1177 : // 10%, heuristic
1178 263187 : constexpr double LENGTH_RELATIVE_TOLERANCE = 0.1;
1179 :
1180 : // Security margin to avoid division by zero (would only
1181 : // happen in case of degenerated coordinate transformation,
1182 : // or insane upsampling)
1183 263187 : constexpr double EPSILON = 1e-10;
1184 :
1185 : // Does the transformed square looks like an almost non-rotated
1186 : // quasi-rectangle ?
1187 263187 : if (std::fabs(vy01) < TAN_MODEST_ANGLE * vx01 &&
1188 256070 : std::fabs(vy23) < TAN_MODEST_ANGLE * vx23 &&
1189 256043 : std::fabs(len01_sq - len23_sq) <
1190 256043 : LENGTH_RELATIVE_TOLERANCE * std::fabs(len01_sq) &&
1191 255930 : std::fabs(len02_sq - len13_sq) <
1192 255930 : LENGTH_RELATIVE_TOLERANCE * std::fabs(len02_sq))
1193 : {
1194 : // Using a geometric average here of lenAB_sq and lenCD_sq,
1195 : // hence a sqrt(), and as this is still a squared value,
1196 : // we need another sqrt() to get a distance.
1197 : const double dfXLength =
1198 255915 : std::sqrt(std::sqrt(len01_sq * len23_sq));
1199 : const double dfYLength =
1200 255915 : std::sqrt(std::sqrt(len02_sq * len13_sq));
1201 255915 : if (dfXLength > EPSILON && dfYLength > EPSILON)
1202 : {
1203 255915 : const double dfThisXScale = 1.0 / dfXLength;
1204 255915 : const double dfThisYScale = 1.0 / dfYLength;
1205 255915 : adfXYScales.push_back({dfThisXScale, dfThisYScale});
1206 255915 : }
1207 : }
1208 : else
1209 : {
1210 : // If not, then consider the area of the transformed unit
1211 : // square to determine the X/Y scales.
1212 7272 : const XYPoly poly{{adfX[i + 0], adfY[i + 0]},
1213 7272 : {adfX[i + 1], adfY[i + 1]},
1214 7272 : {adfX[i + 3], adfY[i + 3]},
1215 29088 : {adfX[i + 2], adfY[i + 2]}};
1216 7272 : const double dfSrcArea = getArea(poly);
1217 7272 : const double dfFactor = std::sqrt(dfSrcArea);
1218 7272 : if (dfFactor > EPSILON)
1219 : {
1220 7272 : const double dfThisXScale = 1.0 / dfFactor;
1221 7272 : const double dfThisYScale = dfThisXScale;
1222 7272 : adfXYScales.push_back({dfThisXScale, dfThisYScale});
1223 : }
1224 : }
1225 : }
1226 : }
1227 :
1228 3178 : if (!adfXYScales.empty())
1229 : {
1230 : // Sort by increasing xscale * yscale
1231 3178 : std::sort(adfXYScales.begin(), adfXYScales.end(),
1232 1431090 : [](const XYPair &a, const XYPair &b)
1233 1431090 : { return a.first * a.second < b.first * b.second; });
1234 :
1235 : // Compute the per-axis maximum of scale
1236 3178 : double dfXMax = 0;
1237 3178 : double dfYMax = 0;
1238 266365 : for (const auto &[dfX, dfY] : adfXYScales)
1239 : {
1240 263187 : dfXMax = std::max(dfXMax, dfX);
1241 263187 : dfYMax = std::max(dfYMax, dfY);
1242 : }
1243 :
1244 : // Now eliminate outliers, defined as ones whose value is < 10% of
1245 : // the maximum value, typically found at a polar discontinuity, and
1246 : // compute the average of non-outlier values.
1247 3178 : dfXScale = 0;
1248 3178 : dfYScale = 0;
1249 3178 : int i = 0;
1250 3178 : constexpr double THRESHOLD = 0.1; // 10%, rather arbitrary
1251 266365 : for (const auto &[dfX, dfY] : adfXYScales)
1252 : {
1253 263187 : if (dfX > THRESHOLD * dfXMax && dfY > THRESHOLD * dfYMax)
1254 : {
1255 260339 : ++i;
1256 260339 : const double dfXDelta = dfX - dfXScale;
1257 260339 : const double dfYDelta = dfY - dfYScale;
1258 260339 : const double dfInvI = 1.0 / i;
1259 260339 : dfXScale += dfXDelta * dfInvI;
1260 260339 : dfYScale += dfYDelta * dfInvI;
1261 : }
1262 : }
1263 : }
1264 : }
1265 :
1266 : // Round to closest integer reciprocal scale if we are very close to it
1267 : const auto RoundToClosestIntegerReciprocalScaleIfCloseEnough =
1268 6358 : [](double dfScale)
1269 : {
1270 6358 : if (dfScale < 1.0)
1271 : {
1272 2554 : double dfReciprocalScale = 1.0 / dfScale;
1273 2554 : const int nReciprocalScale =
1274 2554 : static_cast<int>(dfReciprocalScale + 0.5);
1275 2554 : if (fabs(dfReciprocalScale - nReciprocalScale) < 0.05)
1276 2112 : dfScale = 1.0 / nReciprocalScale;
1277 : }
1278 6358 : return dfScale;
1279 : };
1280 :
1281 3179 : if (dfXScale <= 0)
1282 1 : dfXScale = 1.0;
1283 3179 : if (dfYScale <= 0)
1284 1 : dfYScale = 1.0;
1285 :
1286 3179 : dfXScale = RoundToClosestIntegerReciprocalScaleIfCloseEnough(dfXScale);
1287 3179 : dfYScale = RoundToClosestIntegerReciprocalScaleIfCloseEnough(dfYScale);
1288 :
1289 3179 : if (pszXScale != nullptr)
1290 1 : dfXScale = CPLAtof(pszXScale);
1291 3179 : if (pszYScale != nullptr)
1292 1 : dfYScale = CPLAtof(pszYScale);
1293 :
1294 3179 : if (!pszXScale || !pszYScale)
1295 3178 : CPLDebug("WARP", "dfXScale = %f, dfYScale = %f", dfXScale, dfYScale);
1296 :
1297 3179 : const int bUse4SamplesFormula = CanUse4SamplesFormula(this);
1298 :
1299 : // Safety check for callers that would use GDALWarpKernel without using
1300 : // GDALWarpOperation.
1301 3116 : if ((eResample == GRA_CubicSpline || eResample == GRA_Lanczos ||
1302 3051 : ((eResample == GRA_Cubic || eResample == GRA_Bilinear) &&
1303 6358 : !bUse4SamplesFormula)) &&
1304 346 : atoi(CSLFetchNameValueDef(papszWarpOptions, "EXTRA_ELTS", "0")) !=
1305 : WARP_EXTRA_ELTS)
1306 : {
1307 0 : CPLError(CE_Failure, CPLE_AppDefined,
1308 : "Source arrays must have WARP_EXTRA_ELTS extra elements at "
1309 : "their end. "
1310 : "See GDALWarpKernel class definition. If this condition is "
1311 : "fulfilled, define a EXTRA_ELTS=%d warp options",
1312 : WARP_EXTRA_ELTS);
1313 0 : return CE_Failure;
1314 : }
1315 :
1316 3179 : dfXFilter = anGWKFilterRadius[eResample];
1317 3179 : dfYFilter = anGWKFilterRadius[eResample];
1318 :
1319 3179 : nXRadius = dfXScale < 1.0 ? static_cast<int>(ceil(dfXFilter / dfXScale))
1320 2593 : : static_cast<int>(dfXFilter);
1321 3179 : nYRadius = dfYScale < 1.0 ? static_cast<int>(ceil(dfYFilter / dfYScale))
1322 2610 : : static_cast<int>(dfYFilter);
1323 :
1324 : // Filter window offset depends on the parity of the kernel radius.
1325 3179 : nFiltInitX = ((anGWKFilterRadius[eResample] + 1) % 2) - nXRadius;
1326 3179 : nFiltInitY = ((anGWKFilterRadius[eResample] + 1) % 2) - nYRadius;
1327 :
1328 3179 : bApplyVerticalShift =
1329 3179 : CPLFetchBool(papszWarpOptions, "APPLY_VERTICAL_SHIFT", false);
1330 3179 : dfMultFactorVerticalShift = CPLAtof(CSLFetchNameValueDef(
1331 3179 : papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT", "1.0"));
1332 :
1333 : /* -------------------------------------------------------------------- */
1334 : /* Set up resampling functions. */
1335 : /* -------------------------------------------------------------------- */
1336 3179 : if (CPLFetchBool(papszWarpOptions, "USE_GENERAL_CASE", false))
1337 12 : return GWKGeneralCase(this);
1338 :
1339 3167 : const bool bNoMasksOrDstDensityOnly =
1340 3157 : papanBandSrcValid == nullptr && panUnifiedSrcValid == nullptr &&
1341 6324 : pafUnifiedSrcDensity == nullptr && panDstValid == nullptr;
1342 :
1343 3167 : if (eWorkingDataType == GDT_UInt8 && eResample == GRA_NearestNeighbour &&
1344 : bNoMasksOrDstDensityOnly)
1345 911 : return GWKNearestNoMasksOrDstDensityOnlyByte(this);
1346 :
1347 2256 : if (eWorkingDataType == GDT_UInt8 && eResample == GRA_Bilinear &&
1348 : bNoMasksOrDstDensityOnly)
1349 128 : return GWKBilinearNoMasksOrDstDensityOnlyByte(this);
1350 :
1351 2128 : if (eWorkingDataType == GDT_UInt8 && eResample == GRA_Cubic &&
1352 : bNoMasksOrDstDensityOnly)
1353 850 : return GWKCubicNoMasksOrDstDensityOnlyByte(this);
1354 :
1355 1278 : if (eWorkingDataType == GDT_UInt8 && eResample == GRA_CubicSpline &&
1356 : bNoMasksOrDstDensityOnly)
1357 12 : return GWKCubicSplineNoMasksOrDstDensityOnlyByte(this);
1358 :
1359 1266 : if (eWorkingDataType == GDT_UInt8 && eResample == GRA_NearestNeighbour)
1360 363 : return GWKNearestByte(this);
1361 :
1362 903 : if ((eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16) &&
1363 154 : eResample == GRA_NearestNeighbour && bNoMasksOrDstDensityOnly)
1364 14 : return GWKNearestNoMasksOrDstDensityOnlyShort(this);
1365 :
1366 889 : if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Cubic &&
1367 : bNoMasksOrDstDensityOnly)
1368 5 : return GWKCubicNoMasksOrDstDensityOnlyShort(this);
1369 :
1370 884 : if ((eWorkingDataType == GDT_Int16) && eResample == GRA_CubicSpline &&
1371 : bNoMasksOrDstDensityOnly)
1372 6 : return GWKCubicSplineNoMasksOrDstDensityOnlyShort(this);
1373 :
1374 878 : if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Bilinear &&
1375 : bNoMasksOrDstDensityOnly)
1376 5 : return GWKBilinearNoMasksOrDstDensityOnlyShort(this);
1377 :
1378 873 : if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Cubic &&
1379 : bNoMasksOrDstDensityOnly)
1380 14 : return GWKCubicNoMasksOrDstDensityOnlyUShort(this);
1381 :
1382 859 : if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_CubicSpline &&
1383 : bNoMasksOrDstDensityOnly)
1384 5 : return GWKCubicSplineNoMasksOrDstDensityOnlyUShort(this);
1385 :
1386 854 : if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Bilinear &&
1387 : bNoMasksOrDstDensityOnly)
1388 6 : return GWKBilinearNoMasksOrDstDensityOnlyUShort(this);
1389 :
1390 848 : if (eWorkingDataType == GDT_Int8 && eResample == GRA_NearestNeighbour)
1391 9 : return GWKNearestInt8(this);
1392 :
1393 839 : if (eWorkingDataType == GDT_Int16 && eResample == GRA_NearestNeighbour)
1394 40 : return GWKNearestShort(this);
1395 :
1396 799 : if (eWorkingDataType == GDT_UInt16 && eResample == GRA_NearestNeighbour)
1397 10 : return GWKNearestUnsignedShort(this);
1398 :
1399 789 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour &&
1400 : bNoMasksOrDstDensityOnly)
1401 11 : return GWKNearestNoMasksOrDstDensityOnlyFloat(this);
1402 :
1403 778 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour)
1404 50 : return GWKNearestFloat(this);
1405 :
1406 728 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_Bilinear &&
1407 : bNoMasksOrDstDensityOnly)
1408 4 : return GWKBilinearNoMasksOrDstDensityOnlyFloat(this);
1409 :
1410 724 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_Cubic &&
1411 : bNoMasksOrDstDensityOnly)
1412 9 : return GWKCubicNoMasksOrDstDensityOnlyFloat(this);
1413 :
1414 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
1415 : if (eWorkingDataType == GDT_Float64 && eResample == GRA_Bilinear &&
1416 : bNoMasksOrDstDensityOnly)
1417 : return GWKBilinearNoMasksOrDstDensityOnlyDouble(this);
1418 :
1419 : if (eWorkingDataType == GDT_Float64 && eResample == GRA_Cubic &&
1420 : bNoMasksOrDstDensityOnly)
1421 : return GWKCubicNoMasksOrDstDensityOnlyDouble(this);
1422 : #endif
1423 :
1424 715 : if (eResample == GRA_Average)
1425 160 : return GWKAverageOrMode(this);
1426 :
1427 555 : if (eResample == GRA_RMS)
1428 9 : return GWKAverageOrMode(this);
1429 :
1430 546 : if (eResample == GRA_Mode)
1431 45 : return GWKAverageOrMode(this);
1432 :
1433 501 : if (eResample == GRA_Max)
1434 6 : return GWKAverageOrMode(this);
1435 :
1436 495 : if (eResample == GRA_Min)
1437 5 : return GWKAverageOrMode(this);
1438 :
1439 490 : if (eResample == GRA_Med)
1440 6 : return GWKAverageOrMode(this);
1441 :
1442 484 : if (eResample == GRA_Q1)
1443 10 : return GWKAverageOrMode(this);
1444 :
1445 474 : if (eResample == GRA_Q3)
1446 5 : return GWKAverageOrMode(this);
1447 :
1448 469 : if (eResample == GRA_Sum)
1449 19 : return GWKSumPreserving(this);
1450 :
1451 450 : if (!GDALDataTypeIsComplex(eWorkingDataType))
1452 : {
1453 223 : return GWKRealCase(this);
1454 : }
1455 :
1456 227 : return GWKGeneralCase(this);
1457 : }
1458 :
1459 : /************************************************************************/
1460 : /* Validate() */
1461 : /************************************************************************/
1462 :
1463 : /**
1464 : * \fn CPLErr GDALWarpKernel::Validate()
1465 : *
1466 : * Check the settings in the GDALWarpKernel, and issue a CPLError()
1467 : * (and return CE_Failure) if the configuration is considered to be
1468 : * invalid for some reason.
1469 : *
1470 : * This method will also do some standard defaulting such as setting
1471 : * pfnProgress to GDALDummyProgress() if it is NULL.
1472 : *
1473 : * @return CE_None on success or CE_Failure if an error is detected.
1474 : */
1475 :
1476 3793 : CPLErr GDALWarpKernel::Validate()
1477 :
1478 : {
1479 3793 : if (static_cast<size_t>(eResample) >=
1480 : (sizeof(anGWKFilterRadius) / sizeof(anGWKFilterRadius[0])))
1481 : {
1482 0 : CPLError(CE_Failure, CPLE_AppDefined,
1483 : "Unsupported resampling method %d.",
1484 0 : static_cast<int>(eResample));
1485 0 : return CE_Failure;
1486 : }
1487 :
1488 : // Tuples of values (e.g. "<R>,<G>,<B>" or "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>)") that must
1489 : // be ignored as contributing source pixels during resampling. Only taken into account by
1490 : // Average currently
1491 : const char *pszExcludedValues =
1492 3793 : CSLFetchNameValue(papszWarpOptions, "EXCLUDED_VALUES");
1493 3793 : if (pszExcludedValues)
1494 : {
1495 : const CPLStringList aosTokens(
1496 18 : CSLTokenizeString2(pszExcludedValues, "(,)", 0));
1497 18 : if ((aosTokens.size() % nBands) != 0)
1498 : {
1499 1 : CPLError(CE_Failure, CPLE_AppDefined,
1500 : "EXCLUDED_VALUES should contain one or several tuples of "
1501 : "%d values formatted like <R>,<G>,<B> or "
1502 : "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>) if there are multiple "
1503 : "tuples",
1504 : nBands);
1505 1 : return CE_Failure;
1506 : }
1507 34 : std::vector<double> adfTuple;
1508 68 : for (int i = 0; i < aosTokens.size(); ++i)
1509 : {
1510 51 : adfTuple.push_back(CPLAtof(aosTokens[i]));
1511 51 : if (((i + 1) % nBands) == 0)
1512 : {
1513 17 : m_aadfExcludedValues.push_back(adfTuple);
1514 17 : adfTuple.clear();
1515 : }
1516 : }
1517 : }
1518 :
1519 3792 : return CE_None;
1520 : }
1521 :
1522 : /************************************************************************/
1523 : /* GWKOverlayDensity() */
1524 : /* */
1525 : /* Compute the final density for the destination pixel. This */
1526 : /* is a function of the overlay density (passed in) and the */
1527 : /* original density. */
1528 : /************************************************************************/
1529 :
1530 17762100 : static void GWKOverlayDensity(const GDALWarpKernel *poWK, GPtrDiff_t iDstOffset,
1531 : double dfDensity)
1532 : {
1533 17762100 : if (dfDensity < 0.0001 || poWK->pafDstDensity == nullptr)
1534 13309900 : return;
1535 :
1536 4452160 : poWK->pafDstDensity[iDstOffset] =
1537 4452160 : 1.0f -
1538 4452160 : (1.0f - float(dfDensity)) * (1.0f - poWK->pafDstDensity[iDstOffset]);
1539 : }
1540 :
1541 : /************************************************************************/
1542 : /* GWKRoundValueT() */
1543 : /************************************************************************/
1544 :
1545 : template <class T, class U, bool is_signed> struct sGWKRoundValueT
1546 : {
1547 : static T eval(U);
1548 : };
1549 :
1550 : template <class T, class U> struct sGWKRoundValueT<T, U, true> /* signed */
1551 : {
1552 791525 : static T eval(U value)
1553 : {
1554 791525 : return static_cast<T>(floor(value + U(0.5)));
1555 : }
1556 : };
1557 :
1558 : template <class T, class U> struct sGWKRoundValueT<T, U, false> /* unsigned */
1559 : {
1560 152026197 : static T eval(U value)
1561 : {
1562 152026197 : return static_cast<T>(value + U(0.5));
1563 : }
1564 : };
1565 :
1566 152817722 : template <class T, class U> static T GWKRoundValueT(U value)
1567 : {
1568 152817722 : return sGWKRoundValueT<T, U, cpl::NumericLimits<T>::is_signed>::eval(value);
1569 : }
1570 :
1571 268974 : template <> float GWKRoundValueT<float, double>(double value)
1572 : {
1573 268974 : return static_cast<float>(value);
1574 : }
1575 :
1576 : #ifdef notused
1577 : template <> double GWKRoundValueT<double, double>(double value)
1578 : {
1579 : return value;
1580 : }
1581 : #endif
1582 :
1583 : /************************************************************************/
1584 : /* GWKClampValueT() */
1585 : /************************************************************************/
1586 :
1587 145451362 : template <class T, class U> static CPL_INLINE T GWKClampValueT(U value)
1588 : {
1589 145451362 : if (value < static_cast<U>(cpl::NumericLimits<T>::min()))
1590 569367 : return cpl::NumericLimits<T>::min();
1591 144881964 : else if (value > static_cast<U>(cpl::NumericLimits<T>::max()))
1592 773825 : return cpl::NumericLimits<T>::max();
1593 : else
1594 144107844 : return GWKRoundValueT<T, U>(value);
1595 : }
1596 :
1597 718914 : template <> float GWKClampValueT<float, double>(double dfValue)
1598 : {
1599 718914 : return static_cast<float>(dfValue);
1600 : }
1601 :
1602 : #ifdef notused
1603 : template <> double GWKClampValueT<double, double>(double dfValue)
1604 : {
1605 : return dfValue;
1606 : }
1607 : #endif
1608 :
1609 : /************************************************************************/
1610 : /* AvoidNoData() */
1611 : /************************************************************************/
1612 :
1613 1283 : template <class T> inline void AvoidNoData(T *pDst, GPtrDiff_t iDstOffset)
1614 : {
1615 : if constexpr (cpl::NumericLimits<T>::is_integer)
1616 : {
1617 1027 : if (pDst[iDstOffset] == static_cast<T>(cpl::NumericLimits<T>::lowest()))
1618 : {
1619 515 : pDst[iDstOffset] =
1620 515 : static_cast<T>(cpl::NumericLimits<T>::lowest() + 1);
1621 : }
1622 : else
1623 512 : pDst[iDstOffset]--;
1624 : }
1625 : else
1626 : {
1627 256 : if (pDst[iDstOffset] == cpl::NumericLimits<T>::max())
1628 : {
1629 : using std::nextafter;
1630 0 : pDst[iDstOffset] = nextafter(pDst[iDstOffset], static_cast<T>(0));
1631 : }
1632 : else
1633 : {
1634 : using std::nextafter;
1635 256 : pDst[iDstOffset] =
1636 256 : nextafter(pDst[iDstOffset], cpl::NumericLimits<T>::max());
1637 : }
1638 : }
1639 1283 : }
1640 :
1641 : /************************************************************************/
1642 : /* AvoidNoData() */
1643 : /************************************************************************/
1644 :
1645 : template <class T>
1646 25539330 : inline void AvoidNoData(const GDALWarpKernel *poWK, int iBand,
1647 : GPtrDiff_t iDstOffset)
1648 : {
1649 25539330 : GByte *pabyDst = poWK->papabyDstImage[iBand];
1650 25539330 : T *pDst = reinterpret_cast<T *>(pabyDst);
1651 :
1652 25539330 : if (poWK->padfDstNoDataReal != nullptr &&
1653 11380638 : poWK->padfDstNoDataReal[iBand] == static_cast<double>(pDst[iDstOffset]))
1654 : {
1655 640 : AvoidNoData(pDst, iDstOffset);
1656 :
1657 640 : if (!poWK->bWarnedAboutDstNoDataReplacement)
1658 : {
1659 40 : const_cast<GDALWarpKernel *>(poWK)
1660 : ->bWarnedAboutDstNoDataReplacement = true;
1661 40 : CPLError(CE_Warning, CPLE_AppDefined,
1662 : "Value %g in the source dataset has been changed to %g "
1663 : "in the destination dataset to avoid being treated as "
1664 : "NoData. To avoid this, select a different NoData value "
1665 : "for the destination dataset.",
1666 40 : poWK->padfDstNoDataReal[iBand],
1667 40 : static_cast<double>(pDst[iDstOffset]));
1668 : }
1669 : }
1670 25539330 : }
1671 :
1672 : /************************************************************************/
1673 : /* GWKAvoidNoDataMultiBand() */
1674 : /************************************************************************/
1675 :
1676 : template <class T>
1677 524573 : static void GWKAvoidNoDataMultiBand(const GDALWarpKernel *poWK,
1678 : GPtrDiff_t iDstOffset)
1679 : {
1680 524573 : T **ppDst = reinterpret_cast<T **>(poWK->papabyDstImage);
1681 524573 : if (poWK->padfDstNoDataReal != nullptr)
1682 : {
1683 208615 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
1684 : {
1685 208294 : if (poWK->padfDstNoDataReal[iBand] !=
1686 208294 : static_cast<double>(ppDst[iBand][iDstOffset]))
1687 205830 : return;
1688 : }
1689 964 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
1690 : {
1691 643 : AvoidNoData(ppDst[iBand], iDstOffset);
1692 : }
1693 :
1694 321 : if (!poWK->bWarnedAboutDstNoDataReplacement)
1695 : {
1696 21 : const_cast<GDALWarpKernel *>(poWK)
1697 : ->bWarnedAboutDstNoDataReplacement = true;
1698 42 : std::string valueSrc, valueDst;
1699 64 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
1700 : {
1701 43 : if (!valueSrc.empty())
1702 : {
1703 22 : valueSrc += ',';
1704 22 : valueDst += ',';
1705 : }
1706 43 : valueSrc += CPLSPrintf("%g", poWK->padfDstNoDataReal[iBand]);
1707 43 : valueDst += CPLSPrintf(
1708 43 : "%g", static_cast<double>(ppDst[iBand][iDstOffset]));
1709 : }
1710 21 : CPLError(CE_Warning, CPLE_AppDefined,
1711 : "Value %s in the source dataset has been changed to %s "
1712 : "in the destination dataset to avoid being treated as "
1713 : "NoData. To avoid this, select a different NoData value "
1714 : "for the destination dataset.",
1715 : valueSrc.c_str(), valueDst.c_str());
1716 : }
1717 : }
1718 : }
1719 :
1720 : /************************************************************************/
1721 : /* GWKAvoidNoDataMultiBand() */
1722 : /************************************************************************/
1723 :
1724 524573 : static void GWKAvoidNoDataMultiBand(const GDALWarpKernel *poWK,
1725 : GPtrDiff_t iDstOffset)
1726 : {
1727 524573 : switch (poWK->eWorkingDataType)
1728 : {
1729 523997 : case GDT_UInt8:
1730 523997 : GWKAvoidNoDataMultiBand<std::uint8_t>(poWK, iDstOffset);
1731 523997 : break;
1732 :
1733 64 : case GDT_Int8:
1734 64 : GWKAvoidNoDataMultiBand<std::int8_t>(poWK, iDstOffset);
1735 64 : break;
1736 :
1737 64 : case GDT_Int16:
1738 64 : GWKAvoidNoDataMultiBand<std::int16_t>(poWK, iDstOffset);
1739 64 : break;
1740 :
1741 64 : case GDT_UInt16:
1742 64 : GWKAvoidNoDataMultiBand<std::uint16_t>(poWK, iDstOffset);
1743 64 : break;
1744 :
1745 64 : case GDT_Int32:
1746 64 : GWKAvoidNoDataMultiBand<std::int32_t>(poWK, iDstOffset);
1747 64 : break;
1748 :
1749 64 : case GDT_UInt32:
1750 64 : GWKAvoidNoDataMultiBand<std::uint32_t>(poWK, iDstOffset);
1751 64 : break;
1752 :
1753 64 : case GDT_Int64:
1754 64 : GWKAvoidNoDataMultiBand<std::int64_t>(poWK, iDstOffset);
1755 64 : break;
1756 :
1757 64 : case GDT_UInt64:
1758 64 : GWKAvoidNoDataMultiBand<std::uint64_t>(poWK, iDstOffset);
1759 64 : break;
1760 :
1761 0 : case GDT_Float16:
1762 0 : GWKAvoidNoDataMultiBand<GFloat16>(poWK, iDstOffset);
1763 0 : break;
1764 :
1765 64 : case GDT_Float32:
1766 64 : GWKAvoidNoDataMultiBand<float>(poWK, iDstOffset);
1767 64 : break;
1768 :
1769 64 : case GDT_Float64:
1770 64 : GWKAvoidNoDataMultiBand<double>(poWK, iDstOffset);
1771 64 : break;
1772 :
1773 0 : case GDT_CInt16:
1774 : case GDT_CInt32:
1775 : case GDT_CFloat16:
1776 : case GDT_CFloat32:
1777 : case GDT_CFloat64:
1778 : case GDT_Unknown:
1779 : case GDT_TypeCount:
1780 0 : break;
1781 : }
1782 524573 : }
1783 :
1784 : /************************************************************************/
1785 : /* GWKSetPixelValueRealT() */
1786 : /************************************************************************/
1787 :
1788 : template <class T>
1789 14954277 : static bool GWKSetPixelValueRealT(const GDALWarpKernel *poWK, int iBand,
1790 : GPtrDiff_t iDstOffset, double dfDensity,
1791 : T value, bool bAvoidNoDataSingleBand)
1792 : {
1793 14954277 : T *pDst = reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
1794 :
1795 : /* -------------------------------------------------------------------- */
1796 : /* If the source density is less than 100% we need to fetch the */
1797 : /* existing destination value, and mix it with the source to */
1798 : /* get the new "to apply" value. Also compute composite */
1799 : /* density. */
1800 : /* */
1801 : /* We avoid mixing if density is very near one or risk mixing */
1802 : /* in very extreme nodata values and causing odd results (#1610) */
1803 : /* -------------------------------------------------------------------- */
1804 14954277 : if (dfDensity < 0.9999)
1805 : {
1806 945508 : if (dfDensity < 0.0001)
1807 0 : return true;
1808 :
1809 945508 : double dfDstDensity = 1.0;
1810 :
1811 945508 : if (poWK->pafDstDensity != nullptr)
1812 944036 : dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
1813 1472 : else if (poWK->panDstValid != nullptr &&
1814 0 : !CPLMaskGet(poWK->panDstValid, iDstOffset))
1815 0 : dfDstDensity = 0.0;
1816 :
1817 : // It seems like we also ought to be testing panDstValid[] here!
1818 :
1819 945508 : const double dfDstReal = static_cast<double>(pDst[iDstOffset]);
1820 :
1821 : // The destination density is really only relative to the portion
1822 : // not occluded by the overlay.
1823 945508 : const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
1824 :
1825 945508 : const double dfReal =
1826 945508 : (double(value) * dfDensity + dfDstReal * dfDstInfluence) /
1827 945508 : (dfDensity + dfDstInfluence);
1828 :
1829 : /* --------------------------------------------------------------------
1830 : */
1831 : /* Actually apply the destination value. */
1832 : /* */
1833 : /* Avoid using the destination nodata value for integer datatypes
1834 : */
1835 : /* if by chance it is equal to the computed pixel value. */
1836 : /* --------------------------------------------------------------------
1837 : */
1838 945508 : pDst[iDstOffset] = GWKClampValueT<T>(dfReal);
1839 : }
1840 : else
1841 : {
1842 14008836 : pDst[iDstOffset] = value;
1843 : }
1844 :
1845 14954277 : if (bAvoidNoDataSingleBand)
1846 13681621 : AvoidNoData<T>(poWK, iBand, iDstOffset);
1847 :
1848 14954277 : return true;
1849 : }
1850 :
1851 : /************************************************************************/
1852 : /* ClampRoundAndAvoidNoData() */
1853 : /************************************************************************/
1854 :
1855 : template <class T>
1856 12158105 : inline void ClampRoundAndAvoidNoData(const GDALWarpKernel *poWK, int iBand,
1857 : GPtrDiff_t iDstOffset, double dfReal,
1858 : bool bAvoidNoDataSingleBand)
1859 : {
1860 12158105 : GByte *pabyDst = poWK->papabyDstImage[iBand];
1861 12158105 : T *pDst = reinterpret_cast<T *>(pabyDst);
1862 :
1863 : if constexpr (cpl::NumericLimits<T>::is_integer)
1864 : {
1865 : using std::floor;
1866 11660975 : if (dfReal < static_cast<double>(cpl::NumericLimits<T>::lowest()))
1867 6430 : pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::lowest());
1868 11654575 : else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
1869 23967 : pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::max());
1870 : else if constexpr (cpl::NumericLimits<T>::is_signed)
1871 10410 : pDst[iDstOffset] = static_cast<T>(floor(dfReal + 0.5));
1872 : else
1873 11620165 : pDst[iDstOffset] = static_cast<T>(dfReal + 0.5);
1874 : }
1875 : else
1876 : {
1877 497130 : pDst[iDstOffset] = static_cast<T>(dfReal);
1878 : }
1879 :
1880 12158105 : if (bAvoidNoDataSingleBand)
1881 11857709 : AvoidNoData<T>(poWK, iBand, iDstOffset);
1882 12158105 : }
1883 :
1884 : /************************************************************************/
1885 : /* GWKSetPixelValue() */
1886 : /************************************************************************/
1887 :
1888 11045400 : static bool GWKSetPixelValue(const GDALWarpKernel *poWK, int iBand,
1889 : GPtrDiff_t iDstOffset, double dfDensity,
1890 : double dfReal, double dfImag,
1891 : bool bAvoidNoDataSingleBand)
1892 :
1893 : {
1894 11045400 : GByte *pabyDst = poWK->papabyDstImage[iBand];
1895 :
1896 : /* -------------------------------------------------------------------- */
1897 : /* If the source density is less than 100% we need to fetch the */
1898 : /* existing destination value, and mix it with the source to */
1899 : /* get the new "to apply" value. Also compute composite */
1900 : /* density. */
1901 : /* */
1902 : /* We avoid mixing if density is very near one or risk mixing */
1903 : /* in very extreme nodata values and causing odd results (#1610) */
1904 : /* -------------------------------------------------------------------- */
1905 11045400 : if (dfDensity < 0.9999)
1906 : {
1907 800 : if (dfDensity < 0.0001)
1908 0 : return true;
1909 :
1910 800 : double dfDstDensity = 1.0;
1911 800 : if (poWK->pafDstDensity != nullptr)
1912 800 : dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
1913 0 : else if (poWK->panDstValid != nullptr &&
1914 0 : !CPLMaskGet(poWK->panDstValid, iDstOffset))
1915 0 : dfDstDensity = 0.0;
1916 :
1917 800 : double dfDstReal = 0.0;
1918 800 : double dfDstImag = 0.0;
1919 : // It seems like we also ought to be testing panDstValid[] here!
1920 :
1921 : // TODO(schwehr): Factor out this repreated type of set.
1922 800 : switch (poWK->eWorkingDataType)
1923 : {
1924 0 : case GDT_UInt8:
1925 0 : dfDstReal = pabyDst[iDstOffset];
1926 0 : dfDstImag = 0.0;
1927 0 : break;
1928 :
1929 0 : case GDT_Int8:
1930 0 : dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
1931 0 : dfDstImag = 0.0;
1932 0 : break;
1933 :
1934 400 : case GDT_Int16:
1935 400 : dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
1936 400 : dfDstImag = 0.0;
1937 400 : break;
1938 :
1939 400 : case GDT_UInt16:
1940 400 : dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
1941 400 : dfDstImag = 0.0;
1942 400 : break;
1943 :
1944 0 : case GDT_Int32:
1945 0 : dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
1946 0 : dfDstImag = 0.0;
1947 0 : break;
1948 :
1949 0 : case GDT_UInt32:
1950 0 : dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
1951 0 : dfDstImag = 0.0;
1952 0 : break;
1953 :
1954 0 : case GDT_Int64:
1955 0 : dfDstReal = static_cast<double>(
1956 0 : reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
1957 0 : dfDstImag = 0.0;
1958 0 : break;
1959 :
1960 0 : case GDT_UInt64:
1961 0 : dfDstReal = static_cast<double>(
1962 0 : reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
1963 0 : dfDstImag = 0.0;
1964 0 : break;
1965 :
1966 0 : case GDT_Float16:
1967 0 : dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
1968 0 : dfDstImag = 0.0;
1969 0 : break;
1970 :
1971 0 : case GDT_Float32:
1972 0 : dfDstReal =
1973 0 : double(reinterpret_cast<float *>(pabyDst)[iDstOffset]);
1974 0 : dfDstImag = 0.0;
1975 0 : break;
1976 :
1977 0 : case GDT_Float64:
1978 0 : dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
1979 0 : dfDstImag = 0.0;
1980 0 : break;
1981 :
1982 0 : case GDT_CInt16:
1983 0 : dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2];
1984 0 : dfDstImag =
1985 0 : reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2 + 1];
1986 0 : break;
1987 :
1988 0 : case GDT_CInt32:
1989 0 : dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2];
1990 0 : dfDstImag =
1991 0 : reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2 + 1];
1992 0 : break;
1993 :
1994 0 : case GDT_CFloat16:
1995 : dfDstReal =
1996 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2];
1997 : dfDstImag =
1998 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1];
1999 0 : break;
2000 :
2001 0 : case GDT_CFloat32:
2002 0 : dfDstReal =
2003 0 : double(reinterpret_cast<float *>(pabyDst)[iDstOffset * 2]);
2004 0 : dfDstImag = double(
2005 0 : reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1]);
2006 0 : break;
2007 :
2008 0 : case GDT_CFloat64:
2009 0 : dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset * 2];
2010 0 : dfDstImag =
2011 0 : reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1];
2012 0 : break;
2013 :
2014 0 : case GDT_Unknown:
2015 : case GDT_TypeCount:
2016 0 : CPLAssert(false);
2017 : return false;
2018 : }
2019 :
2020 : // The destination density is really only relative to the portion
2021 : // not occluded by the overlay.
2022 800 : const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
2023 :
2024 800 : dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
2025 800 : (dfDensity + dfDstInfluence);
2026 :
2027 800 : dfImag = (dfImag * dfDensity + dfDstImag * dfDstInfluence) /
2028 800 : (dfDensity + dfDstInfluence);
2029 : }
2030 :
2031 : /* -------------------------------------------------------------------- */
2032 : /* Actually apply the destination value. */
2033 : /* */
2034 : /* Avoid using the destination nodata value for integer datatypes */
2035 : /* if by chance it is equal to the computed pixel value. */
2036 : /* -------------------------------------------------------------------- */
2037 :
2038 11045400 : switch (poWK->eWorkingDataType)
2039 : {
2040 10323000 : case GDT_UInt8:
2041 10323000 : ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal,
2042 : bAvoidNoDataSingleBand);
2043 10323000 : break;
2044 :
2045 1 : case GDT_Int8:
2046 1 : ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal,
2047 : bAvoidNoDataSingleBand);
2048 1 : break;
2049 :
2050 7471 : case GDT_Int16:
2051 7471 : ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal,
2052 : bAvoidNoDataSingleBand);
2053 7471 : break;
2054 :
2055 464 : case GDT_UInt16:
2056 464 : ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal,
2057 : bAvoidNoDataSingleBand);
2058 464 : break;
2059 :
2060 63 : case GDT_UInt32:
2061 63 : ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal,
2062 : bAvoidNoDataSingleBand);
2063 63 : break;
2064 :
2065 63 : case GDT_Int32:
2066 63 : ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal,
2067 : bAvoidNoDataSingleBand);
2068 63 : break;
2069 :
2070 0 : case GDT_UInt64:
2071 0 : ClampRoundAndAvoidNoData<std::uint64_t>(
2072 : poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
2073 0 : break;
2074 :
2075 0 : case GDT_Int64:
2076 0 : ClampRoundAndAvoidNoData<std::int64_t>(
2077 : poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
2078 0 : break;
2079 :
2080 0 : case GDT_Float16:
2081 0 : ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal,
2082 : bAvoidNoDataSingleBand);
2083 0 : break;
2084 :
2085 478957 : case GDT_Float32:
2086 478957 : ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal,
2087 : bAvoidNoDataSingleBand);
2088 478957 : break;
2089 :
2090 149 : case GDT_Float64:
2091 149 : ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal,
2092 : bAvoidNoDataSingleBand);
2093 149 : break;
2094 :
2095 234079 : case GDT_CInt16:
2096 : {
2097 : typedef GInt16 T;
2098 234079 : if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
2099 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
2100 0 : cpl::NumericLimits<T>::min();
2101 234079 : else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
2102 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
2103 0 : cpl::NumericLimits<T>::max();
2104 : else
2105 234079 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
2106 234079 : static_cast<T>(floor(dfReal + 0.5));
2107 234079 : if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
2108 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
2109 0 : cpl::NumericLimits<T>::min();
2110 234079 : else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
2111 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
2112 0 : cpl::NumericLimits<T>::max();
2113 : else
2114 234079 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
2115 234079 : static_cast<T>(floor(dfImag + 0.5));
2116 234079 : break;
2117 : }
2118 :
2119 379 : case GDT_CInt32:
2120 : {
2121 : typedef GInt32 T;
2122 379 : if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
2123 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
2124 0 : cpl::NumericLimits<T>::min();
2125 379 : else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
2126 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
2127 0 : cpl::NumericLimits<T>::max();
2128 : else
2129 379 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
2130 379 : static_cast<T>(floor(dfReal + 0.5));
2131 379 : if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
2132 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
2133 0 : cpl::NumericLimits<T>::min();
2134 379 : else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
2135 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
2136 0 : cpl::NumericLimits<T>::max();
2137 : else
2138 379 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
2139 379 : static_cast<T>(floor(dfImag + 0.5));
2140 379 : break;
2141 : }
2142 :
2143 0 : case GDT_CFloat16:
2144 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2] =
2145 0 : static_cast<GFloat16>(dfReal);
2146 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1] =
2147 0 : static_cast<GFloat16>(dfImag);
2148 0 : break;
2149 :
2150 394 : case GDT_CFloat32:
2151 394 : reinterpret_cast<float *>(pabyDst)[iDstOffset * 2] =
2152 394 : static_cast<float>(dfReal);
2153 394 : reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1] =
2154 394 : static_cast<float>(dfImag);
2155 394 : break;
2156 :
2157 380 : case GDT_CFloat64:
2158 380 : reinterpret_cast<double *>(pabyDst)[iDstOffset * 2] = dfReal;
2159 380 : reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1] = dfImag;
2160 380 : break;
2161 :
2162 0 : case GDT_Unknown:
2163 : case GDT_TypeCount:
2164 0 : return false;
2165 : }
2166 :
2167 11045400 : return true;
2168 : }
2169 :
2170 : /************************************************************************/
2171 : /* GWKSetPixelValueReal() */
2172 : /************************************************************************/
2173 :
2174 1347980 : static bool GWKSetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
2175 : GPtrDiff_t iDstOffset, double dfDensity,
2176 : double dfReal, bool bAvoidNoDataSingleBand)
2177 :
2178 : {
2179 1347980 : GByte *pabyDst = poWK->papabyDstImage[iBand];
2180 :
2181 : /* -------------------------------------------------------------------- */
2182 : /* If the source density is less than 100% we need to fetch the */
2183 : /* existing destination value, and mix it with the source to */
2184 : /* get the new "to apply" value. Also compute composite */
2185 : /* density. */
2186 : /* */
2187 : /* We avoid mixing if density is very near one or risk mixing */
2188 : /* in very extreme nodata values and causing odd results (#1610) */
2189 : /* -------------------------------------------------------------------- */
2190 1347980 : if (dfDensity < 0.9999)
2191 : {
2192 600 : if (dfDensity < 0.0001)
2193 0 : return true;
2194 :
2195 600 : double dfDstReal = 0.0;
2196 600 : double dfDstDensity = 1.0;
2197 :
2198 600 : if (poWK->pafDstDensity != nullptr)
2199 600 : dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
2200 0 : else if (poWK->panDstValid != nullptr &&
2201 0 : !CPLMaskGet(poWK->panDstValid, iDstOffset))
2202 0 : dfDstDensity = 0.0;
2203 :
2204 : // It seems like we also ought to be testing panDstValid[] here!
2205 :
2206 600 : switch (poWK->eWorkingDataType)
2207 : {
2208 0 : case GDT_UInt8:
2209 0 : dfDstReal = pabyDst[iDstOffset];
2210 0 : break;
2211 :
2212 0 : case GDT_Int8:
2213 0 : dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
2214 0 : break;
2215 :
2216 300 : case GDT_Int16:
2217 300 : dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
2218 300 : break;
2219 :
2220 300 : case GDT_UInt16:
2221 300 : dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
2222 300 : break;
2223 :
2224 0 : case GDT_Int32:
2225 0 : dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
2226 0 : break;
2227 :
2228 0 : case GDT_UInt32:
2229 0 : dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
2230 0 : break;
2231 :
2232 0 : case GDT_Int64:
2233 0 : dfDstReal = static_cast<double>(
2234 0 : reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
2235 0 : break;
2236 :
2237 0 : case GDT_UInt64:
2238 0 : dfDstReal = static_cast<double>(
2239 0 : reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
2240 0 : break;
2241 :
2242 0 : case GDT_Float16:
2243 0 : dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
2244 0 : break;
2245 :
2246 0 : case GDT_Float32:
2247 0 : dfDstReal =
2248 0 : double(reinterpret_cast<float *>(pabyDst)[iDstOffset]);
2249 0 : break;
2250 :
2251 0 : case GDT_Float64:
2252 0 : dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
2253 0 : break;
2254 :
2255 0 : case GDT_CInt16:
2256 : case GDT_CInt32:
2257 : case GDT_CFloat16:
2258 : case GDT_CFloat32:
2259 : case GDT_CFloat64:
2260 : case GDT_Unknown:
2261 : case GDT_TypeCount:
2262 0 : CPLAssert(false);
2263 : return false;
2264 : }
2265 :
2266 : // The destination density is really only relative to the portion
2267 : // not occluded by the overlay.
2268 600 : const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
2269 :
2270 600 : dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
2271 600 : (dfDensity + dfDstInfluence);
2272 : }
2273 :
2274 : /* -------------------------------------------------------------------- */
2275 : /* Actually apply the destination value. */
2276 : /* */
2277 : /* Avoid using the destination nodata value for integer datatypes */
2278 : /* if by chance it is equal to the computed pixel value. */
2279 : /* -------------------------------------------------------------------- */
2280 :
2281 1347980 : switch (poWK->eWorkingDataType)
2282 : {
2283 1325840 : case GDT_UInt8:
2284 1325840 : ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal,
2285 : bAvoidNoDataSingleBand);
2286 1325840 : break;
2287 :
2288 112 : case GDT_Int8:
2289 112 : ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal,
2290 : bAvoidNoDataSingleBand);
2291 112 : break;
2292 :
2293 1197 : case GDT_Int16:
2294 1197 : ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal,
2295 : bAvoidNoDataSingleBand);
2296 1197 : break;
2297 :
2298 475 : case GDT_UInt16:
2299 475 : ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal,
2300 : bAvoidNoDataSingleBand);
2301 475 : break;
2302 :
2303 539 : case GDT_UInt32:
2304 539 : ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal,
2305 : bAvoidNoDataSingleBand);
2306 539 : break;
2307 :
2308 1342 : case GDT_Int32:
2309 1342 : ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal,
2310 : bAvoidNoDataSingleBand);
2311 1342 : break;
2312 :
2313 224 : case GDT_UInt64:
2314 224 : ClampRoundAndAvoidNoData<std::uint64_t>(
2315 : poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
2316 224 : break;
2317 :
2318 224 : case GDT_Int64:
2319 224 : ClampRoundAndAvoidNoData<std::int64_t>(
2320 : poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
2321 224 : break;
2322 :
2323 0 : case GDT_Float16:
2324 0 : ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal,
2325 : bAvoidNoDataSingleBand);
2326 0 : break;
2327 :
2328 3538 : case GDT_Float32:
2329 3538 : ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal,
2330 : bAvoidNoDataSingleBand);
2331 3538 : break;
2332 :
2333 14486 : case GDT_Float64:
2334 14486 : ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal,
2335 : bAvoidNoDataSingleBand);
2336 14486 : break;
2337 :
2338 0 : case GDT_CInt16:
2339 : case GDT_CInt32:
2340 : case GDT_CFloat16:
2341 : case GDT_CFloat32:
2342 : case GDT_CFloat64:
2343 0 : return false;
2344 :
2345 0 : case GDT_Unknown:
2346 : case GDT_TypeCount:
2347 0 : CPLAssert(false);
2348 : return false;
2349 : }
2350 :
2351 1347980 : return true;
2352 : }
2353 :
2354 : /************************************************************************/
2355 : /* GWKGetPixelValue() */
2356 : /************************************************************************/
2357 :
2358 : /* It is assumed that panUnifiedSrcValid has been checked before */
2359 :
2360 40173600 : static bool GWKGetPixelValue(const GDALWarpKernel *poWK, int iBand,
2361 : GPtrDiff_t iSrcOffset, double *pdfDensity,
2362 : double *pdfReal, double *pdfImag)
2363 :
2364 : {
2365 40173600 : GByte *pabySrc = poWK->papabySrcImage[iBand];
2366 :
2367 80347200 : if (poWK->papanBandSrcValid != nullptr &&
2368 40173600 : poWK->papanBandSrcValid[iBand] != nullptr &&
2369 0 : !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
2370 : {
2371 0 : *pdfDensity = 0.0;
2372 0 : return false;
2373 : }
2374 :
2375 40173600 : *pdfReal = 0.0;
2376 40173600 : *pdfImag = 0.0;
2377 :
2378 : // TODO(schwehr): Fix casting.
2379 40173600 : switch (poWK->eWorkingDataType)
2380 : {
2381 39096600 : case GDT_UInt8:
2382 39096600 : *pdfReal = pabySrc[iSrcOffset];
2383 39096600 : *pdfImag = 0.0;
2384 39096600 : break;
2385 :
2386 3 : case GDT_Int8:
2387 3 : *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
2388 3 : *pdfImag = 0.0;
2389 3 : break;
2390 :
2391 28229 : case GDT_Int16:
2392 28229 : *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
2393 28229 : *pdfImag = 0.0;
2394 28229 : break;
2395 :
2396 166 : case GDT_UInt16:
2397 166 : *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
2398 166 : *pdfImag = 0.0;
2399 166 : break;
2400 :
2401 63 : case GDT_Int32:
2402 63 : *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
2403 63 : *pdfImag = 0.0;
2404 63 : break;
2405 :
2406 63 : case GDT_UInt32:
2407 63 : *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
2408 63 : *pdfImag = 0.0;
2409 63 : break;
2410 :
2411 0 : case GDT_Int64:
2412 0 : *pdfReal = static_cast<double>(
2413 0 : reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
2414 0 : *pdfImag = 0.0;
2415 0 : break;
2416 :
2417 0 : case GDT_UInt64:
2418 0 : *pdfReal = static_cast<double>(
2419 0 : reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
2420 0 : *pdfImag = 0.0;
2421 0 : break;
2422 :
2423 0 : case GDT_Float16:
2424 0 : *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
2425 0 : *pdfImag = 0.0;
2426 0 : break;
2427 :
2428 1047220 : case GDT_Float32:
2429 1047220 : *pdfReal = double(reinterpret_cast<float *>(pabySrc)[iSrcOffset]);
2430 1047220 : *pdfImag = 0.0;
2431 1047220 : break;
2432 :
2433 587 : case GDT_Float64:
2434 587 : *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
2435 587 : *pdfImag = 0.0;
2436 587 : break;
2437 :
2438 133 : case GDT_CInt16:
2439 133 : *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2];
2440 133 : *pdfImag = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2 + 1];
2441 133 : break;
2442 :
2443 133 : case GDT_CInt32:
2444 133 : *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2];
2445 133 : *pdfImag = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2 + 1];
2446 133 : break;
2447 :
2448 0 : case GDT_CFloat16:
2449 0 : *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2];
2450 0 : *pdfImag =
2451 0 : reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2 + 1];
2452 0 : break;
2453 :
2454 194 : case GDT_CFloat32:
2455 194 : *pdfReal =
2456 194 : double(reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2]);
2457 194 : *pdfImag =
2458 194 : double(reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2 + 1]);
2459 194 : break;
2460 :
2461 138 : case GDT_CFloat64:
2462 138 : *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2];
2463 138 : *pdfImag = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2 + 1];
2464 138 : break;
2465 :
2466 0 : case GDT_Unknown:
2467 : case GDT_TypeCount:
2468 0 : CPLAssert(false);
2469 : *pdfDensity = 0.0;
2470 : return false;
2471 : }
2472 :
2473 40173600 : if (poWK->pafUnifiedSrcDensity != nullptr)
2474 12745700 : *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
2475 : else
2476 27427800 : *pdfDensity = 1.0;
2477 :
2478 40173600 : return *pdfDensity != 0.0;
2479 : }
2480 :
2481 : /************************************************************************/
2482 : /* GWKGetPixelValueReal() */
2483 : /************************************************************************/
2484 :
2485 15516 : static bool GWKGetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
2486 : GPtrDiff_t iSrcOffset, double *pdfDensity,
2487 : double *pdfReal)
2488 :
2489 : {
2490 15516 : GByte *pabySrc = poWK->papabySrcImage[iBand];
2491 :
2492 31034 : if (poWK->papanBandSrcValid != nullptr &&
2493 15518 : poWK->papanBandSrcValid[iBand] != nullptr &&
2494 2 : !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
2495 : {
2496 0 : *pdfDensity = 0.0;
2497 0 : return false;
2498 : }
2499 :
2500 15516 : switch (poWK->eWorkingDataType)
2501 : {
2502 1 : case GDT_UInt8:
2503 1 : *pdfReal = pabySrc[iSrcOffset];
2504 1 : break;
2505 :
2506 0 : case GDT_Int8:
2507 0 : *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
2508 0 : break;
2509 :
2510 1 : case GDT_Int16:
2511 1 : *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
2512 1 : break;
2513 :
2514 1 : case GDT_UInt16:
2515 1 : *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
2516 1 : break;
2517 :
2518 982 : case GDT_Int32:
2519 982 : *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
2520 982 : break;
2521 :
2522 179 : case GDT_UInt32:
2523 179 : *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
2524 179 : break;
2525 :
2526 112 : case GDT_Int64:
2527 112 : *pdfReal = static_cast<double>(
2528 112 : reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
2529 112 : break;
2530 :
2531 112 : case GDT_UInt64:
2532 112 : *pdfReal = static_cast<double>(
2533 112 : reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
2534 112 : break;
2535 :
2536 0 : case GDT_Float16:
2537 0 : *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
2538 0 : break;
2539 :
2540 2 : case GDT_Float32:
2541 2 : *pdfReal = double(reinterpret_cast<float *>(pabySrc)[iSrcOffset]);
2542 2 : break;
2543 :
2544 14126 : case GDT_Float64:
2545 14126 : *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
2546 14126 : break;
2547 :
2548 0 : case GDT_CInt16:
2549 : case GDT_CInt32:
2550 : case GDT_CFloat16:
2551 : case GDT_CFloat32:
2552 : case GDT_CFloat64:
2553 : case GDT_Unknown:
2554 : case GDT_TypeCount:
2555 0 : CPLAssert(false);
2556 : return false;
2557 : }
2558 :
2559 15516 : if (poWK->pafUnifiedSrcDensity != nullptr)
2560 0 : *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
2561 : else
2562 15516 : *pdfDensity = 1.0;
2563 :
2564 15516 : return *pdfDensity != 0.0;
2565 : }
2566 :
2567 : /************************************************************************/
2568 : /* GWKGetPixelRow() */
2569 : /************************************************************************/
2570 :
2571 : /* It is assumed that adfImag[] is set to 0 by caller code for non-complex */
2572 : /* data-types. */
2573 :
2574 2369710 : static bool GWKGetPixelRow(const GDALWarpKernel *poWK, int iBand,
2575 : GPtrDiff_t iSrcOffset, int nHalfSrcLen,
2576 : double *padfDensity, double adfReal[],
2577 : double *padfImag)
2578 : {
2579 : // We know that nSrcLen is even, so we can *always* unroll loops 2x.
2580 2369710 : const int nSrcLen = nHalfSrcLen * 2;
2581 2369710 : bool bHasValid = false;
2582 :
2583 2369710 : if (padfDensity != nullptr)
2584 : {
2585 : // Init the density.
2586 3384030 : for (int i = 0; i < nSrcLen; i += 2)
2587 : {
2588 2211910 : padfDensity[i] = 1.0;
2589 2211910 : padfDensity[i + 1] = 1.0;
2590 : }
2591 :
2592 1172120 : if (poWK->panUnifiedSrcValid != nullptr)
2593 : {
2594 3281460 : for (int i = 0; i < nSrcLen; i += 2)
2595 : {
2596 2142070 : if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i))
2597 2067740 : bHasValid = true;
2598 : else
2599 74323 : padfDensity[i] = 0.0;
2600 :
2601 2142070 : if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i + 1))
2602 2068400 : bHasValid = true;
2603 : else
2604 73668 : padfDensity[i + 1] = 0.0;
2605 : }
2606 :
2607 : // Reset or fail as needed.
2608 1139400 : if (bHasValid)
2609 1116590 : bHasValid = false;
2610 : else
2611 22806 : return false;
2612 : }
2613 :
2614 1149320 : if (poWK->papanBandSrcValid != nullptr &&
2615 0 : poWK->papanBandSrcValid[iBand] != nullptr)
2616 : {
2617 0 : for (int i = 0; i < nSrcLen; i += 2)
2618 : {
2619 0 : if (CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset + i))
2620 0 : bHasValid = true;
2621 : else
2622 0 : padfDensity[i] = 0.0;
2623 :
2624 0 : if (CPLMaskGet(poWK->papanBandSrcValid[iBand],
2625 0 : iSrcOffset + i + 1))
2626 0 : bHasValid = true;
2627 : else
2628 0 : padfDensity[i + 1] = 0.0;
2629 : }
2630 :
2631 : // Reset or fail as needed.
2632 0 : if (bHasValid)
2633 0 : bHasValid = false;
2634 : else
2635 0 : return false;
2636 : }
2637 : }
2638 :
2639 : // TODO(schwehr): Fix casting.
2640 : // Fetch data.
2641 2346910 : switch (poWK->eWorkingDataType)
2642 : {
2643 1136680 : case GDT_UInt8:
2644 : {
2645 1136680 : GByte *pSrc =
2646 1136680 : reinterpret_cast<GByte *>(poWK->papabySrcImage[iBand]);
2647 1136680 : pSrc += iSrcOffset;
2648 3281570 : for (int i = 0; i < nSrcLen; i += 2)
2649 : {
2650 2144890 : adfReal[i] = pSrc[i];
2651 2144890 : adfReal[i + 1] = pSrc[i + 1];
2652 : }
2653 1136680 : break;
2654 : }
2655 :
2656 196 : case GDT_Int8:
2657 : {
2658 196 : GInt8 *pSrc =
2659 196 : reinterpret_cast<GInt8 *>(poWK->papabySrcImage[iBand]);
2660 196 : pSrc += iSrcOffset;
2661 392 : for (int i = 0; i < nSrcLen; i += 2)
2662 : {
2663 196 : adfReal[i] = pSrc[i];
2664 196 : adfReal[i + 1] = pSrc[i + 1];
2665 : }
2666 196 : break;
2667 : }
2668 :
2669 5754 : case GDT_Int16:
2670 : {
2671 5754 : GInt16 *pSrc =
2672 5754 : reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
2673 5754 : pSrc += iSrcOffset;
2674 21772 : for (int i = 0; i < nSrcLen; i += 2)
2675 : {
2676 16018 : adfReal[i] = pSrc[i];
2677 16018 : adfReal[i + 1] = pSrc[i + 1];
2678 : }
2679 5754 : break;
2680 : }
2681 :
2682 4310 : case GDT_UInt16:
2683 : {
2684 4310 : GUInt16 *pSrc =
2685 4310 : reinterpret_cast<GUInt16 *>(poWK->papabySrcImage[iBand]);
2686 4310 : pSrc += iSrcOffset;
2687 18884 : for (int i = 0; i < nSrcLen; i += 2)
2688 : {
2689 14574 : adfReal[i] = pSrc[i];
2690 14574 : adfReal[i + 1] = pSrc[i + 1];
2691 : }
2692 4310 : break;
2693 : }
2694 :
2695 946 : case GDT_Int32:
2696 : {
2697 946 : GInt32 *pSrc =
2698 946 : reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
2699 946 : pSrc += iSrcOffset;
2700 2624 : for (int i = 0; i < nSrcLen; i += 2)
2701 : {
2702 1678 : adfReal[i] = pSrc[i];
2703 1678 : adfReal[i + 1] = pSrc[i + 1];
2704 : }
2705 946 : break;
2706 : }
2707 :
2708 946 : case GDT_UInt32:
2709 : {
2710 946 : GUInt32 *pSrc =
2711 946 : reinterpret_cast<GUInt32 *>(poWK->papabySrcImage[iBand]);
2712 946 : pSrc += iSrcOffset;
2713 2624 : for (int i = 0; i < nSrcLen; i += 2)
2714 : {
2715 1678 : adfReal[i] = pSrc[i];
2716 1678 : adfReal[i + 1] = pSrc[i + 1];
2717 : }
2718 946 : break;
2719 : }
2720 :
2721 196 : case GDT_Int64:
2722 : {
2723 196 : auto pSrc =
2724 196 : reinterpret_cast<std::int64_t *>(poWK->papabySrcImage[iBand]);
2725 196 : pSrc += iSrcOffset;
2726 392 : for (int i = 0; i < nSrcLen; i += 2)
2727 : {
2728 196 : adfReal[i] = static_cast<double>(pSrc[i]);
2729 196 : adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
2730 : }
2731 196 : break;
2732 : }
2733 :
2734 196 : case GDT_UInt64:
2735 : {
2736 196 : auto pSrc =
2737 196 : reinterpret_cast<std::uint64_t *>(poWK->papabySrcImage[iBand]);
2738 196 : pSrc += iSrcOffset;
2739 392 : for (int i = 0; i < nSrcLen; i += 2)
2740 : {
2741 196 : adfReal[i] = static_cast<double>(pSrc[i]);
2742 196 : adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
2743 : }
2744 196 : break;
2745 : }
2746 :
2747 0 : case GDT_Float16:
2748 : {
2749 0 : GFloat16 *pSrc =
2750 0 : reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
2751 0 : pSrc += iSrcOffset;
2752 0 : for (int i = 0; i < nSrcLen; i += 2)
2753 : {
2754 0 : adfReal[i] = pSrc[i];
2755 0 : adfReal[i + 1] = pSrc[i + 1];
2756 : }
2757 0 : break;
2758 : }
2759 :
2760 25270 : case GDT_Float32:
2761 : {
2762 25270 : float *pSrc =
2763 25270 : reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
2764 25270 : pSrc += iSrcOffset;
2765 121739 : for (int i = 0; i < nSrcLen; i += 2)
2766 : {
2767 96469 : adfReal[i] = double(pSrc[i]);
2768 96469 : adfReal[i + 1] = double(pSrc[i + 1]);
2769 : }
2770 25270 : break;
2771 : }
2772 :
2773 946 : case GDT_Float64:
2774 : {
2775 946 : double *pSrc =
2776 946 : reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
2777 946 : pSrc += iSrcOffset;
2778 2624 : for (int i = 0; i < nSrcLen; i += 2)
2779 : {
2780 1678 : adfReal[i] = pSrc[i];
2781 1678 : adfReal[i + 1] = pSrc[i + 1];
2782 : }
2783 946 : break;
2784 : }
2785 :
2786 1169220 : case GDT_CInt16:
2787 : {
2788 1169220 : GInt16 *pSrc =
2789 1169220 : reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
2790 1169220 : pSrc += 2 * iSrcOffset;
2791 4676020 : for (int i = 0; i < nSrcLen; i += 2)
2792 : {
2793 3506800 : adfReal[i] = pSrc[2 * i];
2794 3506800 : padfImag[i] = pSrc[2 * i + 1];
2795 :
2796 3506800 : adfReal[i + 1] = pSrc[2 * i + 2];
2797 3506800 : padfImag[i + 1] = pSrc[2 * i + 3];
2798 : }
2799 1169220 : break;
2800 : }
2801 :
2802 750 : case GDT_CInt32:
2803 : {
2804 750 : GInt32 *pSrc =
2805 750 : reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
2806 750 : pSrc += 2 * iSrcOffset;
2807 2232 : for (int i = 0; i < nSrcLen; i += 2)
2808 : {
2809 1482 : adfReal[i] = pSrc[2 * i];
2810 1482 : padfImag[i] = pSrc[2 * i + 1];
2811 :
2812 1482 : adfReal[i + 1] = pSrc[2 * i + 2];
2813 1482 : padfImag[i + 1] = pSrc[2 * i + 3];
2814 : }
2815 750 : break;
2816 : }
2817 :
2818 0 : case GDT_CFloat16:
2819 : {
2820 0 : GFloat16 *pSrc =
2821 0 : reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
2822 0 : pSrc += 2 * iSrcOffset;
2823 0 : for (int i = 0; i < nSrcLen; i += 2)
2824 : {
2825 0 : adfReal[i] = pSrc[2 * i];
2826 0 : padfImag[i] = pSrc[2 * i + 1];
2827 :
2828 0 : adfReal[i + 1] = pSrc[2 * i + 2];
2829 0 : padfImag[i + 1] = pSrc[2 * i + 3];
2830 : }
2831 0 : break;
2832 : }
2833 :
2834 750 : case GDT_CFloat32:
2835 : {
2836 750 : float *pSrc =
2837 750 : reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
2838 750 : pSrc += 2 * iSrcOffset;
2839 2232 : for (int i = 0; i < nSrcLen; i += 2)
2840 : {
2841 1482 : adfReal[i] = double(pSrc[2 * i]);
2842 1482 : padfImag[i] = double(pSrc[2 * i + 1]);
2843 :
2844 1482 : adfReal[i + 1] = double(pSrc[2 * i + 2]);
2845 1482 : padfImag[i + 1] = double(pSrc[2 * i + 3]);
2846 : }
2847 750 : break;
2848 : }
2849 :
2850 750 : case GDT_CFloat64:
2851 : {
2852 750 : double *pSrc =
2853 750 : reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
2854 750 : pSrc += 2 * iSrcOffset;
2855 2232 : for (int i = 0; i < nSrcLen; i += 2)
2856 : {
2857 1482 : adfReal[i] = pSrc[2 * i];
2858 1482 : padfImag[i] = pSrc[2 * i + 1];
2859 :
2860 1482 : adfReal[i + 1] = pSrc[2 * i + 2];
2861 1482 : padfImag[i + 1] = pSrc[2 * i + 3];
2862 : }
2863 750 : break;
2864 : }
2865 :
2866 0 : case GDT_Unknown:
2867 : case GDT_TypeCount:
2868 0 : CPLAssert(false);
2869 : if (padfDensity)
2870 : memset(padfDensity, 0, nSrcLen * sizeof(double));
2871 : return false;
2872 : }
2873 :
2874 2346910 : if (padfDensity == nullptr)
2875 1197590 : return true;
2876 :
2877 1149320 : if (poWK->pafUnifiedSrcDensity == nullptr)
2878 : {
2879 3256740 : for (int i = 0; i < nSrcLen; i += 2)
2880 : {
2881 : // Take into account earlier calcs.
2882 2127390 : if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
2883 : {
2884 2087480 : padfDensity[i] = 1.0;
2885 2087480 : bHasValid = true;
2886 : }
2887 :
2888 2127390 : if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
2889 : {
2890 2088140 : padfDensity[i + 1] = 1.0;
2891 2088140 : bHasValid = true;
2892 : }
2893 : }
2894 : }
2895 : else
2896 : {
2897 70068 : for (int i = 0; i < nSrcLen; i += 2)
2898 : {
2899 50103 : if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
2900 50103 : padfDensity[i] =
2901 50103 : double(poWK->pafUnifiedSrcDensity[iSrcOffset + i]);
2902 50103 : if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
2903 49252 : bHasValid = true;
2904 :
2905 50103 : if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
2906 50103 : padfDensity[i + 1] =
2907 50103 : double(poWK->pafUnifiedSrcDensity[iSrcOffset + i + 1]);
2908 50103 : if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
2909 49170 : bHasValid = true;
2910 : }
2911 : }
2912 :
2913 1149320 : return bHasValid;
2914 : }
2915 :
2916 : /************************************************************************/
2917 : /* GWKGetPixelT() */
2918 : /************************************************************************/
2919 :
2920 : template <class T>
2921 14964659 : static bool GWKGetPixelT(const GDALWarpKernel *poWK, int iBand,
2922 : GPtrDiff_t iSrcOffset, double *pdfDensity, T *pValue)
2923 :
2924 : {
2925 14964659 : T *pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
2926 :
2927 33172043 : if ((poWK->panUnifiedSrcValid != nullptr &&
2928 29929218 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset)) ||
2929 14964659 : (poWK->papanBandSrcValid != nullptr &&
2930 589863 : poWK->papanBandSrcValid[iBand] != nullptr &&
2931 589863 : !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset)))
2932 : {
2933 9 : *pdfDensity = 0.0;
2934 9 : return false;
2935 : }
2936 :
2937 14964559 : *pValue = pSrc[iSrcOffset];
2938 :
2939 14964559 : if (poWK->pafUnifiedSrcDensity == nullptr)
2940 13842266 : *pdfDensity = 1.0;
2941 : else
2942 1122362 : *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
2943 :
2944 14964559 : return *pdfDensity != 0.0;
2945 : }
2946 :
2947 : /************************************************************************/
2948 : /* GWKBilinearResample() */
2949 : /* Set of bilinear interpolators */
2950 : /************************************************************************/
2951 :
2952 77448 : static bool GWKBilinearResample4Sample(const GDALWarpKernel *poWK, int iBand,
2953 : double dfSrcX, double dfSrcY,
2954 : double *pdfDensity, double *pdfReal,
2955 : double *pdfImag)
2956 :
2957 : {
2958 : // Save as local variables to avoid following pointers.
2959 77448 : const int nSrcXSize = poWK->nSrcXSize;
2960 77448 : const int nSrcYSize = poWK->nSrcYSize;
2961 :
2962 77448 : int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
2963 77448 : int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
2964 77448 : double dfRatioX = 1.5 - (dfSrcX - iSrcX);
2965 77448 : double dfRatioY = 1.5 - (dfSrcY - iSrcY);
2966 77448 : bool bShifted = false;
2967 :
2968 77448 : if (iSrcX == -1)
2969 : {
2970 1534 : iSrcX = 0;
2971 1534 : dfRatioX = 1;
2972 : }
2973 77448 : if (iSrcY == -1)
2974 : {
2975 7734 : iSrcY = 0;
2976 7734 : dfRatioY = 1;
2977 : }
2978 77448 : GPtrDiff_t iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
2979 :
2980 : // Shift so we don't overrun the array.
2981 77448 : if (static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize == iSrcOffset + 1 ||
2982 77330 : static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize ==
2983 77330 : iSrcOffset + nSrcXSize + 1)
2984 : {
2985 230 : bShifted = true;
2986 230 : --iSrcOffset;
2987 : }
2988 :
2989 77448 : double adfDensity[2] = {0.0, 0.0};
2990 77448 : double adfReal[2] = {0.0, 0.0};
2991 77448 : double adfImag[2] = {0.0, 0.0};
2992 77448 : double dfAccumulatorReal = 0.0;
2993 77448 : double dfAccumulatorImag = 0.0;
2994 77448 : double dfAccumulatorDensity = 0.0;
2995 77448 : double dfAccumulatorDivisor = 0.0;
2996 :
2997 77448 : const GPtrDiff_t nSrcPixels =
2998 77448 : static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize;
2999 : // Get pixel row.
3000 77448 : if (iSrcY >= 0 && iSrcY < nSrcYSize && iSrcOffset >= 0 &&
3001 154896 : iSrcOffset < nSrcPixels &&
3002 77448 : GWKGetPixelRow(poWK, iBand, iSrcOffset, 1, adfDensity, adfReal,
3003 : adfImag))
3004 : {
3005 71504 : double dfMult1 = dfRatioX * dfRatioY;
3006 71504 : double dfMult2 = (1.0 - dfRatioX) * dfRatioY;
3007 :
3008 : // Shifting corrected.
3009 71504 : if (bShifted)
3010 : {
3011 230 : adfReal[0] = adfReal[1];
3012 230 : adfImag[0] = adfImag[1];
3013 230 : adfDensity[0] = adfDensity[1];
3014 : }
3015 :
3016 : // Upper Left Pixel.
3017 71504 : if (iSrcX >= 0 && iSrcX < nSrcXSize &&
3018 71504 : adfDensity[0] > SRC_DENSITY_THRESHOLD_DOUBLE)
3019 : {
3020 66050 : dfAccumulatorDivisor += dfMult1;
3021 :
3022 66050 : dfAccumulatorReal += adfReal[0] * dfMult1;
3023 66050 : dfAccumulatorImag += adfImag[0] * dfMult1;
3024 66050 : dfAccumulatorDensity += adfDensity[0] * dfMult1;
3025 : }
3026 :
3027 : // Upper Right Pixel.
3028 71504 : if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
3029 70609 : adfDensity[1] > SRC_DENSITY_THRESHOLD_DOUBLE)
3030 : {
3031 65335 : dfAccumulatorDivisor += dfMult2;
3032 :
3033 65335 : dfAccumulatorReal += adfReal[1] * dfMult2;
3034 65335 : dfAccumulatorImag += adfImag[1] * dfMult2;
3035 65335 : dfAccumulatorDensity += adfDensity[1] * dfMult2;
3036 : }
3037 : }
3038 :
3039 : // Get pixel row.
3040 77448 : if (iSrcY + 1 >= 0 && iSrcY + 1 < nSrcYSize &&
3041 228032 : iSrcOffset + nSrcXSize >= 0 && iSrcOffset + nSrcXSize < nSrcPixels &&
3042 73136 : GWKGetPixelRow(poWK, iBand, iSrcOffset + nSrcXSize, 1, adfDensity,
3043 : adfReal, adfImag))
3044 : {
3045 67577 : double dfMult1 = dfRatioX * (1.0 - dfRatioY);
3046 67577 : double dfMult2 = (1.0 - dfRatioX) * (1.0 - dfRatioY);
3047 :
3048 : // Shifting corrected
3049 67577 : if (bShifted)
3050 : {
3051 112 : adfReal[0] = adfReal[1];
3052 112 : adfImag[0] = adfImag[1];
3053 112 : adfDensity[0] = adfDensity[1];
3054 : }
3055 :
3056 : // Lower Left Pixel
3057 67577 : if (iSrcX >= 0 && iSrcX < nSrcXSize &&
3058 67577 : adfDensity[0] > SRC_DENSITY_THRESHOLD_DOUBLE)
3059 : {
3060 62298 : dfAccumulatorDivisor += dfMult1;
3061 :
3062 62298 : dfAccumulatorReal += adfReal[0] * dfMult1;
3063 62298 : dfAccumulatorImag += adfImag[0] * dfMult1;
3064 62298 : dfAccumulatorDensity += adfDensity[0] * dfMult1;
3065 : }
3066 :
3067 : // Lower Right Pixel.
3068 67577 : if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
3069 66800 : adfDensity[1] > SRC_DENSITY_THRESHOLD_DOUBLE)
3070 : {
3071 61823 : dfAccumulatorDivisor += dfMult2;
3072 :
3073 61823 : dfAccumulatorReal += adfReal[1] * dfMult2;
3074 61823 : dfAccumulatorImag += adfImag[1] * dfMult2;
3075 61823 : dfAccumulatorDensity += adfDensity[1] * dfMult2;
3076 : }
3077 : }
3078 :
3079 : /* -------------------------------------------------------------------- */
3080 : /* Return result. */
3081 : /* -------------------------------------------------------------------- */
3082 77448 : if (dfAccumulatorDivisor == 1.0)
3083 : {
3084 45929 : *pdfReal = dfAccumulatorReal;
3085 45929 : *pdfImag = dfAccumulatorImag;
3086 45929 : *pdfDensity = dfAccumulatorDensity;
3087 45929 : return false;
3088 : }
3089 31519 : else if (dfAccumulatorDivisor < 0.00001)
3090 : {
3091 0 : *pdfReal = 0.0;
3092 0 : *pdfImag = 0.0;
3093 0 : *pdfDensity = 0.0;
3094 0 : return false;
3095 : }
3096 : else
3097 : {
3098 31519 : *pdfReal = dfAccumulatorReal / dfAccumulatorDivisor;
3099 31519 : *pdfImag = dfAccumulatorImag / dfAccumulatorDivisor;
3100 31519 : *pdfDensity = dfAccumulatorDensity / dfAccumulatorDivisor;
3101 31519 : return true;
3102 : }
3103 : }
3104 :
3105 : template <class T>
3106 8978832 : static bool GWKBilinearResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
3107 : int iBand, double dfSrcX,
3108 : double dfSrcY, T *pValue)
3109 :
3110 : {
3111 :
3112 8978832 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
3113 8978832 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
3114 8978832 : GPtrDiff_t iSrcOffset =
3115 8978832 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3116 8978832 : const double dfRatioX = 1.5 - (dfSrcX - iSrcX);
3117 8978832 : const double dfRatioY = 1.5 - (dfSrcY - iSrcY);
3118 :
3119 8978832 : const T *const pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
3120 :
3121 8978832 : if (iSrcX >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
3122 6223819 : iSrcY + 1 < poWK->nSrcYSize)
3123 : {
3124 6032332 : const double dfAccumulator =
3125 6032332 : (double(pSrc[iSrcOffset]) * dfRatioX +
3126 6032332 : double(pSrc[iSrcOffset + 1]) * (1.0 - dfRatioX)) *
3127 : dfRatioY +
3128 6032332 : (double(pSrc[iSrcOffset + poWK->nSrcXSize]) * dfRatioX +
3129 6032332 : double(pSrc[iSrcOffset + 1 + poWK->nSrcXSize]) *
3130 6032332 : (1.0 - dfRatioX)) *
3131 6032332 : (1.0 - dfRatioY);
3132 :
3133 6032332 : *pValue = GWKRoundValueT<T>(dfAccumulator);
3134 :
3135 6032332 : return true;
3136 : }
3137 :
3138 2946510 : double dfAccumulatorDivisor = 0.0;
3139 2946510 : double dfAccumulator = 0.0;
3140 :
3141 : // Upper Left Pixel.
3142 2946510 : if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY >= 0 &&
3143 564863 : iSrcY < poWK->nSrcYSize)
3144 : {
3145 564863 : const double dfMult = dfRatioX * dfRatioY;
3146 :
3147 564863 : dfAccumulatorDivisor += dfMult;
3148 :
3149 564863 : dfAccumulator += double(pSrc[iSrcOffset]) * dfMult;
3150 : }
3151 :
3152 : // Upper Right Pixel.
3153 2946510 : if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
3154 2261916 : iSrcY < poWK->nSrcYSize)
3155 : {
3156 2261916 : const double dfMult = (1.0 - dfRatioX) * dfRatioY;
3157 :
3158 2261916 : dfAccumulatorDivisor += dfMult;
3159 :
3160 2261916 : dfAccumulator += double(pSrc[iSrcOffset + 1]) * dfMult;
3161 : }
3162 :
3163 : // Lower Right Pixel.
3164 2946510 : if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
3165 2512904 : iSrcY + 1 < poWK->nSrcYSize)
3166 : {
3167 2261233 : const double dfMult = (1.0 - dfRatioX) * (1.0 - dfRatioY);
3168 :
3169 2261233 : dfAccumulatorDivisor += dfMult;
3170 :
3171 2261233 : dfAccumulator +=
3172 2261233 : double(pSrc[iSrcOffset + 1 + poWK->nSrcXSize]) * dfMult;
3173 : }
3174 :
3175 : // Lower Left Pixel.
3176 2946510 : if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
3177 815588 : iSrcY + 1 < poWK->nSrcYSize)
3178 : {
3179 563904 : const double dfMult = dfRatioX * (1.0 - dfRatioY);
3180 :
3181 563904 : dfAccumulatorDivisor += dfMult;
3182 :
3183 563904 : dfAccumulator += double(pSrc[iSrcOffset + poWK->nSrcXSize]) * dfMult;
3184 : }
3185 :
3186 : /* -------------------------------------------------------------------- */
3187 : /* Return result. */
3188 : /* -------------------------------------------------------------------- */
3189 2946510 : double dfValue = 0.0;
3190 :
3191 2946510 : if (dfAccumulatorDivisor < 0.00001)
3192 : {
3193 0 : *pValue = 0;
3194 0 : return false;
3195 : }
3196 2946510 : else if (dfAccumulatorDivisor == 1.0)
3197 : {
3198 22176 : dfValue = dfAccumulator;
3199 : }
3200 : else
3201 : {
3202 2924328 : dfValue = dfAccumulator / dfAccumulatorDivisor;
3203 : }
3204 :
3205 2946510 : *pValue = GWKRoundValueT<T>(dfValue);
3206 :
3207 2946510 : return true;
3208 : }
3209 :
3210 : /************************************************************************/
3211 : /* GWKCubicResample() */
3212 : /* Set of bicubic interpolators using cubic convolution. */
3213 : /************************************************************************/
3214 :
3215 : // http://verona.fi-p.unam.mx/boris/practicas/CubConvInterp.pdf Formula 18
3216 : // or http://en.wikipedia.org/wiki/Cubic_Hermite_spline : CINTx(p_1,p0,p1,p2)
3217 : // http://en.wikipedia.org/wiki/Bicubic_interpolation: matrix notation
3218 :
/**
 * One-dimensional cubic convolution of four consecutive samples.
 *
 * @param distance1 distance from sample f1 (first power).
 * @param distance2 second power of that distance.
 * @param distance3 third power of that distance.
 * @param f0 sample before f1.
 * @param f1 sample at the origin of the distance.
 * @param f2 sample after f1.
 * @param f3 sample after f2.
 * @return f1 plus half the sum of the three distance-weighted terms.
 */
template <typename T>
static inline T CubicConvolution(T distance1, T distance2, T distance3, T f0,
                                 T f1, T f2, T f3)
{
    const auto firstOrder = distance1 * (f2 - f0);
    const auto secondOrder = distance2 * (2 * f0 - 5 * f1 + 4 * f2 - f3);
    const auto thirdOrder = distance3 * (3 * (f1 - f2) + f3 - f0);

    return (f1 + T(0.5) * (firstOrder + secondOrder + thirdOrder));
}
3227 :
3228 : /************************************************************************/
3229 : /* GWKCubicComputeWeights() */
3230 : /************************************************************************/
3231 :
3232 : // adfCoeffs[2] = 1.0 - (adfCoeffs[0] + adfCoeffs[1] + adfCoeffs[3]);
3233 :
/**
 * Compute the four cubic convolution weights for a fractional offset x.
 *
 * @param x fractional offset from the second of the four samples.
 * @param coeffs [out] weights to apply to the four consecutive samples.
 */
template <typename T>
static inline void GWKCubicComputeWeights(T x, T coeffs[4])
{
    const T xHalf = T(0.5) * x;
    const T xTriple = T(3.0) * x;
    const T xSquaredHalf = xHalf * x;

    // Polynomial factor of each weight, evaluated in nested form.
    const auto poly0 = -1 + x * (2 - x);
    const auto poly1 = -5 + xTriple;
    const auto poly2 = 1 + x * (4 - xTriple);
    const auto poly3 = -1 + x;

    coeffs[0] = xHalf * poly0;
    coeffs[1] = 1 + xSquaredHalf * poly1;
    coeffs[2] = xHalf * poly2;
    coeffs[3] = xSquaredHalf * poly3;
}
3246 :
/**
 * Dot product of four double weights with four samples of type T.
 *
 * Each sample is converted to double before being multiplied, and the
 * products are summed from index 0 to index 3.
 */
template <typename T> inline double CONVOL4(const double v1[4], const T v2[4])
{
    double dfSum = v1[0] * double(v2[0]);
    for (int k = 1; k < 4; ++k)
    {
        dfSum += v1[k] * double(v2[k]);
    }
    return dfSum;
}
3252 :
3253 : #if 0
3254 : // Optimal (in theory...) for max 2 convolutions: 14 multiplications
3255 : // instead of 17.
3256 : // TODO(schwehr): Use an inline function.
3257 : #define GWKCubicComputeWeights_Optim2MAX(dfX_, adfCoeffs, dfHalfX) \
3258 : { \
3259 : const double dfX = dfX_; \
3260 : dfHalfX = 0.5 * dfX; \
3261 : const double dfThreeX = 3.0 * dfX; \
3262 : const double dfXMinus1 = dfX - 1; \
3263 : \
3264 : adfCoeffs[0] = -1 + dfX * (2 - dfX); \
3265 : adfCoeffs[1] = dfX * (-5 + dfThreeX); \
3266 : /*adfCoeffs[2] = 1 + dfX * (4 - dfThreeX);*/ \
3267 : adfCoeffs[2] = -dfXMinus1 - adfCoeffs[1]; \
3268 : /*adfCoeffs[3] = dfX * (-1 + dfX); */ \
3269 : adfCoeffs[3] = dfXMinus1 - adfCoeffs[0]; \
3270 : }
3271 :
3272 : // TODO(schwehr): Use an inline function.
3273 : #define CONVOL4_Optim2MAX(adfCoeffs, v, dfHalfX) \
3274 : ((v)[1] + (dfHalfX) * ((adfCoeffs)[0] * (v)[0] + (adfCoeffs)[1] * (v)[1] + \
3275 : (adfCoeffs)[2] * (v)[2] + (adfCoeffs)[3] * (v)[3]))
3276 : #endif
3277 :
3278 302045 : static bool GWKCubicResample4Sample(const GDALWarpKernel *poWK, int iBand,
3279 : double dfSrcX, double dfSrcY,
3280 : double *pdfDensity, double *pdfReal,
3281 : double *pdfImag)
3282 :
3283 : {
3284 302045 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3285 302045 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3286 302045 : GPtrDiff_t iSrcOffset =
3287 302045 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3288 302045 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3289 302045 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3290 302045 : double adfDensity[4] = {};
3291 302045 : double adfReal[4] = {};
3292 302045 : double adfImag[4] = {};
3293 :
3294 : // Get the bilinear interpolation at the image borders.
3295 302045 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3296 286140 : iSrcY + 2 >= poWK->nSrcYSize)
3297 24670 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3298 24670 : pdfDensity, pdfReal, pdfImag);
3299 :
3300 277375 : double adfValueDens[4] = {};
3301 277375 : double adfValueReal[4] = {};
3302 277375 : double adfValueImag[4] = {};
3303 :
3304 277375 : double adfCoeffsX[4] = {};
3305 277375 : GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3306 :
3307 1240570 : for (GPtrDiff_t i = -1; i < 3; i++)
3308 : {
3309 1009640 : if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
3310 998035 : 2, adfDensity, adfReal, adfImag) ||
3311 998035 : adfDensity[0] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3312 980395 : adfDensity[1] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3313 2979770 : adfDensity[2] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3314 972094 : adfDensity[3] < SRC_DENSITY_THRESHOLD_DOUBLE)
3315 : {
3316 46449 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3317 46449 : pdfDensity, pdfReal, pdfImag);
3318 : }
3319 :
3320 963196 : adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
3321 963196 : adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
3322 963196 : adfValueImag[i + 1] = CONVOL4(adfCoeffsX, adfImag);
3323 : }
3324 :
3325 : /* -------------------------------------------------------------------- */
3326 : /* For now, if we have any pixels missing in the kernel area, */
3327 : /* we fallback on using bilinear interpolation. Ideally we */
3328 : /* should do "weight adjustment" of our results similarly to */
3329 : /* what is done for the cubic spline and lanc. interpolators. */
3330 : /* -------------------------------------------------------------------- */
3331 :
3332 230926 : double adfCoeffsY[4] = {};
3333 230926 : GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3334 :
3335 230926 : *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3336 230926 : *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3337 230926 : *pdfImag = CONVOL4(adfCoeffsY, adfValueImag);
3338 :
3339 230926 : return true;
3340 : }
3341 :
3342 : #ifdef USE_SSE2
3343 :
3344 : /************************************************************************/
3345 : /* XMMLoad4Values() */
3346 : /* */
3347 : /* Load 4 packed byte or uint16, cast them to float and put them in a */
3348 : /* m128 register. */
3349 : /************************************************************************/
3350 :
3351 567016000 : static CPL_INLINE __m128 XMMLoad4Values(const GByte *ptr)
3352 : {
3353 : unsigned int i;
3354 567016000 : memcpy(&i, ptr, 4);
3355 1134030000 : __m128i xmm_i = _mm_cvtsi32_si128(i);
3356 : // Zero extend 4 packed unsigned 8-bit integers in a to packed
3357 : // 32-bit integers.
3358 : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
3359 : xmm_i = _mm_cvtepu8_epi32(xmm_i);
3360 : #else
3361 1134030000 : xmm_i = _mm_unpacklo_epi8(xmm_i, _mm_setzero_si128());
3362 1134030000 : xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
3363 : #endif
3364 1134030000 : return _mm_cvtepi32_ps(xmm_i);
3365 : }
3366 :
3367 1108340 : static CPL_INLINE __m128 XMMLoad4Values(const GUInt16 *ptr)
3368 : {
3369 : GUInt64 i;
3370 1108340 : memcpy(&i, ptr, 8);
3371 2216690 : __m128i xmm_i = _mm_cvtsi64_si128(i);
3372 : // Zero extend 4 packed unsigned 16-bit integers in a to packed
3373 : // 32-bit integers.
3374 : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
3375 : xmm_i = _mm_cvtepu16_epi32(xmm_i);
3376 : #else
3377 2216690 : xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
3378 : #endif
3379 2216690 : return _mm_cvtepi32_ps(xmm_i);
3380 : }
3381 :
3382 : /************************************************************************/
3383 : /* XMMHorizontalAdd() */
3384 : /* */
3385 : /* Return the sum of the 4 floating points of the register. */
3386 : /************************************************************************/
3387 :
3388 : #if defined(__SSE3__) || defined(USE_NEON_OPTIMIZATIONS)
3389 : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
3390 : {
3391 : __m128 shuf = _mm_movehdup_ps(v); // (v3 , v3 , v1 , v1)
3392 : __m128 sums = _mm_add_ps(v, shuf); // (v3+v3, v3+v2, v1+v1, v1+v0)
3393 : shuf = _mm_movehl_ps(shuf, sums); // (v3 , v3 , v3+v3, v3+v2)
3394 : sums = _mm_add_ss(sums, shuf); // (v1+v0)+(v3+v2)
3395 : return _mm_cvtss_f32(sums);
3396 : }
3397 : #else
3398 142031000 : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
3399 : {
3400 142031000 : __m128 shuf = _mm_movehl_ps(v, v); // (v3 , v2 , v3 , v2)
3401 142031000 : __m128 sums = _mm_add_ps(v, shuf); // (v3+v3, v2+v2, v3+v1, v2+v0)
3402 142031000 : shuf = _mm_shuffle_ps(sums, sums, 1); // (v2+v0, v2+v0, v2+v0, v3+v1)
3403 142031000 : sums = _mm_add_ss(sums, shuf); // (v2+v0)+(v3+v1)
3404 142031000 : return _mm_cvtss_f32(sums);
3405 : }
3406 : #endif
3407 :
3408 : #endif // define USE_SSE2
3409 :
3410 : /************************************************************************/
3411 : /* GWKCubicResampleSrcMaskIsDensity4SampleRealT() */
3412 : /************************************************************************/
3413 :
3414 : // Note: if USE_SSE_CUBIC_IMPL, only instantiate that for Byte and UInt16,
3415 : // because there are a few assumptions about those types.
3416 : // We do not define USE_SSE_CUBIC_IMPL since in practice, it gives zero
3417 : // perf benefit.
3418 :
3419 : template <class T>
3420 389755 : static CPL_INLINE bool GWKCubicResampleSrcMaskIsDensity4SampleRealT(
3421 : const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
3422 : double *pdfDensity, double *pdfReal)
3423 : {
3424 389755 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3425 389755 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3426 389755 : const GPtrDiff_t iSrcOffset =
3427 389755 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3428 :
3429 : // Get the bilinear interpolation at the image borders.
3430 389755 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3431 387271 : iSrcY + 2 >= poWK->nSrcYSize)
3432 : {
3433 2484 : double adfImagIgnored[4] = {};
3434 2484 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3435 2484 : pdfDensity, pdfReal, adfImagIgnored);
3436 : }
3437 :
3438 : #if defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
3439 : const float fDeltaX = static_cast<float>(dfSrcX) - 0.5f - iSrcX;
3440 : const float fDeltaY = static_cast<float>(dfSrcY) - 0.5f - iSrcY;
3441 :
3442 : // TODO(schwehr): Explain the magic numbers.
3443 : float afTemp[4 + 4 + 4 + 1];
3444 : float *pafAligned =
3445 : reinterpret_cast<float *>(afTemp + ((size_t)afTemp & 0xf));
3446 : float *pafCoeffs = pafAligned;
3447 : float *pafDensity = pafAligned + 4;
3448 : float *pafValue = pafAligned + 8;
3449 :
3450 : const float fHalfDeltaX = 0.5f * fDeltaX;
3451 : const float fThreeDeltaX = 3.0f * fDeltaX;
3452 : const float fHalfDeltaX2 = fHalfDeltaX * fDeltaX;
3453 :
3454 : pafCoeffs[0] = fHalfDeltaX * (-1 + fDeltaX * (2 - fDeltaX));
3455 : pafCoeffs[1] = 1 + fHalfDeltaX2 * (-5 + fThreeDeltaX);
3456 : pafCoeffs[2] = fHalfDeltaX * (1 + fDeltaX * (4 - fThreeDeltaX));
3457 : pafCoeffs[3] = fHalfDeltaX2 * (-1 + fDeltaX);
3458 : __m128 xmmCoeffs = _mm_load_ps(pafCoeffs);
3459 : const __m128 xmmThreshold = _mm_load1_ps(&SRC_DENSITY_THRESHOLD_FLOAT);
3460 :
3461 : __m128 xmmMaskLowDensity = _mm_setzero_ps();
3462 : for (GPtrDiff_t i = -1, iOffset = iSrcOffset - poWK->nSrcXSize - 1; i < 3;
3463 : i++, iOffset += poWK->nSrcXSize)
3464 : {
3465 : const __m128 xmmDensity =
3466 : _mm_loadu_ps(poWK->pafUnifiedSrcDensity + iOffset);
3467 : xmmMaskLowDensity = _mm_or_ps(xmmMaskLowDensity,
3468 : _mm_cmplt_ps(xmmDensity, xmmThreshold));
3469 : pafDensity[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
3470 :
3471 : const __m128 xmmValues =
3472 : XMMLoad4Values(((T *)poWK->papabySrcImage[iBand]) + iOffset);
3473 : pafValue[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValues));
3474 : }
3475 : if (_mm_movemask_ps(xmmMaskLowDensity))
3476 : {
3477 : double adfImagIgnored[4] = {};
3478 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3479 : pdfDensity, pdfReal, adfImagIgnored);
3480 : }
3481 :
3482 : const float fHalfDeltaY = 0.5f * fDeltaY;
3483 : const float fThreeDeltaY = 3.0f * fDeltaY;
3484 : const float fHalfDeltaY2 = fHalfDeltaY * fDeltaY;
3485 :
3486 : pafCoeffs[0] = fHalfDeltaY * (-1 + fDeltaY * (2 - fDeltaY));
3487 : pafCoeffs[1] = 1 + fHalfDeltaY2 * (-5 + fThreeDeltaY);
3488 : pafCoeffs[2] = fHalfDeltaY * (1 + fDeltaY * (4 - fThreeDeltaY));
3489 : pafCoeffs[3] = fHalfDeltaY2 * (-1 + fDeltaY);
3490 :
3491 : xmmCoeffs = _mm_load_ps(pafCoeffs);
3492 :
3493 : const __m128 xmmDensity = _mm_load_ps(pafDensity);
3494 : const __m128 xmmValue = _mm_load_ps(pafValue);
3495 : *pdfDensity = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
3496 : *pdfReal = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValue));
3497 :
3498 : // We did all above computations on float32 whereas the general case is
3499 : // float64. Not sure if one is fundamentally more correct than the other
3500 : // one, but we want our optimization to give the same result as the
3501 : // general case as much as possible, so if the resulting value is
3502 : // close to some_int_value + 0.5, redo the computation with the general
3503 : // case.
3504 : // Note: If other types than Byte or UInt16, will need changes.
3505 : if (fabs(*pdfReal - static_cast<int>(*pdfReal) - 0.5) > .007)
3506 : return true;
3507 :
3508 : #endif // defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
3509 :
3510 387271 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3511 387271 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3512 :
3513 387271 : double adfValueDens[4] = {};
3514 387271 : double adfValueReal[4] = {};
3515 :
3516 387271 : double adfCoeffsX[4] = {};
3517 387271 : GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3518 :
3519 387271 : double adfCoeffsY[4] = {};
3520 387271 : GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3521 :
3522 1930200 : for (GPtrDiff_t i = -1; i < 3; i++)
3523 : {
3524 1544480 : const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
3525 : #if !(defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2))
3526 1544480 : if (poWK->pafUnifiedSrcDensity[iOffset + 0] <
3527 1542940 : SRC_DENSITY_THRESHOLD_FLOAT ||
3528 1542940 : poWK->pafUnifiedSrcDensity[iOffset + 1] <
3529 1542940 : SRC_DENSITY_THRESHOLD_FLOAT ||
3530 1542940 : poWK->pafUnifiedSrcDensity[iOffset + 2] <
3531 1542940 : SRC_DENSITY_THRESHOLD_FLOAT ||
3532 1542940 : poWK->pafUnifiedSrcDensity[iOffset + 3] <
3533 : SRC_DENSITY_THRESHOLD_FLOAT)
3534 : {
3535 1551 : double adfImagIgnored[4] = {};
3536 1551 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3537 : pdfDensity, pdfReal,
3538 1551 : adfImagIgnored);
3539 : }
3540 : #endif
3541 :
3542 3085860 : adfValueDens[i + 1] =
3543 1542930 : CONVOL4(adfCoeffsX, poWK->pafUnifiedSrcDensity + iOffset);
3544 :
3545 1542930 : adfValueReal[i + 1] = CONVOL4(
3546 : adfCoeffsX,
3547 1542930 : reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
3548 : }
3549 :
3550 385720 : *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3551 385720 : *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3552 :
3553 385720 : return true;
3554 : }
3555 :
3556 : /************************************************************************/
3557 : /* GWKCubicResampleSrcMaskIsDensity4SampleReal() */
3558 : /* Bi-cubic when source has and only has pafUnifiedSrcDensity. */
3559 : /************************************************************************/
3560 :
3561 0 : static bool GWKCubicResampleSrcMaskIsDensity4SampleReal(
3562 : const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
3563 : double *pdfDensity, double *pdfReal)
3564 :
3565 : {
3566 0 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3567 0 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3568 0 : const GPtrDiff_t iSrcOffset =
3569 0 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3570 0 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3571 0 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3572 :
3573 : // Get the bilinear interpolation at the image borders.
3574 0 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3575 0 : iSrcY + 2 >= poWK->nSrcYSize)
3576 : {
3577 0 : double adfImagIgnored[4] = {};
3578 0 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3579 0 : pdfDensity, pdfReal, adfImagIgnored);
3580 : }
3581 :
3582 0 : double adfCoeffsX[4] = {};
3583 0 : GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3584 :
3585 0 : double adfCoeffsY[4] = {};
3586 0 : GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3587 :
3588 0 : double adfValueDens[4] = {};
3589 0 : double adfValueReal[4] = {};
3590 0 : double adfDensity[4] = {};
3591 0 : double adfReal[4] = {};
3592 0 : double adfImagIgnored[4] = {};
3593 :
3594 0 : for (GPtrDiff_t i = -1; i < 3; i++)
3595 : {
3596 0 : if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
3597 0 : 2, adfDensity, adfReal, adfImagIgnored) ||
3598 0 : adfDensity[0] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3599 0 : adfDensity[1] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3600 0 : adfDensity[2] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3601 0 : adfDensity[3] < SRC_DENSITY_THRESHOLD_DOUBLE)
3602 : {
3603 0 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3604 : pdfDensity, pdfReal,
3605 0 : adfImagIgnored);
3606 : }
3607 :
3608 0 : adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
3609 0 : adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
3610 : }
3611 :
3612 0 : *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3613 0 : *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3614 :
3615 0 : return true;
3616 : }
3617 :
3618 : template <class T>
3619 2300964 : static bool GWKCubicResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
3620 : int iBand, double dfSrcX,
3621 : double dfSrcY, T *pValue)
3622 :
3623 : {
3624 2300964 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3625 2300964 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3626 2300964 : const GPtrDiff_t iSrcOffset =
3627 2300964 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3628 2300964 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3629 2300964 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3630 2300964 : const double dfDeltaY2 = dfDeltaY * dfDeltaY;
3631 2300964 : const double dfDeltaY3 = dfDeltaY2 * dfDeltaY;
3632 :
3633 : // Get the bilinear interpolation at the image borders.
3634 2300964 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3635 1883033 : iSrcY + 2 >= poWK->nSrcYSize)
3636 490244 : return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
3637 490244 : pValue);
3638 :
3639 1810720 : double adfCoeffs[4] = {};
3640 1810720 : GWKCubicComputeWeights(dfDeltaX, adfCoeffs);
3641 :
3642 1810720 : double adfValue[4] = {};
3643 :
3644 9053590 : for (GPtrDiff_t i = -1; i < 3; i++)
3645 : {
3646 7242876 : const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
3647 :
3648 7242876 : adfValue[i + 1] = CONVOL4(
3649 : adfCoeffs,
3650 7242876 : reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
3651 : }
3652 :
3653 : const double dfValue =
3654 1810720 : CubicConvolution(dfDeltaY, dfDeltaY2, dfDeltaY3, adfValue[0],
3655 : adfValue[1], adfValue[2], adfValue[3]);
3656 :
3657 1810720 : *pValue = GWKClampValueT<T>(dfValue);
3658 :
3659 1810720 : return true;
3660 : }
3661 :
3662 : /************************************************************************/
3663 : /* GWKLanczosSinc() */
3664 : /************************************************************************/
3665 :
3666 : /*
3667 : * Lanczos windowed sinc interpolation kernel with radius r.
3668 : * /
3669 : * | sinc(x) * sinc(x/r), if |x| < r
3670 : * L(x) = | 1, if x = 0 ,
3671 : * | 0, otherwise
3672 : * \
3673 : *
3674 : * where sinc(x) = sin(PI * x) / (PI * x).
3675 : */
3676 :
static double GWKLanczosSinc(double dfX)
{
    // Evaluates sinc(x) * sinc(x / 3), i.e. the Lanczos kernel of radius 3.
    // NOTE(review): there is no |x| < 3 test here; restricting the argument
    // to the kernel support is presumably left to the callers.
    if (dfX != 0.0)
    {
        const double dfArg = M_PI * dfX;
        const double dfArgOver3 = dfArg / 3;

        // Only one sin() call is needed: since
        //   sin(3a) = 3 sin(a) - 4 sin^3(a)
        // the product sin(dfArg) * sin(dfArgOver3) equals
        //   (3 - 4 sin^2(dfArgOver3)) * sin^2(dfArgOver3).
        const double dfSinOver3 = sin(dfArgOver3);
        const double dfSinOver3Sq = dfSinOver3 * dfSinOver3;

        return (3 - 4 * dfSinOver3Sq) * dfSinOver3Sq / (dfArg * dfArgOver3);
    }

    // Limit value at the origin.
    return 1.0;
}
3693 :
static double GWKLanczosSinc4Values(double *padfValues)
{
    // Replaces each of the 4 abscissas in padfValues by its radius-3 Lanczos
    // weight sinc(x) * sinc(x / 3), and returns the sum of the 4 weights.
    for (double *pdfCur = padfValues; pdfCur != padfValues + 4; ++pdfCur)
    {
        if (*pdfCur == 0.0)
        {
            // Limit value at the origin.
            *pdfCur = 1.0;
            continue;
        }

        const double dfArg = M_PI * (*pdfCur);
        const double dfArgOver3 = dfArg / 3;

        // sin(3a) = 3 sin(a) - 4 sin^3(a), so sin(dfArg) * sin(dfArgOver3)
        // is (3 - 4 sin^2(dfArgOver3)) * sin^2(dfArgOver3): one sin() call.
        const double dfSinOver3 = sin(dfArgOver3);
        const double dfSinOver3Sq = dfSinOver3 * dfSinOver3;

        *pdfCur = (3 - 4 * dfSinOver3Sq) * dfSinOver3Sq / (dfArg * dfArgOver3);
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3718 :
3719 : /************************************************************************/
3720 : /* GWKBilinear() */
3721 : /************************************************************************/
3722 :
static double GWKBilinear(double dfX)
{
    // Triangle (tent) kernel: 1 - |x| on [-1, 1], zero elsewhere.
    const double dfDist = fabs(dfX);
    return (dfDist <= 1.0) ? 1 - dfDist : 0.0;
}
3731 :
static double GWKBilinear4Values(double *padfValues)
{
    // Replaces each of the 4 abscissas in padfValues by its triangle-kernel
    // weight (1 - |x| on [-1, 1], zero elsewhere) and returns their sum.
    for (int i = 0; i < 4; ++i)
    {
        const double dfDist = fabs(padfValues[i]);
        padfValues[i] = (dfDist <= 1.0) ? 1 - dfDist : 0.0;
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3756 :
3757 : /************************************************************************/
3758 : /* GWKCubic() */
3759 : /************************************************************************/
3760 :
3761 82838 : static double GWKCubic(double dfX)
3762 : {
3763 82838 : return CubicKernel(dfX);
3764 : }
3765 :
static double GWKCubic4Values(double *padfValues)
{
    // Replaces each of the 4 abscissas in padfValues by its cubic
    // convolution weight and returns the sum of the 4 weights.
    //
    // Piecewise kernel evaluated below:
    //   |x| <= 1     :  1.5 |x|^3 - 2.5 |x|^2 + 1
    //   1 < |x| <= 2 : -0.5 |x|^3 + 2.5 |x|^2 - 4 |x| + 2
    //   otherwise    :  0
    for (int i = 0; i < 4; ++i)
    {
        const double dfX = padfValues[i];
        const double dfDist = fabs(dfX);
        const double dfSq = dfX * dfX;

        double dfWeight = 0.0;
        if (dfDist <= 1.0)
        {
            dfWeight = dfSq * (1.5 * dfDist - 2.5) + 1.0;
        }
        else if (dfDist <= 2.0)
        {
            dfWeight = dfSq * (-0.5 * dfDist + 2.5) - 4.0 * dfDist + 2.0;
        }

        padfValues[i] = dfWeight;
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3807 :
3808 : /************************************************************************/
3809 : /* GWKBSpline() */
3810 : /************************************************************************/
3811 :
3812 : // https://www.cs.utexas.edu/~fussell/courses/cs384g-fall2013/lectures/mitchell/Mitchell.pdf
3813 : // Equation 8 with (B,C)=(1,0)
3814 : // 1/6 * ( 3 * |x|^3 - 6 * |x|^2 + 4) |x| < 1
3815 : // 1/6 * ( -|x|^3 + 6 |x|^2 - 12|x| + 8) |x| >= 1 and |x| < 2
3816 :
static double GWKBSpline(double x)
{
    // Cubic B-spline weight, scaled by 6 (the 1/6 factor of the formula is
    // deliberately left out, see the trailing comment on the return).
    //
    // The value is built from the truncated-power expansion
    //   (x+2)^3 - 4 (x+1)^3 + 6 x^3 - 4 (x-1)^3
    // where each term only contributes once its base is positive.
    //
    // That expansion lacks the final (x-2)^3 term, so on its own it yields
    // -(x-2)^3 rather than 0 for x > 2 (e.g. -0.125 at x = 2.5) while
    // correctly yielding 0 for x < -2. The kernel support is |x| < 2, so
    // return 0 explicitly on both sides. At x == 2 the expansion already
    // evaluates to exactly 0, so results inside the support are unchanged.
    const double xp2 = x + 2.0;
    if (!(xp2 > 0.0) || x >= 2.0)
        return 0.0;

    const double xp1 = x + 1.0;
    const double xm1 = x - 1.0;

    // Accumulate from the innermost term outwards, keeping the summation
    // order of the original nested conditional expression.
    double dfSum = 0.0;
    if (xp1 > 0.0)
    {
        if (x > 0.0)
        {
            if (xm1 > 0.0)
                dfSum = -4.0 * xm1 * xm1 * xm1;
            dfSum += 6.0 * x * x * x;
        }
        dfSum += -4.0 * xp1 * xp1 * xp1;
    }

    return dfSum + xp2 * xp2 * xp2;  // * 0.166666666666666666666
}
3840 :
static double GWKBSpline4Values(double *padfValues)
{
    // Replaces each of the 4 abscissas in padfValues by its cubic B-spline
    // weight scaled by 6 (the 1/6 factor is deliberately left out), and
    // returns the sum of the 4 weights.
    //
    // Each weight is built from the truncated-power expansion
    //   (x+2)^3 - 4 (x+1)^3 + 6 x^3 - 4 (x-1)^3
    // where each term only contributes once its base is positive.
    //
    // That expansion lacks the final (x-2)^3 term, so on its own it yields
    // -(x-2)^3 rather than 0 for x > 2 while correctly yielding 0 for
    // x < -2. The kernel support is |x| < 2, so the weight is forced to 0 on
    // both sides. At x == 2 the expansion already evaluates to exactly 0, so
    // results inside the support are unchanged.
    for (int i = 0; i < 4; i++)
    {
        const double x = padfValues[i];
        const double xp2 = x + 2.0;

        double dfWeight = 0.0;
        if (xp2 > 0.0 && x < 2.0)
        {
            const double xp1 = x + 1.0;
            const double xm1 = x - 1.0;

            // Accumulate from the innermost term outwards, keeping the
            // summation order of the original nested conditional expression.
            if (xp1 > 0.0)
            {
                if (x > 0.0)
                {
                    if (xm1 > 0.0)
                        dfWeight = -4.0 * xm1 * xm1 * xm1;
                    dfWeight += 6.0 * x * x * x;
                }
                dfWeight += -4.0 * xp1 * xp1 * xp1;
            }
            dfWeight += xp2 * xp2 * xp2;  // * 0.166666666666666666666
        }

        padfValues[i] = dfWeight;
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3870 : /************************************************************************/
3871 : /* GWKResampleWrkStruct */
3872 : /************************************************************************/
3873 :
typedef struct _GWKResampleWrkStruct GWKResampleWrkStruct;

// Signature shared by the general resamplers (GWKResample and
// GWKResampleOptimizedLanczos): compute density/real/imaginary outputs for
// one band at source position (dfSrcX, dfSrcY), using psWrkStruct as
// scratch space.
typedef bool (*pfnGWKResampleType)(const GDALWarpKernel *poWK, int iBand,
                                   double dfSrcX, double dfSrcY,
                                   double *pdfDensity, double *pdfReal,
                                   double *pdfImag,
                                   GWKResampleWrkStruct *psWrkStruct);

// Scratch buffers and cached values reused across resampling calls.
// Created by GWKResampleCreateWrkStruct() and released by
// GWKResampleDeleteWrkStruct().
struct _GWKResampleWrkStruct
{
    // Resampler selected at creation time: GWKResampleOptimizedLanczos for
    // GRA_Lanczos, GWKResample otherwise.
    pfnGWKResampleType pfnGWKResample;

    // Space for saved X weights ((nXRadius + 1) * 2 entries each).
    // In GWKResample, pabCalcX[k] tells whether padfWeightsX[k] has already
    // been computed for the sample being processed.
    double *padfWeightsX;
    bool *pabCalcX;

    // iLastSrcX/iLastSrcY and dfLastDeltaX/dfLastDeltaY are initialised to
    // -10 at creation. GWKResampleOptimizedLanczos only recomputes the X
    // weights when iSrcX or dfDeltaX differs from the saved values.
    // The dfCos*/dfSin* members are only filled in at creation for
    // GRA_Lanczos, and per axis only when that axis' scale is below 1.
    double *padfWeightsY;       // Only used by GWKResampleOptimizedLanczos.
    int iLastSrcX;              // Only used by GWKResampleOptimizedLanczos.
    int iLastSrcY;              // Only used by GWKResampleOptimizedLanczos.
    double dfLastDeltaX;        // Only used by GWKResampleOptimizedLanczos.
    double dfLastDeltaY;        // Only used by GWKResampleOptimizedLanczos.
    double dfCosPiXScale;       // Only used by GWKResampleOptimizedLanczos.
    double dfSinPiXScale;       // Only used by GWKResampleOptimizedLanczos.
    double dfCosPiXScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    double dfSinPiXScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    double dfCosPiYScale;       // Only used by GWKResampleOptimizedLanczos.
    double dfSinPiYScale;       // Only used by GWKResampleOptimizedLanczos.
    double dfCosPiYScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    double dfSinPiYScaleOver3;  // Only used by GWKResampleOptimizedLanczos.

    // Space for saving a row of pixels ((nXRadius + 1) * 2 entries each).
    // padfRowDensity is nullptr when the kernel has neither a unified source
    // density, nor a unified validity mask, nor per-band validity masks.
    double *padfRowDensity;
    double *padfRowReal;
    double *padfRowImag;
};
3909 :
3910 : /************************************************************************/
3911 : /* GWKResampleCreateWrkStruct() */
3912 : /************************************************************************/
3913 :
// Forward declarations: GWKResampleCreateWrkStruct() stores the address of
// one of these two resamplers in the work structure, and both are defined
// further down in the file.
static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
                        double dfSrcY, double *pdfDensity, double *pdfReal,
                        double *pdfImag, GWKResampleWrkStruct *psWrkStruct);

static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
                                        double dfSrcX, double dfSrcY,
                                        double *pdfDensity, double *pdfReal,
                                        double *pdfImag,
                                        GWKResampleWrkStruct *psWrkStruct);
3923 :
3924 401 : static GWKResampleWrkStruct *GWKResampleCreateWrkStruct(GDALWarpKernel *poWK)
3925 : {
3926 401 : const int nXDist = (poWK->nXRadius + 1) * 2;
3927 401 : const int nYDist = (poWK->nYRadius + 1) * 2;
3928 :
3929 : GWKResampleWrkStruct *psWrkStruct = static_cast<GWKResampleWrkStruct *>(
3930 401 : CPLCalloc(1, sizeof(GWKResampleWrkStruct)));
3931 :
3932 : // Alloc space for saved X weights.
3933 401 : psWrkStruct->padfWeightsX =
3934 401 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3935 401 : psWrkStruct->pabCalcX =
3936 401 : static_cast<bool *>(CPLMalloc(nXDist * sizeof(bool)));
3937 :
3938 401 : psWrkStruct->padfWeightsY =
3939 401 : static_cast<double *>(CPLCalloc(nYDist, sizeof(double)));
3940 401 : psWrkStruct->iLastSrcX = -10;
3941 401 : psWrkStruct->iLastSrcY = -10;
3942 401 : psWrkStruct->dfLastDeltaX = -10;
3943 401 : psWrkStruct->dfLastDeltaY = -10;
3944 :
3945 : // Alloc space for saving a row of pixels.
3946 401 : if (poWK->pafUnifiedSrcDensity == nullptr &&
3947 365 : poWK->panUnifiedSrcValid == nullptr &&
3948 342 : poWK->papanBandSrcValid == nullptr)
3949 : {
3950 342 : psWrkStruct->padfRowDensity = nullptr;
3951 : }
3952 : else
3953 : {
3954 59 : psWrkStruct->padfRowDensity =
3955 59 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3956 : }
3957 401 : psWrkStruct->padfRowReal =
3958 401 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3959 401 : psWrkStruct->padfRowImag =
3960 401 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3961 :
3962 401 : if (poWK->eResample == GRA_Lanczos)
3963 : {
3964 65 : psWrkStruct->pfnGWKResample = GWKResampleOptimizedLanczos;
3965 :
3966 65 : if (poWK->dfXScale < 1)
3967 : {
3968 4 : psWrkStruct->dfCosPiXScaleOver3 = cos(M_PI / 3 * poWK->dfXScale);
3969 4 : psWrkStruct->dfSinPiXScaleOver3 =
3970 4 : sqrt(1 - psWrkStruct->dfCosPiXScaleOver3 *
3971 4 : psWrkStruct->dfCosPiXScaleOver3);
3972 : // "Naive":
3973 : // const double dfCosPiXScale = cos( M_PI * dfXScale );
3974 : // const double dfSinPiXScale = sin( M_PI * dfXScale );
3975 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
3976 4 : psWrkStruct->dfCosPiXScale = (4 * psWrkStruct->dfCosPiXScaleOver3 *
3977 4 : psWrkStruct->dfCosPiXScaleOver3 -
3978 4 : 3) *
3979 4 : psWrkStruct->dfCosPiXScaleOver3;
3980 4 : psWrkStruct->dfSinPiXScale = sqrt(
3981 4 : 1 - psWrkStruct->dfCosPiXScale * psWrkStruct->dfCosPiXScale);
3982 : }
3983 :
3984 65 : if (poWK->dfYScale < 1)
3985 : {
3986 12 : psWrkStruct->dfCosPiYScaleOver3 = cos(M_PI / 3 * poWK->dfYScale);
3987 12 : psWrkStruct->dfSinPiYScaleOver3 =
3988 12 : sqrt(1 - psWrkStruct->dfCosPiYScaleOver3 *
3989 12 : psWrkStruct->dfCosPiYScaleOver3);
3990 : // "Naive":
3991 : // const double dfCosPiYScale = cos( M_PI * dfYScale );
3992 : // const double dfSinPiYScale = sin( M_PI * dfYScale );
3993 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
3994 12 : psWrkStruct->dfCosPiYScale = (4 * psWrkStruct->dfCosPiYScaleOver3 *
3995 12 : psWrkStruct->dfCosPiYScaleOver3 -
3996 12 : 3) *
3997 12 : psWrkStruct->dfCosPiYScaleOver3;
3998 12 : psWrkStruct->dfSinPiYScale = sqrt(
3999 12 : 1 - psWrkStruct->dfCosPiYScale * psWrkStruct->dfCosPiYScale);
4000 : }
4001 : }
4002 : else
4003 336 : psWrkStruct->pfnGWKResample = GWKResample;
4004 :
4005 401 : return psWrkStruct;
4006 : }
4007 :
4008 : /************************************************************************/
4009 : /* GWKResampleDeleteWrkStruct() */
4010 : /************************************************************************/
4011 :
4012 401 : static void GWKResampleDeleteWrkStruct(GWKResampleWrkStruct *psWrkStruct)
4013 : {
4014 401 : CPLFree(psWrkStruct->padfWeightsX);
4015 401 : CPLFree(psWrkStruct->padfWeightsY);
4016 401 : CPLFree(psWrkStruct->pabCalcX);
4017 401 : CPLFree(psWrkStruct->padfRowDensity);
4018 401 : CPLFree(psWrkStruct->padfRowReal);
4019 401 : CPLFree(psWrkStruct->padfRowImag);
4020 401 : CPLFree(psWrkStruct);
4021 401 : }
4022 :
4023 : /************************************************************************/
4024 : /* GWKResample() */
4025 : /************************************************************************/
4026 :
4027 239383 : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
4028 : double dfSrcY, double *pdfDensity, double *pdfReal,
4029 : double *pdfImag, GWKResampleWrkStruct *psWrkStruct)
4030 :
4031 : {
4032 : // Save as local variables to avoid following pointers in loops.
4033 239383 : const int nSrcXSize = poWK->nSrcXSize;
4034 239383 : const int nSrcYSize = poWK->nSrcYSize;
4035 :
4036 239383 : double dfAccumulatorReal = 0.0;
4037 239383 : double dfAccumulatorImag = 0.0;
4038 239383 : double dfAccumulatorDensity = 0.0;
4039 239383 : double dfAccumulatorWeight = 0.0;
4040 239383 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4041 239383 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4042 239383 : const GPtrDiff_t iSrcOffset =
4043 239383 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4044 239383 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4045 239383 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4046 :
4047 239383 : const double dfXScale = poWK->dfXScale;
4048 239383 : const double dfYScale = poWK->dfYScale;
4049 :
4050 239383 : const int nXDist = (poWK->nXRadius + 1) * 2;
4051 :
4052 : // Space for saved X weights.
4053 239383 : double *padfWeightsX = psWrkStruct->padfWeightsX;
4054 239383 : bool *pabCalcX = psWrkStruct->pabCalcX;
4055 :
4056 : // Space for saving a row of pixels.
4057 239383 : double *padfRowDensity = psWrkStruct->padfRowDensity;
4058 239383 : double *padfRowReal = psWrkStruct->padfRowReal;
4059 239383 : double *padfRowImag = psWrkStruct->padfRowImag;
4060 :
4061 : // Mark as needing calculation (don't calculate the weights yet,
4062 : // because a mask may render it unnecessary).
4063 239383 : memset(pabCalcX, false, nXDist * sizeof(bool));
4064 :
4065 239383 : FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample];
4066 239383 : CPLAssert(pfnGetWeight);
4067 :
4068 : // Skip sampling over edge of image.
4069 239383 : int j = poWK->nFiltInitY;
4070 239383 : int jMax = poWK->nYRadius;
4071 239383 : if (iSrcY + j < 0)
4072 566 : j = -iSrcY;
4073 239383 : if (iSrcY + jMax >= nSrcYSize)
4074 662 : jMax = nSrcYSize - iSrcY - 1;
4075 :
4076 239383 : int iMin = poWK->nFiltInitX;
4077 239383 : int iMax = poWK->nXRadius;
4078 239383 : if (iSrcX + iMin < 0)
4079 566 : iMin = -iSrcX;
4080 239383 : if (iSrcX + iMax >= nSrcXSize)
4081 659 : iMax = nSrcXSize - iSrcX - 1;
4082 :
4083 239383 : const int bXScaleBelow1 = (dfXScale < 1.0);
4084 239383 : const int bYScaleBelow1 = (dfYScale < 1.0);
4085 :
4086 239383 : GPtrDiff_t iRowOffset =
4087 239383 : iSrcOffset + static_cast<GPtrDiff_t>(j - 1) * nSrcXSize + iMin;
4088 :
4089 : // Loop over pixel rows in the kernel.
4090 1445930 : for (; j <= jMax; ++j)
4091 : {
4092 1206540 : iRowOffset += nSrcXSize;
4093 :
4094 : // Get pixel values.
4095 : // We can potentially read extra elements after the "normal" end of the
4096 : // source arrays, but the contract of papabySrcImage[iBand],
4097 : // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
4098 : // is to have WARP_EXTRA_ELTS reserved at their end.
4099 1206540 : if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
4100 : padfRowDensity, padfRowReal, padfRowImag))
4101 72 : continue;
4102 :
4103 : // Calculate the Y weight.
4104 : double dfWeight1 = (bYScaleBelow1)
4105 1206470 : ? pfnGetWeight((j - dfDeltaY) * dfYScale)
4106 1600 : : pfnGetWeight(j - dfDeltaY);
4107 :
4108 : // Iterate over pixels in row.
4109 1206470 : double dfAccumulatorRealLocal = 0.0;
4110 1206470 : double dfAccumulatorImagLocal = 0.0;
4111 1206470 : double dfAccumulatorDensityLocal = 0.0;
4112 1206470 : double dfAccumulatorWeightLocal = 0.0;
4113 :
4114 7317420 : for (int i = iMin; i <= iMax; ++i)
4115 : {
4116 : // Skip sampling if pixel has zero density.
4117 6110940 : if (padfRowDensity != nullptr &&
4118 77277 : padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD_DOUBLE)
4119 546 : continue;
4120 :
4121 6110400 : double dfWeight2 = 0.0;
4122 :
4123 : // Make or use a cached set of weights for this row.
4124 6110400 : if (pabCalcX[i - iMin])
4125 : {
4126 : // Use saved weight value instead of recomputing it.
4127 4903920 : dfWeight2 = padfWeightsX[i - iMin];
4128 : }
4129 : else
4130 : {
4131 : // Calculate & save the X weight.
4132 1206480 : padfWeightsX[i - iMin] = dfWeight2 =
4133 1206480 : (bXScaleBelow1) ? pfnGetWeight((i - dfDeltaX) * dfXScale)
4134 1600 : : pfnGetWeight(i - dfDeltaX);
4135 :
4136 1206480 : pabCalcX[i - iMin] = true;
4137 : }
4138 :
4139 : // Accumulate!
4140 6110400 : dfAccumulatorRealLocal += padfRowReal[i - iMin] * dfWeight2;
4141 6110400 : dfAccumulatorImagLocal += padfRowImag[i - iMin] * dfWeight2;
4142 6110400 : if (padfRowDensity != nullptr)
4143 76731 : dfAccumulatorDensityLocal +=
4144 76731 : padfRowDensity[i - iMin] * dfWeight2;
4145 6110400 : dfAccumulatorWeightLocal += dfWeight2;
4146 : }
4147 :
4148 1206470 : dfAccumulatorReal += dfAccumulatorRealLocal * dfWeight1;
4149 1206470 : dfAccumulatorImag += dfAccumulatorImagLocal * dfWeight1;
4150 1206470 : dfAccumulatorDensity += dfAccumulatorDensityLocal * dfWeight1;
4151 1206470 : dfAccumulatorWeight += dfAccumulatorWeightLocal * dfWeight1;
4152 : }
4153 :
4154 239383 : if (dfAccumulatorWeight < 0.000001 ||
4155 1887 : (padfRowDensity != nullptr && dfAccumulatorDensity < 0.000001))
4156 : {
4157 0 : *pdfDensity = 0.0;
4158 0 : return false;
4159 : }
4160 :
4161 : // Calculate the output taking into account weighting.
4162 239383 : if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
4163 : {
4164 239380 : *pdfReal = dfAccumulatorReal / dfAccumulatorWeight;
4165 239380 : *pdfImag = dfAccumulatorImag / dfAccumulatorWeight;
4166 239380 : if (padfRowDensity != nullptr)
4167 1884 : *pdfDensity = dfAccumulatorDensity / dfAccumulatorWeight;
4168 : else
4169 237496 : *pdfDensity = 1.0;
4170 : }
4171 : else
4172 : {
4173 3 : *pdfReal = dfAccumulatorReal;
4174 3 : *pdfImag = dfAccumulatorImag;
4175 3 : if (padfRowDensity != nullptr)
4176 3 : *pdfDensity = dfAccumulatorDensity;
4177 : else
4178 0 : *pdfDensity = 1.0;
4179 : }
4180 :
4181 239383 : return true;
4182 : }
4183 :
4184 : /************************************************************************/
4185 : /* GWKResampleOptimizedLanczos() */
4186 : /************************************************************************/
4187 :
4188 634574 : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
4189 : double dfSrcX, double dfSrcY,
4190 : double *pdfDensity, double *pdfReal,
4191 : double *pdfImag,
4192 : GWKResampleWrkStruct *psWrkStruct)
4193 :
4194 : {
4195 : // Save as local variables to avoid following pointers in loops.
4196 634574 : const int nSrcXSize = poWK->nSrcXSize;
4197 634574 : const int nSrcYSize = poWK->nSrcYSize;
4198 :
4199 634574 : double dfAccumulatorReal = 0.0;
4200 634574 : double dfAccumulatorImag = 0.0;
4201 634574 : double dfAccumulatorDensity = 0.0;
4202 634574 : double dfAccumulatorWeight = 0.0;
4203 634574 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4204 634574 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4205 634574 : const GPtrDiff_t iSrcOffset =
4206 634574 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4207 634574 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4208 634574 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4209 :
4210 634574 : const double dfXScale = poWK->dfXScale;
4211 634574 : const double dfYScale = poWK->dfYScale;
4212 :
4213 : // Space for saved X weights.
4214 634574 : double *const padfWeightsXShifted =
4215 634574 : psWrkStruct->padfWeightsX - poWK->nFiltInitX;
4216 634574 : double *const padfWeightsYShifted =
4217 634574 : psWrkStruct->padfWeightsY - poWK->nFiltInitY;
4218 :
4219 : // Space for saving a row of pixels.
4220 634574 : double *const padfRowDensity = psWrkStruct->padfRowDensity;
4221 634574 : double *const padfRowReal = psWrkStruct->padfRowReal;
4222 634574 : double *const padfRowImag = psWrkStruct->padfRowImag;
4223 :
4224 : // Skip sampling over edge of image.
4225 634574 : int jMin = poWK->nFiltInitY;
4226 634574 : int jMax = poWK->nYRadius;
4227 634574 : if (iSrcY + jMin < 0)
4228 17334 : jMin = -iSrcY;
4229 634574 : if (iSrcY + jMax >= nSrcYSize)
4230 5638 : jMax = nSrcYSize - iSrcY - 1;
4231 :
4232 634574 : int iMin = poWK->nFiltInitX;
4233 634574 : int iMax = poWK->nXRadius;
4234 634574 : if (iSrcX + iMin < 0)
4235 19595 : iMin = -iSrcX;
4236 634574 : if (iSrcX + iMax >= nSrcXSize)
4237 6817 : iMax = nSrcXSize - iSrcX - 1;
4238 :
4239 634574 : if (dfXScale < 1.0)
4240 : {
4241 462945 : while ((iMin - dfDeltaX) * dfXScale < -3.0)
4242 260083 : iMin++;
4243 263534 : while ((iMax - dfDeltaX) * dfXScale > 3.0)
4244 60672 : iMax--;
4245 :
4246 : // clang-format off
4247 : /*
4248 : Naive version:
4249 : for (int i = iMin; i <= iMax; ++i)
4250 : {
4251 : psWrkStruct->padfWeightsXShifted[i] =
4252 : GWKLanczosSinc((i - dfDeltaX) * dfXScale);
4253 : }
4254 :
4255 : but given that:
4256 :
4257 : GWKLanczosSinc(x):
4258 : if (dfX == 0.0)
4259 : return 1.0;
4260 :
4261 : const double dfPIX = M_PI * dfX;
4262 : const double dfPIXoverR = dfPIX / 3;
4263 : const double dfPIX2overR = dfPIX * dfPIXoverR;
4264 : return sin(dfPIX) * sin(dfPIXoverR) / dfPIX2overR;
4265 :
4266 : and
4267 : sin (a + b) = sin a cos b + cos a sin b.
4268 : cos (a + b) = cos a cos b - sin a sin b.
4269 :
4270 : we can skip any sin() computation within the loop
4271 : */
4272 : // clang-format on
4273 :
4274 202862 : if (iSrcX != psWrkStruct->iLastSrcX ||
4275 131072 : dfDeltaX != psWrkStruct->dfLastDeltaX)
4276 : {
4277 71790 : double dfX = (iMin - dfDeltaX) * dfXScale;
4278 :
4279 71790 : double dfPIXover3 = M_PI / 3 * dfX;
4280 71790 : double dfCosOver3 = cos(dfPIXover3);
4281 71790 : double dfSinOver3 = sin(dfPIXover3);
4282 :
4283 : // "Naive":
4284 : // double dfSin = sin( M_PI * dfX );
4285 : // double dfCos = cos( M_PI * dfX );
4286 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
4287 71790 : double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
4288 71790 : double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
4289 :
4290 71790 : const double dfCosPiXScaleOver3 = psWrkStruct->dfCosPiXScaleOver3;
4291 71790 : const double dfSinPiXScaleOver3 = psWrkStruct->dfSinPiXScaleOver3;
4292 71790 : const double dfCosPiXScale = psWrkStruct->dfCosPiXScale;
4293 71790 : const double dfSinPiXScale = psWrkStruct->dfSinPiXScale;
4294 71790 : constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
4295 71790 : padfWeightsXShifted[iMin] =
4296 71790 : dfX == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfX * dfX);
4297 683646 : for (int i = iMin + 1; i <= iMax; ++i)
4298 : {
4299 611856 : dfX += dfXScale;
4300 611856 : const double dfNewSin =
4301 611856 : dfSin * dfCosPiXScale + dfCos * dfSinPiXScale;
4302 611856 : const double dfNewSinOver3 = dfSinOver3 * dfCosPiXScaleOver3 +
4303 611856 : dfCosOver3 * dfSinPiXScaleOver3;
4304 611856 : padfWeightsXShifted[i] =
4305 : dfX == 0
4306 611856 : ? 1.0
4307 611856 : : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfX * dfX);
4308 611856 : const double dfNewCos =
4309 611856 : dfCos * dfCosPiXScale - dfSin * dfSinPiXScale;
4310 611856 : const double dfNewCosOver3 = dfCosOver3 * dfCosPiXScaleOver3 -
4311 611856 : dfSinOver3 * dfSinPiXScaleOver3;
4312 611856 : dfSin = dfNewSin;
4313 611856 : dfCos = dfNewCos;
4314 611856 : dfSinOver3 = dfNewSinOver3;
4315 611856 : dfCosOver3 = dfNewCosOver3;
4316 : }
4317 :
4318 71790 : psWrkStruct->iLastSrcX = iSrcX;
4319 71790 : psWrkStruct->dfLastDeltaX = dfDeltaX;
4320 : }
4321 : }
4322 : else
4323 : {
4324 789372 : while (iMin - dfDeltaX < -3.0)
4325 357660 : iMin++;
4326 431712 : while (iMax - dfDeltaX > 3.0)
4327 0 : iMax--;
4328 :
4329 431712 : if (iSrcX != psWrkStruct->iLastSrcX ||
4330 225330 : dfDeltaX != psWrkStruct->dfLastDeltaX)
4331 : {
4332 : // Optimisation of GWKLanczosSinc(i - dfDeltaX) based on the
4333 : // following trigonometric formulas.
4334 :
4335 : // TODO(schwehr): Move this somewhere where it can be rendered at
4336 : // LaTeX.
4337 : // clang-format off
4338 : // sin(M_PI * (dfBase + k)) = sin(M_PI * dfBase) * cos(M_PI * k) +
4339 : // cos(M_PI * dfBase) * sin(M_PI * k)
4340 : // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k) + dfCosPIBase * sin(M_PI * k)
4341 : // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k)
4342 : // sin(M_PI * (dfBase + k)) = dfSinPIBase * (((k % 2) == 0) ? 1 : -1)
4343 :
4344 : // sin(M_PI / dfR * (dfBase + k)) = sin(M_PI / dfR * dfBase) * cos(M_PI / dfR * k) +
4345 : // cos(M_PI / dfR * dfBase) * sin(M_PI / dfR * k)
4346 : // sin(M_PI / dfR * (dfBase + k)) = dfSinPIBaseOverR * cos(M_PI / dfR * k) + dfCosPIBaseOverR * sin(M_PI / dfR * k)
4347 : // clang-format on
4348 :
4349 420092 : const double dfSinPIDeltaXOver3 = sin((-M_PI / 3.0) * dfDeltaX);
4350 420092 : const double dfSin2PIDeltaXOver3 =
4351 : dfSinPIDeltaXOver3 * dfSinPIDeltaXOver3;
4352 : // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaX < PI/2.
4353 420092 : const double dfCosPIDeltaXOver3 = sqrt(1.0 - dfSin2PIDeltaXOver3);
4354 420092 : const double dfSinPIDeltaX =
4355 420092 : (3.0 - 4 * dfSin2PIDeltaXOver3) * dfSinPIDeltaXOver3;
4356 420092 : const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
4357 420092 : const double dfInvPI2Over3xSinPIDeltaX =
4358 : dfInvPI2Over3 * dfSinPIDeltaX;
4359 420092 : const double dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 =
4360 420092 : -0.5 * dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3;
4361 420092 : const double dfSinPIOver3 = 0.8660254037844386;
4362 420092 : const double dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3 =
4363 420092 : dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaX * dfCosPIDeltaXOver3;
4364 : const double padfCst[] = {
4365 420092 : dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3,
4366 420092 : dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 -
4367 : dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3,
4368 420092 : dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 +
4369 420092 : dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3};
4370 :
4371 2974940 : for (int i = iMin; i <= iMax; ++i)
4372 : {
4373 2554850 : const double dfX = i - dfDeltaX;
4374 2554850 : if (dfX == 0.0)
4375 58282 : padfWeightsXShifted[i] = 1.0;
4376 : else
4377 2496570 : padfWeightsXShifted[i] = padfCst[(i + 3) % 3] / (dfX * dfX);
4378 : #if DEBUG_VERBOSE
4379 : // TODO(schwehr): AlmostEqual.
4380 : // CPLAssert(fabs(padfWeightsX[i-poWK->nFiltInitX] -
4381 : // GWKLanczosSinc(dfX, 3.0)) < 1e-10);
4382 : #endif
4383 : }
4384 :
4385 420092 : psWrkStruct->iLastSrcX = iSrcX;
4386 420092 : psWrkStruct->dfLastDeltaX = dfDeltaX;
4387 : }
4388 : }
4389 :
4390 634574 : if (dfYScale < 1.0)
4391 : {
4392 15754 : while ((jMin - dfDeltaY) * dfYScale < -3.0)
4393 9500 : jMin++;
4394 9854 : while ((jMax - dfDeltaY) * dfYScale > 3.0)
4395 3600 : jMax--;
4396 :
4397 : // clang-format off
4398 : /*
4399 : Naive version:
4400 : for (int j = jMin; j <= jMax; ++j)
4401 : {
4402 : padfWeightsYShifted[j] =
4403 : GWKLanczosSinc((j - dfDeltaY) * dfYScale);
4404 : }
4405 : */
4406 : // clang-format on
4407 :
4408 6254 : if (iSrcY != psWrkStruct->iLastSrcY ||
4409 6127 : dfDeltaY != psWrkStruct->dfLastDeltaY)
4410 : {
4411 127 : double dfY = (jMin - dfDeltaY) * dfYScale;
4412 :
4413 127 : double dfPIYover3 = M_PI / 3 * dfY;
4414 127 : double dfCosOver3 = cos(dfPIYover3);
4415 127 : double dfSinOver3 = sin(dfPIYover3);
4416 :
4417 : // "Naive":
4418 : // double dfSin = sin( M_PI * dfY );
4419 : // double dfCos = cos( M_PI * dfY );
4420 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
4421 127 : double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
4422 127 : double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
4423 :
4424 127 : const double dfCosPiYScaleOver3 = psWrkStruct->dfCosPiYScaleOver3;
4425 127 : const double dfSinPiYScaleOver3 = psWrkStruct->dfSinPiYScaleOver3;
4426 127 : const double dfCosPiYScale = psWrkStruct->dfCosPiYScale;
4427 127 : const double dfSinPiYScale = psWrkStruct->dfSinPiYScale;
4428 127 : constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
4429 127 : padfWeightsYShifted[jMin] =
4430 127 : dfY == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfY * dfY);
4431 1210 : for (int j = jMin + 1; j <= jMax; ++j)
4432 : {
4433 1083 : dfY += dfYScale;
4434 1083 : const double dfNewSin =
4435 1083 : dfSin * dfCosPiYScale + dfCos * dfSinPiYScale;
4436 1083 : const double dfNewSinOver3 = dfSinOver3 * dfCosPiYScaleOver3 +
4437 1083 : dfCosOver3 * dfSinPiYScaleOver3;
4438 1083 : padfWeightsYShifted[j] =
4439 : dfY == 0
4440 1083 : ? 1.0
4441 1083 : : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfY * dfY);
4442 1083 : const double dfNewCos =
4443 1083 : dfCos * dfCosPiYScale - dfSin * dfSinPiYScale;
4444 1083 : const double dfNewCosOver3 = dfCosOver3 * dfCosPiYScaleOver3 -
4445 1083 : dfSinOver3 * dfSinPiYScaleOver3;
4446 1083 : dfSin = dfNewSin;
4447 1083 : dfCos = dfNewCos;
4448 1083 : dfSinOver3 = dfNewSinOver3;
4449 1083 : dfCosOver3 = dfNewCosOver3;
4450 : }
4451 :
4452 127 : psWrkStruct->iLastSrcY = iSrcY;
4453 127 : psWrkStruct->dfLastDeltaY = dfDeltaY;
4454 : }
4455 : }
4456 : else
4457 : {
4458 1106550 : while (jMin - dfDeltaY < -3.0)
4459 478232 : jMin++;
4460 628320 : while (jMax - dfDeltaY > 3.0)
4461 0 : jMax--;
4462 :
4463 628320 : if (iSrcY != psWrkStruct->iLastSrcY ||
4464 627488 : dfDeltaY != psWrkStruct->dfLastDeltaY)
4465 : {
4466 7198 : const double dfSinPIDeltaYOver3 = sin((-M_PI / 3.0) * dfDeltaY);
4467 7198 : const double dfSin2PIDeltaYOver3 =
4468 : dfSinPIDeltaYOver3 * dfSinPIDeltaYOver3;
4469 : // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaY < PI/2.
4470 7198 : const double dfCosPIDeltaYOver3 = sqrt(1.0 - dfSin2PIDeltaYOver3);
4471 7198 : const double dfSinPIDeltaY =
4472 7198 : (3.0 - 4.0 * dfSin2PIDeltaYOver3) * dfSinPIDeltaYOver3;
4473 7198 : const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
4474 7198 : const double dfInvPI2Over3xSinPIDeltaY =
4475 : dfInvPI2Over3 * dfSinPIDeltaY;
4476 7198 : const double dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 =
4477 7198 : -0.5 * dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3;
4478 7198 : const double dfSinPIOver3 = 0.8660254037844386;
4479 7198 : const double dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3 =
4480 7198 : dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaY * dfCosPIDeltaYOver3;
4481 : const double padfCst[] = {
4482 7198 : dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3,
4483 7198 : dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 -
4484 : dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3,
4485 7198 : dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 +
4486 7198 : dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3};
4487 :
4488 47777 : for (int j = jMin; j <= jMax; ++j)
4489 : {
4490 40579 : const double dfY = j - dfDeltaY;
4491 40579 : if (dfY == 0.0)
4492 468 : padfWeightsYShifted[j] = 1.0;
4493 : else
4494 40111 : padfWeightsYShifted[j] = padfCst[(j + 3) % 3] / (dfY * dfY);
4495 : #if DEBUG_VERBOSE
4496 : // TODO(schwehr): AlmostEqual.
4497 : // CPLAssert(fabs(padfWeightsYShifted[j] -
4498 : // GWKLanczosSinc(dfY, 3.0)) < 1e-10);
4499 : #endif
4500 : }
4501 :
4502 7198 : psWrkStruct->iLastSrcY = iSrcY;
4503 7198 : psWrkStruct->dfLastDeltaY = dfDeltaY;
4504 : }
4505 : }
4506 :
4507 : // If we have no density information, we can simply compute the
4508 : // accumulated weight.
4509 634574 : if (padfRowDensity == nullptr)
4510 : {
4511 634574 : double dfRowAccWeight = 0.0;
4512 5159250 : for (int i = iMin; i <= iMax; ++i)
4513 : {
4514 4524680 : dfRowAccWeight += padfWeightsXShifted[i];
4515 : }
4516 634574 : double dfColAccWeight = 0.0;
4517 4564130 : for (int j = jMin; j <= jMax; ++j)
4518 : {
4519 3929550 : dfColAccWeight += padfWeightsYShifted[j];
4520 : }
4521 634574 : dfAccumulatorWeight = dfRowAccWeight * dfColAccWeight;
4522 : }
4523 :
4524 : // Loop over pixel rows in the kernel.
4525 :
4526 634574 : if (poWK->eWorkingDataType == GDT_UInt8 && !poWK->panUnifiedSrcValid &&
4527 633954 : !poWK->papanBandSrcValid && !poWK->pafUnifiedSrcDensity &&
4528 : !padfRowDensity)
4529 : {
4530 : // Optimization for Byte case without any masking/alpha
4531 :
4532 633954 : if (dfAccumulatorWeight < 0.000001)
4533 : {
4534 0 : *pdfDensity = 0.0;
4535 0 : return false;
4536 : }
4537 :
4538 633954 : const GByte *pSrc =
4539 633954 : reinterpret_cast<const GByte *>(poWK->papabySrcImage[iBand]);
4540 633954 : pSrc += iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
4541 :
4542 : #if defined(USE_SSE2)
4543 633954 : if (iMax - iMin + 1 == 6)
4544 : {
4545 : // This is just an optimized version of the general case in
4546 : // the else clause.
4547 :
4548 359916 : pSrc += iMin;
4549 359916 : int j = jMin;
4550 : const auto fourXWeights =
4551 359916 : XMMReg4Double::Load4Val(padfWeightsXShifted + iMin);
4552 :
4553 : // Process 2 lines at the same time.
4554 1424180 : for (; j < jMax; j += 2)
4555 : {
4556 : const XMMReg4Double v_acc =
4557 1064270 : XMMReg4Double::Load4Val(pSrc) * fourXWeights;
4558 : const XMMReg4Double v_acc2 =
4559 1064270 : XMMReg4Double::Load4Val(pSrc + nSrcXSize) * fourXWeights;
4560 1064270 : const double dfRowAcc = v_acc.GetHorizSum();
4561 1064270 : const double dfRowAccEnd =
4562 1064270 : pSrc[4] * padfWeightsXShifted[iMin + 4] +
4563 1064270 : pSrc[5] * padfWeightsXShifted[iMin + 5];
4564 1064270 : dfAccumulatorReal +=
4565 1064270 : (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
4566 1064270 : const double dfRowAcc2 = v_acc2.GetHorizSum();
4567 1064270 : const double dfRowAcc2End =
4568 1064270 : pSrc[nSrcXSize + 4] * padfWeightsXShifted[iMin + 4] +
4569 1064270 : pSrc[nSrcXSize + 5] * padfWeightsXShifted[iMin + 5];
4570 1064270 : dfAccumulatorReal +=
4571 1064270 : (dfRowAcc2 + dfRowAcc2End) * padfWeightsYShifted[j + 1];
4572 1064270 : pSrc += 2 * nSrcXSize;
4573 : }
4574 359916 : if (j == jMax)
4575 : {
4576 : // Process last line if there's an odd number of them.
4577 :
4578 : const XMMReg4Double v_acc =
4579 90039 : XMMReg4Double::Load4Val(pSrc) * fourXWeights;
4580 90039 : const double dfRowAcc = v_acc.GetHorizSum();
4581 90039 : const double dfRowAccEnd =
4582 90039 : pSrc[4] * padfWeightsXShifted[iMin + 4] +
4583 90039 : pSrc[5] * padfWeightsXShifted[iMin + 5];
4584 90039 : dfAccumulatorReal +=
4585 90039 : (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
4586 : }
4587 : }
4588 : else
4589 : #endif
4590 : {
4591 1982080 : for (int j = jMin; j <= jMax; ++j)
4592 : {
4593 1708040 : int i = iMin;
4594 1708040 : double dfRowAcc1 = 0.0;
4595 1708040 : double dfRowAcc2 = 0.0;
4596 : // A bit of loop unrolling
4597 8474620 : for (; i < iMax; i += 2)
4598 : {
4599 6766580 : dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
4600 6766580 : dfRowAcc2 += pSrc[i + 1] * padfWeightsXShifted[i + 1];
4601 : }
4602 1708040 : if (i == iMax)
4603 : {
4604 : // Process last column if there's an odd number of them.
4605 1188570 : dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
4606 : }
4607 :
4608 1708040 : dfAccumulatorReal +=
4609 1708040 : (dfRowAcc1 + dfRowAcc2) * padfWeightsYShifted[j];
4610 1708040 : pSrc += nSrcXSize;
4611 : }
4612 : }
4613 :
4614 : // Calculate the output taking into account weighting.
4615 633954 : if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
4616 : {
4617 579748 : const double dfInvAcc = 1.0 / dfAccumulatorWeight;
4618 579748 : *pdfReal = dfAccumulatorReal * dfInvAcc;
4619 579748 : *pdfDensity = 1.0;
4620 : }
4621 : else
4622 : {
4623 54206 : *pdfReal = dfAccumulatorReal;
4624 54206 : *pdfDensity = 1.0;
4625 : }
4626 :
4627 633954 : return true;
4628 : }
4629 :
4630 620 : GPtrDiff_t iRowOffset =
4631 620 : iSrcOffset + static_cast<GPtrDiff_t>(jMin - 1) * nSrcXSize + iMin;
4632 :
4633 620 : int nCountValid = 0;
4634 620 : const bool bIsNonComplex = !GDALDataTypeIsComplex(poWK->eWorkingDataType);
4635 :
4636 3560 : for (int j = jMin; j <= jMax; ++j)
4637 : {
4638 2940 : iRowOffset += nSrcXSize;
4639 :
4640 : // Get pixel values.
4641 : // We can potentially read extra elements after the "normal" end of the
4642 : // source arrays, but the contract of papabySrcImage[iBand],
4643 : // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
4644 : // is to have WARP_EXTRA_ELTS reserved at their end.
4645 2940 : if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
4646 : padfRowDensity, padfRowReal, padfRowImag))
4647 0 : continue;
4648 :
4649 2940 : const double dfWeight1 = padfWeightsYShifted[j];
4650 :
4651 : // Iterate over pixels in row.
4652 2940 : if (padfRowDensity != nullptr)
4653 : {
4654 0 : for (int i = iMin; i <= iMax; ++i)
4655 : {
4656 : // Skip sampling if pixel has zero density.
4657 0 : if (padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD_DOUBLE)
4658 0 : continue;
4659 :
4660 0 : nCountValid++;
4661 :
4662 : // Use a cached set of weights for this row.
4663 0 : const double dfWeight2 = dfWeight1 * padfWeightsXShifted[i];
4664 :
4665 : // Accumulate!
4666 0 : dfAccumulatorReal += padfRowReal[i - iMin] * dfWeight2;
4667 0 : dfAccumulatorImag += padfRowImag[i - iMin] * dfWeight2;
4668 0 : dfAccumulatorDensity += padfRowDensity[i - iMin] * dfWeight2;
4669 0 : dfAccumulatorWeight += dfWeight2;
4670 : }
4671 : }
4672 2940 : else if (bIsNonComplex)
4673 : {
4674 1764 : double dfRowAccReal = 0.0;
4675 10560 : for (int i = iMin; i <= iMax; ++i)
4676 : {
4677 8796 : const double dfWeight2 = padfWeightsXShifted[i];
4678 :
4679 : // Accumulate!
4680 8796 : dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
4681 : }
4682 :
4683 1764 : dfAccumulatorReal += dfRowAccReal * dfWeight1;
4684 : }
4685 : else
4686 : {
4687 1176 : double dfRowAccReal = 0.0;
4688 1176 : double dfRowAccImag = 0.0;
4689 7040 : for (int i = iMin; i <= iMax; ++i)
4690 : {
4691 5864 : const double dfWeight2 = padfWeightsXShifted[i];
4692 :
4693 : // Accumulate!
4694 5864 : dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
4695 5864 : dfRowAccImag += padfRowImag[i - iMin] * dfWeight2;
4696 : }
4697 :
4698 1176 : dfAccumulatorReal += dfRowAccReal * dfWeight1;
4699 1176 : dfAccumulatorImag += dfRowAccImag * dfWeight1;
4700 : }
4701 : }
4702 :
4703 620 : if (dfAccumulatorWeight < 0.000001 ||
4704 0 : (padfRowDensity != nullptr &&
4705 0 : (dfAccumulatorDensity < 0.000001 ||
4706 0 : nCountValid < (jMax - jMin + 1) * (iMax - iMin + 1) / 2)))
4707 : {
4708 0 : *pdfDensity = 0.0;
4709 0 : return false;
4710 : }
4711 :
4712 : // Calculate the output taking into account weighting.
4713 620 : if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
4714 : {
4715 0 : const double dfInvAcc = 1.0 / dfAccumulatorWeight;
4716 0 : *pdfReal = dfAccumulatorReal * dfInvAcc;
4717 0 : *pdfImag = dfAccumulatorImag * dfInvAcc;
4718 0 : if (padfRowDensity != nullptr)
4719 0 : *pdfDensity = dfAccumulatorDensity * dfInvAcc;
4720 : else
4721 0 : *pdfDensity = 1.0;
4722 : }
4723 : else
4724 : {
4725 620 : *pdfReal = dfAccumulatorReal;
4726 620 : *pdfImag = dfAccumulatorImag;
4727 620 : if (padfRowDensity != nullptr)
4728 0 : *pdfDensity = dfAccumulatorDensity;
4729 : else
4730 620 : *pdfDensity = 1.0;
4731 : }
4732 :
4733 620 : return true;
4734 : }
4735 :
4736 : /************************************************************************/
4737 : /* GWKComputeWeights() */
4738 : /************************************************************************/
4739 :
4740 1091070 : static void GWKComputeWeights(GDALResampleAlg eResample, int iMin, int iMax,
4741 : double dfDeltaX, double dfXScale, int jMin,
4742 : int jMax, double dfDeltaY, double dfYScale,
4743 : double *padfWeightsHorizontal,
4744 : double *padfWeightsVertical, double &dfInvWeights)
4745 : {
4746 :
4747 1091070 : const FilterFuncType pfnGetWeight = apfGWKFilter[eResample];
4748 1091070 : CPLAssert(pfnGetWeight);
4749 1091070 : const FilterFunc4ValuesType pfnGetWeight4Values =
4750 1091070 : apfGWKFilter4Values[eResample];
4751 1091070 : CPLAssert(pfnGetWeight4Values);
4752 :
4753 1091070 : int i = iMin; // Used after for.
4754 1091070 : int iC = 0; // Used after for.
4755 : // Not zero, but as close as possible to it, to avoid potential division by
4756 : // zero at end of function
4757 1091070 : double dfAccumulatorWeightHorizontal = cpl::NumericLimits<double>::min();
4758 2403700 : for (; i + 2 < iMax; i += 4, iC += 4)
4759 : {
4760 1312620 : padfWeightsHorizontal[iC] = (i - dfDeltaX) * dfXScale;
4761 1312620 : padfWeightsHorizontal[iC + 1] = padfWeightsHorizontal[iC] + dfXScale;
4762 1312620 : padfWeightsHorizontal[iC + 2] =
4763 1312620 : padfWeightsHorizontal[iC + 1] + dfXScale;
4764 1312620 : padfWeightsHorizontal[iC + 3] =
4765 1312620 : padfWeightsHorizontal[iC + 2] + dfXScale;
4766 1312620 : dfAccumulatorWeightHorizontal +=
4767 1312620 : pfnGetWeight4Values(padfWeightsHorizontal + iC);
4768 : }
4769 1145700 : for (; i <= iMax; ++i, ++iC)
4770 : {
4771 54623 : const double dfWeight = pfnGetWeight((i - dfDeltaX) * dfXScale);
4772 54623 : padfWeightsHorizontal[iC] = dfWeight;
4773 54623 : dfAccumulatorWeightHorizontal += dfWeight;
4774 : }
4775 :
4776 1091070 : int j = jMin; // Used after for.
4777 1091070 : int jC = 0; // Used after for.
4778 : // Not zero, but as close as possible to it, to avoid potential division by
4779 : // zero at end of function
4780 1091070 : double dfAccumulatorWeightVertical = cpl::NumericLimits<double>::min();
4781 2332840 : for (; j + 2 < jMax; j += 4, jC += 4)
4782 : {
4783 1241770 : padfWeightsVertical[jC] = (j - dfDeltaY) * dfYScale;
4784 1241770 : padfWeightsVertical[jC + 1] = padfWeightsVertical[jC] + dfYScale;
4785 1241770 : padfWeightsVertical[jC + 2] = padfWeightsVertical[jC + 1] + dfYScale;
4786 1241770 : padfWeightsVertical[jC + 3] = padfWeightsVertical[jC + 2] + dfYScale;
4787 1241770 : dfAccumulatorWeightVertical +=
4788 1241770 : pfnGetWeight4Values(padfWeightsVertical + jC);
4789 : }
4790 1152230 : for (; j <= jMax; ++j, ++jC)
4791 : {
4792 61154 : const double dfWeight = pfnGetWeight((j - dfDeltaY) * dfYScale);
4793 61154 : padfWeightsVertical[jC] = dfWeight;
4794 61154 : dfAccumulatorWeightVertical += dfWeight;
4795 : }
4796 :
4797 1091070 : dfInvWeights =
4798 1091070 : 1. / (dfAccumulatorWeightHorizontal * dfAccumulatorWeightVertical);
4799 1091070 : }
4800 :
4801 : /************************************************************************/
4802 : /* GWKResampleNoMasksT() */
4803 : /************************************************************************/
4804 :
4805 : template <class T>
4806 : static bool
4807 : GWKResampleNoMasksT(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
4808 : double dfSrcY, T *pValue, double *padfWeightsHorizontal,
4809 : double *padfWeightsVertical, double &dfInvWeights)
4810 :
4811 : {
4812 : // Commonly used; save locally.
4813 : const int nSrcXSize = poWK->nSrcXSize;
4814 : const int nSrcYSize = poWK->nSrcYSize;
4815 :
4816 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4817 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4818 : const GPtrDiff_t iSrcOffset =
4819 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4820 :
4821 : const int nXRadius = poWK->nXRadius;
4822 : const int nYRadius = poWK->nYRadius;
4823 :
4824 : // Politely refuse to process invalid coordinates or obscenely small image.
4825 : if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
4826 : nYRadius > nSrcYSize)
4827 : return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
4828 : pValue);
4829 :
4830 : T *pSrcBand = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
4831 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4832 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4833 :
4834 : const double dfXScale = std::min(poWK->dfXScale, 1.0);
4835 : const double dfYScale = std::min(poWK->dfYScale, 1.0);
4836 :
4837 : int iMin = 1 - nXRadius;
4838 : if (iSrcX + iMin < 0)
4839 : iMin = -iSrcX;
4840 : int iMax = nXRadius;
4841 : if (iSrcX + iMax >= nSrcXSize - 1)
4842 : iMax = nSrcXSize - 1 - iSrcX;
4843 :
4844 : int jMin = 1 - nYRadius;
4845 : if (iSrcY + jMin < 0)
4846 : jMin = -iSrcY;
4847 : int jMax = nYRadius;
4848 : if (iSrcY + jMax >= nSrcYSize - 1)
4849 : jMax = nSrcYSize - 1 - iSrcY;
4850 :
4851 : if (iBand == 0)
4852 : {
4853 : GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
4854 : jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
4855 : padfWeightsVertical, dfInvWeights);
4856 : }
4857 :
4858 : // Loop over all rows in the kernel.
4859 : double dfAccumulator = 0.0;
4860 : for (int jC = 0, j = jMin; j <= jMax; ++j, ++jC)
4861 : {
4862 : const GPtrDiff_t iSampJ =
4863 : iSrcOffset + static_cast<GPtrDiff_t>(j) * nSrcXSize;
4864 :
4865 : // Loop over all pixels in the row.
4866 : double dfAccumulatorLocal = 0.0;
4867 : double dfAccumulatorLocal2 = 0.0;
4868 : int iC = 0;
4869 : int i = iMin;
4870 : // Process by chunk of 4 cols.
4871 : for (; i + 2 < iMax; i += 4, iC += 4)
4872 : {
4873 : // Retrieve the pixel & accumulate.
4874 : dfAccumulatorLocal +=
4875 : double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
4876 : dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
4877 : padfWeightsHorizontal[iC + 1];
4878 : dfAccumulatorLocal2 += double(pSrcBand[i + 2 + iSampJ]) *
4879 : padfWeightsHorizontal[iC + 2];
4880 : dfAccumulatorLocal2 += double(pSrcBand[i + 3 + iSampJ]) *
4881 : padfWeightsHorizontal[iC + 3];
4882 : }
4883 : dfAccumulatorLocal += dfAccumulatorLocal2;
4884 : if (i < iMax)
4885 : {
4886 : dfAccumulatorLocal +=
4887 : double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
4888 : dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
4889 : padfWeightsHorizontal[iC + 1];
4890 : i += 2;
4891 : iC += 2;
4892 : }
4893 : if (i == iMax)
4894 : {
4895 : dfAccumulatorLocal +=
4896 : double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
4897 : }
4898 :
4899 : dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
4900 : }
4901 :
4902 : *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
4903 :
4904 : return true;
4905 : }
4906 :
4907 : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
4908 : /* Could possibly be used too on 32bit, but we would need to check at runtime */
4909 : #if defined(USE_SSE2)
4910 :
4911 : /************************************************************************/
4912 : /* GWKResampleNoMasks_SSE2_T() */
4913 : /************************************************************************/
4914 :
4915 : template <class T>
4916 1382149 : static bool GWKResampleNoMasks_SSE2_T(const GDALWarpKernel *poWK, int iBand,
4917 : double dfSrcX, double dfSrcY, T *pValue,
4918 : double *padfWeightsHorizontal,
4919 : double *padfWeightsVertical,
4920 : double &dfInvWeights)
4921 : {
4922 : // Commonly used; save locally.
4923 1382149 : const int nSrcXSize = poWK->nSrcXSize;
4924 1382149 : const int nSrcYSize = poWK->nSrcYSize;
4925 :
4926 1382149 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4927 1382149 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4928 1382149 : const GPtrDiff_t iSrcOffset =
4929 1382149 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4930 1382149 : const int nXRadius = poWK->nXRadius;
4931 1382149 : const int nYRadius = poWK->nYRadius;
4932 :
4933 : // Politely refuse to process invalid coordinates or obscenely small image.
4934 1382149 : if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
4935 : nYRadius > nSrcYSize)
4936 3 : return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
4937 3 : pValue);
4938 :
4939 1382146 : const T *pSrcBand =
4940 1382146 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
4941 :
4942 1382146 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4943 1382146 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4944 1382146 : const double dfXScale = std::min(poWK->dfXScale, 1.0);
4945 1382146 : const double dfYScale = std::min(poWK->dfYScale, 1.0);
4946 :
4947 1382146 : int iMin = 1 - nXRadius;
4948 1382146 : if (iSrcX + iMin < 0)
4949 20312 : iMin = -iSrcX;
4950 1382146 : int iMax = nXRadius;
4951 1382146 : if (iSrcX + iMax >= nSrcXSize - 1)
4952 7970 : iMax = nSrcXSize - 1 - iSrcX;
4953 :
4954 1382146 : int jMin = 1 - nYRadius;
4955 1382146 : if (iSrcY + jMin < 0)
4956 22209 : jMin = -iSrcY;
4957 1382146 : int jMax = nYRadius;
4958 1382146 : if (iSrcY + jMax >= nSrcYSize - 1)
4959 9295 : jMax = nSrcYSize - 1 - iSrcY;
4960 :
4961 1382146 : if (iBand == 0)
4962 : {
4963 1091074 : GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
4964 : jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
4965 : padfWeightsVertical, dfInvWeights);
4966 : }
4967 :
4968 1382146 : GPtrDiff_t iSampJ = iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
4969 : // Process by chunk of 4 rows.
4970 1382146 : int jC = 0;
4971 1382146 : int j = jMin;
4972 1382146 : double dfAccumulator = 0.0;
4973 3068580 : for (; j + 2 < jMax; j += 4, iSampJ += 4 * nSrcXSize, jC += 4)
4974 : {
4975 : // Loop over all pixels in the row.
4976 1686436 : int iC = 0;
4977 1686436 : int i = iMin;
4978 : // Process by chunk of 4 cols.
4979 1686436 : XMMReg4Double v_acc_1 = XMMReg4Double::Zero();
4980 1686436 : XMMReg4Double v_acc_2 = XMMReg4Double::Zero();
4981 1686436 : XMMReg4Double v_acc_3 = XMMReg4Double::Zero();
4982 1686436 : XMMReg4Double v_acc_4 = XMMReg4Double::Zero();
4983 4251632 : for (; i + 2 < iMax; i += 4, iC += 4)
4984 : {
4985 : // Retrieve the pixel & accumulate.
4986 2565196 : XMMReg4Double v_pixels_1 =
4987 2565196 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
4988 2565196 : XMMReg4Double v_pixels_2 =
4989 2565196 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + nSrcXSize);
4990 2565196 : XMMReg4Double v_pixels_3 =
4991 2565196 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
4992 2565196 : XMMReg4Double v_pixels_4 =
4993 2565196 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
4994 :
4995 2565196 : XMMReg4Double v_padfWeight =
4996 2565196 : XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
4997 :
4998 2565196 : v_acc_1 += v_pixels_1 * v_padfWeight;
4999 2565196 : v_acc_2 += v_pixels_2 * v_padfWeight;
5000 2565196 : v_acc_3 += v_pixels_3 * v_padfWeight;
5001 2565196 : v_acc_4 += v_pixels_4 * v_padfWeight;
5002 : }
5003 :
5004 1686436 : if (i < iMax)
5005 : {
5006 25512 : XMMReg2Double v_pixels_1 =
5007 25512 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ);
5008 25512 : XMMReg2Double v_pixels_2 =
5009 25512 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + nSrcXSize);
5010 25512 : XMMReg2Double v_pixels_3 =
5011 25512 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
5012 25512 : XMMReg2Double v_pixels_4 =
5013 25512 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
5014 :
5015 25512 : XMMReg2Double v_padfWeight =
5016 25512 : XMMReg2Double::Load2Val(padfWeightsHorizontal + iC);
5017 :
5018 25512 : v_acc_1.AddToLow(v_pixels_1 * v_padfWeight);
5019 25512 : v_acc_2.AddToLow(v_pixels_2 * v_padfWeight);
5020 25512 : v_acc_3.AddToLow(v_pixels_3 * v_padfWeight);
5021 25512 : v_acc_4.AddToLow(v_pixels_4 * v_padfWeight);
5022 :
5023 25512 : i += 2;
5024 25512 : iC += 2;
5025 : }
5026 :
5027 1686436 : double dfAccumulatorLocal_1 = v_acc_1.GetHorizSum();
5028 1686436 : double dfAccumulatorLocal_2 = v_acc_2.GetHorizSum();
5029 1686436 : double dfAccumulatorLocal_3 = v_acc_3.GetHorizSum();
5030 1686436 : double dfAccumulatorLocal_4 = v_acc_4.GetHorizSum();
5031 :
5032 1686436 : if (i == iMax)
5033 : {
5034 27557 : dfAccumulatorLocal_1 += static_cast<double>(pSrcBand[i + iSampJ]) *
5035 27557 : padfWeightsHorizontal[iC];
5036 27557 : dfAccumulatorLocal_2 +=
5037 27557 : static_cast<double>(pSrcBand[i + iSampJ + nSrcXSize]) *
5038 27557 : padfWeightsHorizontal[iC];
5039 27557 : dfAccumulatorLocal_3 +=
5040 27557 : static_cast<double>(pSrcBand[i + iSampJ + 2 * nSrcXSize]) *
5041 27557 : padfWeightsHorizontal[iC];
5042 27557 : dfAccumulatorLocal_4 +=
5043 27557 : static_cast<double>(pSrcBand[i + iSampJ + 3 * nSrcXSize]) *
5044 27557 : padfWeightsHorizontal[iC];
5045 : }
5046 :
5047 1686436 : dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal_1;
5048 1686436 : dfAccumulator += padfWeightsVertical[jC + 1] * dfAccumulatorLocal_2;
5049 1686436 : dfAccumulator += padfWeightsVertical[jC + 2] * dfAccumulatorLocal_3;
5050 1686436 : dfAccumulator += padfWeightsVertical[jC + 3] * dfAccumulatorLocal_4;
5051 : }
5052 1456100 : for (; j <= jMax; ++j, iSampJ += nSrcXSize, ++jC)
5053 : {
5054 : // Loop over all pixels in the row.
5055 73954 : int iC = 0;
5056 73954 : int i = iMin;
5057 : // Process by chunk of 4 cols.
5058 73954 : XMMReg4Double v_acc = XMMReg4Double::Zero();
5059 172926 : for (; i + 2 < iMax; i += 4, iC += 4)
5060 : {
5061 : // Retrieve the pixel & accumulate.
5062 98972 : XMMReg4Double v_pixels =
5063 98972 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
5064 98972 : XMMReg4Double v_padfWeight =
5065 98972 : XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
5066 :
5067 98972 : v_acc += v_pixels * v_padfWeight;
5068 : }
5069 :
5070 73954 : double dfAccumulatorLocal = v_acc.GetHorizSum();
5071 :
5072 73954 : if (i < iMax)
5073 : {
5074 1862 : dfAccumulatorLocal +=
5075 1862 : double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
5076 1862 : dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
5077 1862 : padfWeightsHorizontal[iC + 1];
5078 1862 : i += 2;
5079 1862 : iC += 2;
5080 : }
5081 73954 : if (i == iMax)
5082 : {
5083 1803 : dfAccumulatorLocal += static_cast<double>(pSrcBand[i + iSampJ]) *
5084 1803 : padfWeightsHorizontal[iC];
5085 : }
5086 :
5087 73954 : dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
5088 : }
5089 :
5090 1382146 : *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
5091 :
5092 1382146 : return true;
5093 : }
5094 :
5095 : /************************************************************************/
5096 : /* GWKResampleNoMasksT<GByte>() */
5097 : /************************************************************************/
5098 :
5099 : template <>
5100 877023 : bool GWKResampleNoMasksT<GByte>(const GDALWarpKernel *poWK, int iBand,
5101 : double dfSrcX, double dfSrcY, GByte *pValue,
5102 : double *padfWeightsHorizontal,
5103 : double *padfWeightsVertical,
5104 : double &dfInvWeights)
5105 : {
5106 877023 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
5107 : padfWeightsHorizontal, padfWeightsVertical,
5108 877023 : dfInvWeights);
5109 : }
5110 :
5111 : /************************************************************************/
5112 : /* GWKResampleNoMasksT<GInt16>() */
5113 : /************************************************************************/
5114 :
5115 : template <>
5116 252563 : bool GWKResampleNoMasksT<GInt16>(const GDALWarpKernel *poWK, int iBand,
5117 : double dfSrcX, double dfSrcY, GInt16 *pValue,
5118 : double *padfWeightsHorizontal,
5119 : double *padfWeightsVertical,
5120 : double &dfInvWeights)
5121 : {
5122 252563 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
5123 : padfWeightsHorizontal, padfWeightsVertical,
5124 252563 : dfInvWeights);
5125 : }
5126 :
5127 : /************************************************************************/
5128 : /* GWKResampleNoMasksT<GUInt16>() */
5129 : /************************************************************************/
5130 :
5131 : template <>
5132 250063 : bool GWKResampleNoMasksT<GUInt16>(const GDALWarpKernel *poWK, int iBand,
5133 : double dfSrcX, double dfSrcY, GUInt16 *pValue,
5134 : double *padfWeightsHorizontal,
5135 : double *padfWeightsVertical,
5136 : double &dfInvWeights)
5137 : {
5138 250063 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
5139 : padfWeightsHorizontal, padfWeightsVertical,
5140 250063 : dfInvWeights);
5141 : }
5142 :
5143 : /************************************************************************/
5144 : /* GWKResampleNoMasksT<float>() */
5145 : /************************************************************************/
5146 :
5147 : template <>
5148 2500 : bool GWKResampleNoMasksT<float>(const GDALWarpKernel *poWK, int iBand,
5149 : double dfSrcX, double dfSrcY, float *pValue,
5150 : double *padfWeightsHorizontal,
5151 : double *padfWeightsVertical,
5152 : double &dfInvWeights)
5153 : {
5154 2500 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
5155 : padfWeightsHorizontal, padfWeightsVertical,
5156 2500 : dfInvWeights);
5157 : }
5158 :
5159 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
5160 :
5161 : /************************************************************************/
5162 : /* GWKResampleNoMasksT<double>() */
5163 : /************************************************************************/
5164 :
5165 : template <>
5166 : bool GWKResampleNoMasksT<double>(const GDALWarpKernel *poWK, int iBand,
5167 : double dfSrcX, double dfSrcY, double *pValue,
5168 : double *padfWeightsHorizontal,
5169 : double *padfWeightsVertical,
5170 : double &dfInvWeights)
5171 : {
5172 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
5173 : padfWeightsHorizontal, padfWeightsVertical,
5174 : dfInvWeights);
5175 : }
5176 :
5177 : #endif /* INSTANTIATE_FLOAT64_SSE2_IMPL */
5178 :
5179 : #endif /* defined(USE_SSE2) */
5180 :
5181 : /************************************************************************/
5182 : /* GWKRoundSourceCoordinates() */
5183 : /************************************************************************/
5184 :
5185 1000 : static void GWKRoundSourceCoordinates(
5186 : int nDstXSize, double *padfX, double *padfY, double *padfZ, int *pabSuccess,
5187 : double dfSrcCoordPrecision, double dfErrorThreshold,
5188 : GDALTransformerFunc pfnTransformer, void *pTransformerArg, double dfDstXOff,
5189 : double dfDstY)
5190 : {
5191 1000 : double dfPct = 0.8;
5192 1000 : if (dfErrorThreshold > 0 && dfSrcCoordPrecision / dfErrorThreshold >= 10.0)
5193 : {
5194 1000 : dfPct = 1.0 - 2 * 1.0 / (dfSrcCoordPrecision / dfErrorThreshold);
5195 : }
5196 1000 : const double dfExactTransformThreshold = 0.5 * dfPct * dfSrcCoordPrecision;
5197 :
5198 501000 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5199 : {
5200 500000 : const double dfXBefore = padfX[iDstX];
5201 500000 : const double dfYBefore = padfY[iDstX];
5202 500000 : padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
5203 : dfSrcCoordPrecision;
5204 500000 : padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
5205 : dfSrcCoordPrecision;
5206 :
5207 : // If we are in an uncertainty zone, go to non-approximated
5208 : // transformation.
5209 : // Due to the 80% of half-precision threshold, dfSrcCoordPrecision must
5210 : // be at least 10 times greater than the approximation error.
5211 500000 : if (fabs(dfXBefore - padfX[iDstX]) > dfExactTransformThreshold ||
5212 399914 : fabs(dfYBefore - padfY[iDstX]) > dfExactTransformThreshold)
5213 : {
5214 180090 : padfX[iDstX] = iDstX + dfDstXOff;
5215 180090 : padfY[iDstX] = dfDstY;
5216 180090 : padfZ[iDstX] = 0.0;
5217 180090 : pfnTransformer(pTransformerArg, TRUE, 1, padfX + iDstX,
5218 180090 : padfY + iDstX, padfZ + iDstX, pabSuccess + iDstX);
5219 180090 : padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
5220 : dfSrcCoordPrecision;
5221 180090 : padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
5222 : dfSrcCoordPrecision;
5223 : }
5224 : }
5225 1000 : }
5226 :
5227 : /************************************************************************/
5228 : /* GWKCheckAndComputeSrcOffsets() */
5229 : /************************************************************************/
5230 : static CPL_INLINE bool
5231 187160000 : GWKCheckAndComputeSrcOffsets(GWKJobStruct *psJob, int *_pabSuccess, int _iDstX,
5232 : int _iDstY, double *_padfX, double *_padfY,
5233 : int _nSrcXSize, int _nSrcYSize,
5234 : GPtrDiff_t &iSrcOffset)
5235 : {
5236 187160000 : const GDALWarpKernel *_poWK = psJob->poWK;
5237 193763000 : for (int iTry = 0; iTry < 2; ++iTry)
5238 : {
5239 193763000 : if (iTry == 1)
5240 : {
5241 : // If the source coordinate is slightly outside of the source raster
5242 : // retry to transform it alone, so that the exact coordinate
5243 : // transformer is used.
5244 :
5245 6603260 : _padfX[_iDstX] = _iDstX + 0.5 + _poWK->nDstXOff;
5246 6603260 : _padfY[_iDstX] = _iDstY + 0.5 + _poWK->nDstYOff;
5247 6603260 : double dfZ = 0;
5248 6603260 : _poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1,
5249 6603260 : _padfX + _iDstX, _padfY + _iDstX, &dfZ,
5250 6603260 : _pabSuccess + _iDstX);
5251 : }
5252 193763000 : if (!_pabSuccess[_iDstX])
5253 3615020 : return false;
5254 :
5255 : // If this happens this is likely the symptom of a bug somewhere.
5256 190148000 : if (std::isnan(_padfX[_iDstX]) || std::isnan(_padfY[_iDstX]))
5257 : {
5258 : static bool bNanCoordFound = false;
5259 0 : if (!bNanCoordFound)
5260 : {
5261 0 : CPLDebug("WARP",
5262 : "GWKCheckAndComputeSrcOffsets(): "
5263 : "NaN coordinate found on point %d.",
5264 : _iDstX);
5265 0 : bNanCoordFound = true;
5266 : }
5267 0 : return false;
5268 : }
5269 :
5270 : /* --------------------------------------------------------------------
5271 : */
5272 : /* Figure out what pixel we want in our source raster, and skip */
5273 : /* further processing if it is well off the source image. */
5274 : /* --------------------------------------------------------------------
5275 : */
5276 : /* We test against the value before casting to avoid the */
5277 : /* problem of asymmetric truncation effects around zero. That is */
5278 : /* -0.5 will be 0 when cast to an int. */
5279 190148000 : if (_padfX[_iDstX] < _poWK->nSrcXOff)
5280 : {
5281 : // If the source coordinate is slightly outside of the source raster
5282 : // retry to transform it alone, so that the exact coordinate
5283 : // transformer is used.
5284 16858100 : if (iTry == 0 && _padfX[_iDstX] > _poWK->nSrcXOff - 1)
5285 2889470 : continue;
5286 13968600 : return false;
5287 : }
5288 :
5289 173290000 : if (_padfY[_iDstX] < _poWK->nSrcYOff)
5290 : {
5291 : // If the source coordinate is slightly outside of the source raster
5292 : // retry to transform it alone, so that the exact coordinate
5293 : // transformer is used.
5294 7890610 : if (iTry == 0 && _padfY[_iDstX] > _poWK->nSrcYOff - 1)
5295 635435 : continue;
5296 7255180 : return false;
5297 : }
5298 :
5299 : // Check for potential overflow when casting from float to int, (if
5300 : // operating outside natural projection area, padfX/Y can be a very huge
5301 : // positive number before doing the actual conversion), as such cast is
5302 : // undefined behavior that can trigger exception with some compilers
5303 : // (see #6753)
5304 165399000 : if (_padfX[_iDstX] + 1e-10 > _nSrcXSize + _poWK->nSrcXOff)
5305 : {
5306 : // If the source coordinate is slightly outside of the source raster
5307 : // retry to transform it alone, so that the exact coordinate
5308 : // transformer is used.
5309 13193200 : if (iTry == 0 && _padfX[_iDstX] < _nSrcXSize + _poWK->nSrcXOff + 1)
5310 2712440 : continue;
5311 10480800 : return false;
5312 : }
5313 152206000 : if (_padfY[_iDstX] + 1e-10 > _nSrcYSize + _poWK->nSrcYOff)
5314 : {
5315 : // If the source coordinate is slightly outside of the source raster
5316 : // retry to transform it alone, so that the exact coordinate
5317 : // transformer is used.
5318 5680260 : if (iTry == 0 && _padfY[_iDstX] < _nSrcYSize + _poWK->nSrcYOff + 1)
5319 365913 : continue;
5320 5314340 : return false;
5321 : }
5322 :
5323 146526000 : break;
5324 : }
5325 :
5326 146526000 : int iSrcX = static_cast<int>(_padfX[_iDstX] + 1.0e-10) - _poWK->nSrcXOff;
5327 146526000 : int iSrcY = static_cast<int>(_padfY[_iDstX] + 1.0e-10) - _poWK->nSrcYOff;
5328 146526000 : if (iSrcX == _nSrcXSize)
5329 0 : iSrcX--;
5330 146526000 : if (iSrcY == _nSrcYSize)
5331 0 : iSrcY--;
5332 :
5333 : // Those checks should normally be OK given the previous ones.
5334 146526000 : CPLAssert(iSrcX >= 0);
5335 146526000 : CPLAssert(iSrcY >= 0);
5336 146526000 : CPLAssert(iSrcX < _nSrcXSize);
5337 146526000 : CPLAssert(iSrcY < _nSrcYSize);
5338 :
5339 146526000 : iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * _nSrcXSize;
5340 :
5341 146526000 : return true;
5342 : }
5343 :
5344 : /************************************************************************/
5345 : /* GWKOneSourceCornerFailsToReproject() */
5346 : /************************************************************************/
5347 :
5348 938 : static bool GWKOneSourceCornerFailsToReproject(GWKJobStruct *psJob)
5349 : {
5350 938 : GDALWarpKernel *poWK = psJob->poWK;
5351 2802 : for (int iY = 0; iY <= 1; ++iY)
5352 : {
5353 5599 : for (int iX = 0; iX <= 1; ++iX)
5354 : {
5355 3735 : double dfXTmp = poWK->nSrcXOff + iX * poWK->nSrcXSize;
5356 3735 : double dfYTmp = poWK->nSrcYOff + iY * poWK->nSrcYSize;
5357 3735 : double dfZTmp = 0;
5358 3735 : int nSuccess = FALSE;
5359 3735 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp,
5360 : &dfYTmp, &dfZTmp, &nSuccess);
5361 3735 : if (!nSuccess)
5362 7 : return true;
5363 : }
5364 : }
5365 931 : return false;
5366 : }
5367 :
5368 : /************************************************************************/
5369 : /* GWKAdjustSrcOffsetOnEdge() */
5370 : /************************************************************************/
5371 :
5372 9714 : static bool GWKAdjustSrcOffsetOnEdge(GWKJobStruct *psJob,
5373 : GPtrDiff_t &iSrcOffset)
5374 : {
5375 9714 : GDALWarpKernel *poWK = psJob->poWK;
5376 9714 : const int nSrcXSize = poWK->nSrcXSize;
5377 9714 : const int nSrcYSize = poWK->nSrcYSize;
5378 :
5379 : // Check if the computed source position slightly altered
5380 : // fails to reproject. If so, then we are at the edge of
5381 : // the validity area, and it is worth checking neighbour
5382 : // source pixels for validity.
5383 9714 : int nSuccess = FALSE;
5384 : {
5385 9714 : double dfXTmp =
5386 9714 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5387 9714 : double dfYTmp =
5388 9714 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5389 9714 : double dfZTmp = 0;
5390 9714 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5391 : &dfZTmp, &nSuccess);
5392 : }
5393 9714 : if (nSuccess)
5394 : {
5395 6996 : double dfXTmp =
5396 6996 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5397 6996 : double dfYTmp =
5398 6996 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
5399 6996 : double dfZTmp = 0;
5400 6996 : nSuccess = FALSE;
5401 6996 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5402 : &dfZTmp, &nSuccess);
5403 : }
5404 9714 : if (nSuccess)
5405 : {
5406 5624 : double dfXTmp =
5407 5624 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
5408 5624 : double dfYTmp =
5409 5624 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5410 5624 : double dfZTmp = 0;
5411 5624 : nSuccess = FALSE;
5412 5624 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5413 : &dfZTmp, &nSuccess);
5414 : }
5415 :
5416 14166 : if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
5417 4452 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + 1))
5418 : {
5419 1860 : iSrcOffset++;
5420 1860 : return true;
5421 : }
5422 10290 : else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
5423 2436 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + nSrcXSize))
5424 : {
5425 1334 : iSrcOffset += nSrcXSize;
5426 1334 : return true;
5427 : }
5428 7838 : else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
5429 1318 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - 1))
5430 : {
5431 956 : iSrcOffset--;
5432 956 : return true;
5433 : }
5434 5924 : else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
5435 360 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - nSrcXSize))
5436 : {
5437 340 : iSrcOffset -= nSrcXSize;
5438 340 : return true;
5439 : }
5440 :
5441 5224 : return false;
5442 : }
5443 :
5444 : /************************************************************************/
5445 : /* GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity() */
5446 : /************************************************************************/
5447 :
5448 0 : static bool GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(GWKJobStruct *psJob,
5449 : GPtrDiff_t &iSrcOffset)
5450 : {
5451 0 : GDALWarpKernel *poWK = psJob->poWK;
5452 0 : const int nSrcXSize = poWK->nSrcXSize;
5453 0 : const int nSrcYSize = poWK->nSrcYSize;
5454 :
5455 : // Check if the computed source position slightly altered
5456 : // fails to reproject. If so, then we are at the edge of
5457 : // the validity area, and it is worth checking neighbour
5458 : // source pixels for validity.
5459 0 : int nSuccess = FALSE;
5460 : {
5461 0 : double dfXTmp =
5462 0 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5463 0 : double dfYTmp =
5464 0 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5465 0 : double dfZTmp = 0;
5466 0 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5467 : &dfZTmp, &nSuccess);
5468 : }
5469 0 : if (nSuccess)
5470 : {
5471 0 : double dfXTmp =
5472 0 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5473 0 : double dfYTmp =
5474 0 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
5475 0 : double dfZTmp = 0;
5476 0 : nSuccess = FALSE;
5477 0 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5478 : &dfZTmp, &nSuccess);
5479 : }
5480 0 : if (nSuccess)
5481 : {
5482 0 : double dfXTmp =
5483 0 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
5484 0 : double dfYTmp =
5485 0 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5486 0 : double dfZTmp = 0;
5487 0 : nSuccess = FALSE;
5488 0 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5489 : &dfZTmp, &nSuccess);
5490 : }
5491 :
5492 0 : if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
5493 0 : poWK->pafUnifiedSrcDensity[iSrcOffset + 1] >=
5494 : SRC_DENSITY_THRESHOLD_FLOAT)
5495 : {
5496 0 : iSrcOffset++;
5497 0 : return true;
5498 : }
5499 0 : else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
5500 0 : poWK->pafUnifiedSrcDensity[iSrcOffset + nSrcXSize] >=
5501 : SRC_DENSITY_THRESHOLD_FLOAT)
5502 : {
5503 0 : iSrcOffset += nSrcXSize;
5504 0 : return true;
5505 : }
5506 0 : else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
5507 0 : poWK->pafUnifiedSrcDensity[iSrcOffset - 1] >=
5508 : SRC_DENSITY_THRESHOLD_FLOAT)
5509 : {
5510 0 : iSrcOffset--;
5511 0 : return true;
5512 : }
5513 0 : else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
5514 0 : poWK->pafUnifiedSrcDensity[iSrcOffset - nSrcXSize] >=
5515 : SRC_DENSITY_THRESHOLD_FLOAT)
5516 : {
5517 0 : iSrcOffset -= nSrcXSize;
5518 0 : return true;
5519 : }
5520 :
5521 0 : return false;
5522 : }
5523 :
5524 : /************************************************************************/
5525 : /* GWKGeneralCase() */
5526 : /* */
5527 : /* This is the most general case. It attempts to handle all */
5528 : /* possible features with relatively little concern for */
5529 : /* efficiency. */
5530 : /************************************************************************/
5531 :
5532 239 : static void GWKGeneralCaseThread(void *pData)
5533 : {
5534 239 : GWKJobStruct *psJob = reinterpret_cast<GWKJobStruct *>(pData);
5535 239 : GDALWarpKernel *poWK = psJob->poWK;
5536 239 : const int iYMin = psJob->iYMin;
5537 239 : const int iYMax = psJob->iYMax;
5538 : const double dfMultFactorVerticalShiftPipeline =
5539 239 : poWK->bApplyVerticalShift
5540 239 : ? CPLAtof(CSLFetchNameValueDef(
5541 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
5542 : "1.0"))
5543 239 : : 0.0;
5544 : const bool bAvoidNoDataSingleBand =
5545 239 : poWK->nBands == 1 ||
5546 0 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
5547 239 : "UNIFIED_SRC_NODATA", "FALSE"));
5548 :
5549 239 : int nDstXSize = poWK->nDstXSize;
5550 239 : int nSrcXSize = poWK->nSrcXSize;
5551 239 : int nSrcYSize = poWK->nSrcYSize;
5552 :
5553 : /* -------------------------------------------------------------------- */
5554 : /* Allocate x,y,z coordinate arrays for transformation ... one */
5555 : /* scanlines worth of positions. */
5556 : /* -------------------------------------------------------------------- */
5557 : // For x, 2 *, because we cache the precomputed values at the end.
5558 : double *padfX =
5559 239 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
5560 : double *padfY =
5561 239 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5562 : double *padfZ =
5563 239 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5564 239 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5565 :
5566 239 : const bool bUse4SamplesFormula = CanUse4SamplesFormula(poWK);
5567 :
5568 239 : GWKResampleWrkStruct *psWrkStruct = nullptr;
5569 239 : if (poWK->eResample != GRA_NearestNeighbour)
5570 : {
5571 220 : psWrkStruct = GWKResampleCreateWrkStruct(poWK);
5572 : }
5573 239 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
5574 239 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
5575 239 : const double dfErrorThreshold = CPLAtof(
5576 239 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
5577 :
5578 : const bool bOneSourceCornerFailsToReproject =
5579 239 : GWKOneSourceCornerFailsToReproject(psJob);
5580 :
5581 : // Precompute values.
5582 6469 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5583 6230 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
5584 :
5585 : /* ==================================================================== */
5586 : /* Loop over output lines. */
5587 : /* ==================================================================== */
5588 6469 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
5589 : {
5590 : /* --------------------------------------------------------------------
5591 : */
5592 : /* Setup points to transform to source image space. */
5593 : /* --------------------------------------------------------------------
5594 : */
5595 6230 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
5596 6230 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
5597 242390 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5598 236160 : padfY[iDstX] = dfY;
5599 6230 : memset(padfZ, 0, sizeof(double) * nDstXSize);
5600 :
5601 : /* --------------------------------------------------------------------
5602 : */
5603 : /* Transform the points from destination pixel/line coordinates */
5604 : /* to source pixel/line coordinates. */
5605 : /* --------------------------------------------------------------------
5606 : */
5607 6230 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
5608 : padfY, padfZ, pabSuccess);
5609 6230 : if (dfSrcCoordPrecision > 0.0)
5610 : {
5611 0 : GWKRoundSourceCoordinates(
5612 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
5613 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
5614 0 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
5615 : }
5616 :
5617 : /* ====================================================================
5618 : */
5619 : /* Loop over pixels in output scanline. */
5620 : /* ====================================================================
5621 : */
5622 242390 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5623 : {
5624 236160 : GPtrDiff_t iSrcOffset = 0;
5625 236160 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
5626 : padfX, padfY, nSrcXSize,
5627 : nSrcYSize, iSrcOffset))
5628 0 : continue;
5629 :
5630 : /* --------------------------------------------------------------------
5631 : */
5632 : /* Do not try to apply transparent/invalid source pixels to the
5633 : */
5634 : /* destination. This currently ignores the multi-pixel input
5635 : */
5636 : /* of bilinear and cubic resamples. */
5637 : /* --------------------------------------------------------------------
5638 : */
5639 236160 : double dfDensity = 1.0;
5640 :
5641 236160 : if (poWK->pafUnifiedSrcDensity != nullptr)
5642 : {
5643 1200 : dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
5644 1200 : if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
5645 : {
5646 0 : if (!bOneSourceCornerFailsToReproject)
5647 : {
5648 0 : continue;
5649 : }
5650 0 : else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
5651 : psJob, iSrcOffset))
5652 : {
5653 0 : dfDensity =
5654 0 : double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
5655 : }
5656 : else
5657 : {
5658 0 : continue;
5659 : }
5660 : }
5661 : }
5662 :
5663 236160 : if (poWK->panUnifiedSrcValid != nullptr &&
5664 0 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
5665 : {
5666 0 : if (!bOneSourceCornerFailsToReproject)
5667 : {
5668 0 : continue;
5669 : }
5670 0 : else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
5671 : {
5672 0 : continue;
5673 : }
5674 : }
5675 :
5676 : /* ====================================================================
5677 : */
5678 : /* Loop processing each band. */
5679 : /* ====================================================================
5680 : */
5681 236160 : bool bHasFoundDensity = false;
5682 :
5683 236160 : const GPtrDiff_t iDstOffset =
5684 236160 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
5685 472320 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
5686 : {
5687 236160 : double dfBandDensity = 0.0;
5688 236160 : double dfValueReal = 0.0;
5689 236160 : double dfValueImag = 0.0;
5690 :
5691 : /* --------------------------------------------------------------------
5692 : */
5693 : /* Collect the source value. */
5694 : /* --------------------------------------------------------------------
5695 : */
5696 236160 : if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
5697 : nSrcYSize == 1)
5698 : {
5699 : // FALSE is returned if dfBandDensity == 0, which is
5700 : // checked below.
5701 568 : CPL_IGNORE_RET_VAL(GWKGetPixelValue(
5702 : poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal,
5703 : &dfValueImag));
5704 : }
5705 235592 : else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
5706 : {
5707 248 : GWKBilinearResample4Sample(
5708 248 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5709 248 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5710 : &dfValueReal, &dfValueImag);
5711 : }
5712 235344 : else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
5713 : {
5714 248 : GWKCubicResample4Sample(
5715 248 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5716 248 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5717 : &dfValueReal, &dfValueImag);
5718 : }
5719 : else
5720 : #ifdef DEBUG
5721 : // Only useful for clang static analyzer.
5722 235096 : if (psWrkStruct != nullptr)
5723 : #endif
5724 : {
5725 235096 : psWrkStruct->pfnGWKResample(
5726 235096 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5727 235096 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5728 : &dfValueReal, &dfValueImag, psWrkStruct);
5729 : }
5730 :
5731 : // If we didn't find any valid inputs skip to next band.
5732 236160 : if (dfBandDensity < BAND_DENSITY_THRESHOLD)
5733 0 : continue;
5734 :
5735 236160 : if (poWK->bApplyVerticalShift)
5736 : {
5737 0 : if (!std::isfinite(padfZ[iDstX]))
5738 0 : continue;
5739 : // Subtract padfZ[] since the coordinate transformation is
5740 : // from target to source
5741 0 : dfValueReal =
5742 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
5743 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
5744 : }
5745 :
5746 236160 : bHasFoundDensity = true;
5747 :
5748 : /* --------------------------------------------------------------------
5749 : */
5750 : /* We have a computed value from the source. Now apply it
5751 : * to */
5752 : /* the destination pixel. */
5753 : /* --------------------------------------------------------------------
5754 : */
5755 236160 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
5756 : dfValueReal, dfValueImag,
5757 : bAvoidNoDataSingleBand);
5758 : }
5759 :
5760 236160 : if (!bHasFoundDensity)
5761 0 : continue;
5762 :
5763 236160 : if (!bAvoidNoDataSingleBand)
5764 : {
5765 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
5766 : }
5767 :
5768 : /* --------------------------------------------------------------------
5769 : */
5770 : /* Update destination density/validity masks. */
5771 : /* --------------------------------------------------------------------
5772 : */
5773 236160 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
5774 :
5775 236160 : if (poWK->panDstValid != nullptr)
5776 : {
5777 0 : CPLMaskSet(poWK->panDstValid, iDstOffset);
5778 : }
5779 : } /* Next iDstX */
5780 :
5781 : /* --------------------------------------------------------------------
5782 : */
5783 : /* Report progress to the user, and optionally cancel out. */
5784 : /* --------------------------------------------------------------------
5785 : */
5786 6230 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
5787 0 : break;
5788 : }
5789 :
5790 : /* -------------------------------------------------------------------- */
5791 : /* Cleanup and return. */
5792 : /* -------------------------------------------------------------------- */
5793 239 : CPLFree(padfX);
5794 239 : CPLFree(padfY);
5795 239 : CPLFree(padfZ);
5796 239 : CPLFree(pabSuccess);
5797 239 : if (psWrkStruct)
5798 220 : GWKResampleDeleteWrkStruct(psWrkStruct);
5799 239 : }
5800 :
5801 239 : static CPLErr GWKGeneralCase(GDALWarpKernel *poWK)
5802 : {
5803 239 : return GWKRun(poWK, "GWKGeneralCase", GWKGeneralCaseThread);
5804 : }
5805 :
5806 : /************************************************************************/
5807 : /* GWKRealCase() */
5808 : /* */
5809 : /* General case for non-complex data types. */
5810 : /************************************************************************/
5811 :
5812 223 : static void GWKRealCaseThread(void *pData)
5813 :
5814 : {
5815 223 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
5816 223 : GDALWarpKernel *poWK = psJob->poWK;
5817 223 : const int iYMin = psJob->iYMin;
5818 223 : const int iYMax = psJob->iYMax;
5819 :
5820 223 : const int nDstXSize = poWK->nDstXSize;
5821 223 : const int nSrcXSize = poWK->nSrcXSize;
5822 223 : const int nSrcYSize = poWK->nSrcYSize;
5823 : const double dfMultFactorVerticalShiftPipeline =
5824 223 : poWK->bApplyVerticalShift
5825 223 : ? CPLAtof(CSLFetchNameValueDef(
5826 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
5827 : "1.0"))
5828 223 : : 0.0;
5829 : const bool bAvoidNoDataSingleBand =
5830 305 : poWK->nBands == 1 ||
5831 82 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
5832 223 : "UNIFIED_SRC_NODATA", "FALSE"));
5833 :
5834 : /* -------------------------------------------------------------------- */
5835 : /* Allocate x,y,z coordinate arrays for transformation ... one */
5836 : /* scanlines worth of positions. */
5837 : /* -------------------------------------------------------------------- */
5838 :
5839 : // For x, 2 *, because we cache the precomputed values at the end.
5840 : double *padfX =
5841 223 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
5842 : double *padfY =
5843 223 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5844 : double *padfZ =
5845 223 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5846 223 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5847 :
5848 223 : const bool bUse4SamplesFormula = CanUse4SamplesFormula(poWK);
5849 :
5850 223 : GWKResampleWrkStruct *psWrkStruct = nullptr;
5851 223 : if (poWK->eResample != GRA_NearestNeighbour)
5852 : {
5853 181 : psWrkStruct = GWKResampleCreateWrkStruct(poWK);
5854 : }
5855 223 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
5856 223 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
5857 223 : const double dfErrorThreshold = CPLAtof(
5858 223 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
5859 :
5860 638 : const bool bSrcMaskIsDensity = poWK->panUnifiedSrcValid == nullptr &&
5861 415 : poWK->papanBandSrcValid == nullptr &&
5862 192 : poWK->pafUnifiedSrcDensity != nullptr;
5863 :
5864 : const bool bOneSourceCornerFailsToReproject =
5865 223 : GWKOneSourceCornerFailsToReproject(psJob);
5866 :
5867 : // Precompute values.
5868 24657 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5869 24434 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
5870 :
5871 : /* ==================================================================== */
5872 : /* Loop over output lines. */
5873 : /* ==================================================================== */
5874 25909 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
5875 : {
5876 : /* --------------------------------------------------------------------
5877 : */
5878 : /* Setup points to transform to source image space. */
5879 : /* --------------------------------------------------------------------
5880 : */
5881 25686 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
5882 25686 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
5883 44594200 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5884 44568500 : padfY[iDstX] = dfY;
5885 25686 : memset(padfZ, 0, sizeof(double) * nDstXSize);
5886 :
5887 : /* --------------------------------------------------------------------
5888 : */
5889 : /* Transform the points from destination pixel/line coordinates */
5890 : /* to source pixel/line coordinates. */
5891 : /* --------------------------------------------------------------------
5892 : */
5893 25686 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
5894 : padfY, padfZ, pabSuccess);
5895 25686 : if (dfSrcCoordPrecision > 0.0)
5896 : {
5897 0 : GWKRoundSourceCoordinates(
5898 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
5899 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
5900 0 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
5901 : }
5902 :
5903 : /* ====================================================================
5904 : */
5905 : /* Loop over pixels in output scanline. */
5906 : /* ====================================================================
5907 : */
5908 44594200 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5909 : {
5910 44568500 : GPtrDiff_t iSrcOffset = 0;
5911 44568500 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
5912 : padfX, padfY, nSrcXSize,
5913 : nSrcYSize, iSrcOffset))
5914 43823900 : continue;
5915 :
5916 : /* --------------------------------------------------------------------
5917 : */
5918 : /* Do not try to apply transparent/invalid source pixels to the
5919 : */
5920 : /* destination. This currently ignores the multi-pixel input
5921 : */
5922 : /* of bilinear and cubic resamples. */
5923 : /* --------------------------------------------------------------------
5924 : */
5925 31812400 : double dfDensity = 1.0;
5926 :
5927 31812400 : if (poWK->pafUnifiedSrcDensity != nullptr)
5928 : {
5929 1669560 : dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
5930 1669560 : if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
5931 : {
5932 1538480 : if (!bOneSourceCornerFailsToReproject)
5933 : {
5934 1538480 : continue;
5935 : }
5936 0 : else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
5937 : psJob, iSrcOffset))
5938 : {
5939 0 : dfDensity =
5940 0 : double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
5941 : }
5942 : else
5943 : {
5944 0 : continue;
5945 : }
5946 : }
5947 : }
5948 :
5949 59903100 : if (poWK->panUnifiedSrcValid != nullptr &&
5950 29629200 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
5951 : {
5952 29531600 : if (!bOneSourceCornerFailsToReproject)
5953 : {
5954 29529300 : continue;
5955 : }
5956 2229 : else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
5957 : {
5958 0 : continue;
5959 : }
5960 : }
5961 :
5962 : /* ====================================================================
5963 : */
5964 : /* Loop processing each band. */
5965 : /* ====================================================================
5966 : */
5967 744578 : bool bHasFoundDensity = false;
5968 :
5969 744578 : const GPtrDiff_t iDstOffset =
5970 744578 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
5971 2092550 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
5972 : {
5973 1347980 : double dfBandDensity = 0.0;
5974 1347980 : double dfValueReal = 0.0;
5975 :
5976 : /* --------------------------------------------------------------------
5977 : */
5978 : /* Collect the source value. */
5979 : /* --------------------------------------------------------------------
5980 : */
5981 1347980 : if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
5982 : nSrcYSize == 1)
5983 : {
5984 : // FALSE is returned if dfBandDensity == 0, which is
5985 : // checked below.
5986 15516 : CPL_IGNORE_RET_VAL(GWKGetPixelValueReal(
5987 : poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal));
5988 : }
5989 1332460 : else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
5990 : {
5991 2046 : double dfValueImagIgnored = 0.0;
5992 2046 : GWKBilinearResample4Sample(
5993 2046 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5994 2046 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5995 2046 : &dfValueReal, &dfValueImagIgnored);
5996 : }
5997 1330410 : else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
5998 : {
5999 691552 : if (bSrcMaskIsDensity)
6000 : {
6001 389755 : if (poWK->eWorkingDataType == GDT_UInt8)
6002 : {
6003 389755 : GWKCubicResampleSrcMaskIsDensity4SampleRealT<GByte>(
6004 389755 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6005 389755 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
6006 : &dfValueReal);
6007 : }
6008 0 : else if (poWK->eWorkingDataType == GDT_UInt16)
6009 : {
6010 : GWKCubicResampleSrcMaskIsDensity4SampleRealT<
6011 0 : GUInt16>(poWK, iBand,
6012 0 : padfX[iDstX] - poWK->nSrcXOff,
6013 0 : padfY[iDstX] - poWK->nSrcYOff,
6014 : &dfBandDensity, &dfValueReal);
6015 : }
6016 : else
6017 : {
6018 0 : GWKCubicResampleSrcMaskIsDensity4SampleReal(
6019 0 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6020 0 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
6021 : &dfValueReal);
6022 : }
6023 : }
6024 : else
6025 : {
6026 301797 : double dfValueImagIgnored = 0.0;
6027 301797 : GWKCubicResample4Sample(
6028 301797 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6029 301797 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
6030 : &dfValueReal, &dfValueImagIgnored);
6031 691552 : }
6032 : }
6033 : else
6034 : #ifdef DEBUG
6035 : // Only useful for clang static analyzer.
6036 638861 : if (psWrkStruct != nullptr)
6037 : #endif
6038 : {
6039 638861 : double dfValueImagIgnored = 0.0;
6040 638861 : psWrkStruct->pfnGWKResample(
6041 638861 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6042 638861 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
6043 : &dfValueReal, &dfValueImagIgnored, psWrkStruct);
6044 : }
6045 :
6046 : // If we didn't find any valid inputs skip to next band.
6047 1347980 : if (dfBandDensity < BAND_DENSITY_THRESHOLD)
6048 0 : continue;
6049 :
6050 1347980 : if (poWK->bApplyVerticalShift)
6051 : {
6052 0 : if (!std::isfinite(padfZ[iDstX]))
6053 0 : continue;
6054 : // Subtract padfZ[] since the coordinate transformation is
6055 : // from target to source
6056 0 : dfValueReal =
6057 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
6058 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
6059 : }
6060 :
6061 1347980 : bHasFoundDensity = true;
6062 :
6063 : /* --------------------------------------------------------------------
6064 : */
6065 : /* We have a computed value from the source. Now apply it
6066 : * to */
6067 : /* the destination pixel. */
6068 : /* --------------------------------------------------------------------
6069 : */
6070 1347980 : GWKSetPixelValueReal(poWK, iBand, iDstOffset, dfBandDensity,
6071 : dfValueReal, bAvoidNoDataSingleBand);
6072 : }
6073 :
6074 744578 : if (!bHasFoundDensity)
6075 0 : continue;
6076 :
6077 744578 : if (!bAvoidNoDataSingleBand)
6078 : {
6079 100295 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
6080 : }
6081 :
6082 : /* --------------------------------------------------------------------
6083 : */
6084 : /* Update destination density/validity masks. */
6085 : /* --------------------------------------------------------------------
6086 : */
6087 744578 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
6088 :
6089 744578 : if (poWK->panDstValid != nullptr)
6090 : {
6091 104586 : CPLMaskSet(poWK->panDstValid, iDstOffset);
6092 : }
6093 : } // Next iDstX.
6094 :
6095 : /* --------------------------------------------------------------------
6096 : */
6097 : /* Report progress to the user, and optionally cancel out. */
6098 : /* --------------------------------------------------------------------
6099 : */
6100 25686 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
6101 0 : break;
6102 : }
6103 :
6104 : /* -------------------------------------------------------------------- */
6105 : /* Cleanup and return. */
6106 : /* -------------------------------------------------------------------- */
6107 223 : CPLFree(padfX);
6108 223 : CPLFree(padfY);
6109 223 : CPLFree(padfZ);
6110 223 : CPLFree(pabSuccess);
6111 223 : if (psWrkStruct)
6112 181 : GWKResampleDeleteWrkStruct(psWrkStruct);
6113 223 : }
6114 :
6115 223 : static CPLErr GWKRealCase(GDALWarpKernel *poWK)
6116 : {
6117 223 : return GWKRun(poWK, "GWKRealCase", GWKRealCaseThread);
6118 : }
6119 :
6120 : /************************************************************************/
6121 : /* GWKCubicResampleNoMasks4MultiBandT() */
6122 : /************************************************************************/
6123 :
6124 : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
6125 : /* and enough SSE registers */
6126 : #if defined(USE_SSE2)
6127 :
6128 142031000 : static inline float Convolute4x4(const __m128 row0, const __m128 row1,
6129 : const __m128 row2, const __m128 row3,
6130 : const __m128 weightsXY0,
6131 : const __m128 weightsXY1,
6132 : const __m128 weightsXY2,
6133 : const __m128 weightsXY3)
6134 : {
6135 994218000 : return XMMHorizontalAdd(_mm_add_ps(
6136 : _mm_add_ps(_mm_mul_ps(row0, weightsXY0), _mm_mul_ps(row1, weightsXY1)),
6137 : _mm_add_ps(_mm_mul_ps(row2, weightsXY2),
6138 142031000 : _mm_mul_ps(row3, weightsXY3))));
6139 : }
6140 :
6141 : template <class T>
6142 48826142 : static void GWKCubicResampleNoMasks4MultiBandT(const GDALWarpKernel *poWK,
6143 : double dfSrcX, double dfSrcY,
6144 : const GPtrDiff_t iDstOffset)
6145 : {
6146 48826142 : const double dfSrcXShifted = dfSrcX - 0.5;
6147 48826142 : const int iSrcX = static_cast<int>(dfSrcXShifted);
6148 48826142 : const double dfSrcYShifted = dfSrcY - 0.5;
6149 48826142 : const int iSrcY = static_cast<int>(dfSrcYShifted);
6150 48826142 : const GPtrDiff_t iSrcOffset =
6151 48826142 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
6152 :
6153 : // Get the bilinear interpolation at the image borders.
6154 48826142 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
6155 47479062 : iSrcY + 2 >= poWK->nSrcYSize)
6156 : {
6157 5929580 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6158 : {
6159 : T value;
6160 4447190 : GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
6161 : &value);
6162 4447190 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
6163 : value;
6164 1482400 : }
6165 : }
6166 : else
6167 : {
6168 47343762 : const float fDeltaX = static_cast<float>(dfSrcXShifted) - iSrcX;
6169 47343762 : const float fDeltaY = static_cast<float>(dfSrcYShifted) - iSrcY;
6170 :
6171 : float afCoeffsX[4];
6172 : float afCoeffsY[4];
6173 47343762 : GWKCubicComputeWeights(fDeltaX, afCoeffsX);
6174 47343762 : GWKCubicComputeWeights(fDeltaY, afCoeffsY);
6175 47343762 : const auto weightsX = _mm_loadu_ps(afCoeffsX);
6176 : const auto weightsXY0 =
6177 94687424 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[0]), weightsX);
6178 : const auto weightsXY1 =
6179 94687424 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[1]), weightsX);
6180 : const auto weightsXY2 =
6181 94687424 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[2]), weightsX);
6182 : const auto weightsXY3 =
6183 47343762 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[3]), weightsX);
6184 :
6185 47343762 : const GPtrDiff_t iOffset = iSrcOffset - poWK->nSrcXSize - 1;
6186 :
6187 47343762 : int iBand = 0;
6188 : // Process 2 bands at a time
6189 94687424 : for (; iBand + 1 < poWK->nBands; iBand += 2)
6190 : {
6191 47343762 : const T *CPL_RESTRICT pBand0 =
6192 47343762 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
6193 47343762 : const auto row0_0 = XMMLoad4Values(pBand0 + iOffset);
6194 : const auto row1_0 =
6195 47343762 : XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
6196 : const auto row2_0 =
6197 47343762 : XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
6198 : const auto row3_0 =
6199 47343762 : XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
6200 :
6201 47343762 : const T *CPL_RESTRICT pBand1 =
6202 47343762 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand + 1]);
6203 47343762 : const auto row0_1 = XMMLoad4Values(pBand1 + iOffset);
6204 : const auto row1_1 =
6205 47343762 : XMMLoad4Values(pBand1 + iOffset + poWK->nSrcXSize);
6206 : const auto row2_1 =
6207 47343762 : XMMLoad4Values(pBand1 + iOffset + 2 * poWK->nSrcXSize);
6208 : const auto row3_1 =
6209 47343762 : XMMLoad4Values(pBand1 + iOffset + 3 * poWK->nSrcXSize);
6210 :
6211 : const float fValue_0 =
6212 47343762 : Convolute4x4(row0_0, row1_0, row2_0, row3_0, weightsXY0,
6213 : weightsXY1, weightsXY2, weightsXY3);
6214 :
6215 : const float fValue_1 =
6216 47343762 : Convolute4x4(row0_1, row1_1, row2_1, row3_1, weightsXY0,
6217 : weightsXY1, weightsXY2, weightsXY3);
6218 :
6219 47343762 : T *CPL_RESTRICT pDstBand0 =
6220 47343762 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
6221 47343762 : pDstBand0[iDstOffset] = GWKClampValueT<T>(fValue_0);
6222 :
6223 47343762 : T *CPL_RESTRICT pDstBand1 =
6224 47343762 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand + 1]);
6225 47343762 : pDstBand1[iDstOffset] = GWKClampValueT<T>(fValue_1);
6226 : }
6227 47343762 : if (iBand < poWK->nBands)
6228 : {
6229 47343762 : const T *CPL_RESTRICT pBand0 =
6230 47343762 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
6231 47343762 : const auto row0 = XMMLoad4Values(pBand0 + iOffset);
6232 : const auto row1 =
6233 47343762 : XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
6234 : const auto row2 =
6235 47343762 : XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
6236 : const auto row3 =
6237 47343762 : XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
6238 :
6239 : const float fValue =
6240 47343762 : Convolute4x4(row0, row1, row2, row3, weightsXY0, weightsXY1,
6241 : weightsXY2, weightsXY3);
6242 :
6243 47343762 : T *CPL_RESTRICT pDstBand =
6244 47343762 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
6245 47343762 : pDstBand[iDstOffset] = GWKClampValueT<T>(fValue);
6246 : }
6247 : }
6248 :
6249 48826142 : if (poWK->pafDstDensity)
6250 46672101 : poWK->pafDstDensity[iDstOffset] = 1.0f;
6251 48826142 : }
6252 :
6253 : #endif // defined(USE_SSE2)
6254 :
6255 : /************************************************************************/
6256 : /* GWKResampleNoMasksOrDstDensityOnlyThreadInternal() */
6257 : /************************************************************************/
6258 :
// Worker for one job: warps destination rows [psJob->iYMin, psJob->iYMax)
// without consulting any source mask.
//
// Template parameters:
//   T                   - pixel type the band buffers are reinterpreted as.
//   eResample           - resampling algorithm, selected at compile time.
//   bUse4SamplesFormula - when non-zero, bilinear/cubic use the 4-sample
//                         helpers instead of GWKResampleNoMasksT.
//
// pData must point to a GWKJobStruct. For every destination row the pixel
// centres are run through poWK->pfnTransformer, then each pixel accepted by
// GWKCheckAndComputeSrcOffsets is resampled band by band and written to
// papabyDstImage; pafDstDensity, when present, is set to 1.0 for written
// pixels. The row loop stops early when psJob->pfnProgress returns non-zero.
6259 : template <class T, GDALResampleAlg eResample, int bUse4SamplesFormula>
6260 1986 : static void GWKResampleNoMasksOrDstDensityOnlyThreadInternal(void *pData)
6261 :
6262 : {
6263 1986 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6264 1986 : GDALWarpKernel *poWK = psJob->poWK;
6265 1986 : const int iYMin = psJob->iYMin;
6266 1986 : const int iYMax = psJob->iYMax;
    // Multiplier applied to the transformed Z when a vertical shift is
    // requested; the warp option is only parsed in that case.
6267 1968 : const double dfMultFactorVerticalShiftPipeline =
6268 1986 : poWK->bApplyVerticalShift
6269 18 : ? CPLAtof(CSLFetchNameValueDef(
6270 18 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
6271 : "1.0"))
6272 : : 0.0;
6273 :
6274 1986 : const int nDstXSize = poWK->nDstXSize;
6275 1986 : const int nSrcXSize = poWK->nSrcXSize;
6276 1986 : const int nSrcYSize = poWK->nSrcYSize;
6277 :
6278 : /* -------------------------------------------------------------------- */
6279 : /* Allocate x,y,z coordinate arrays for transformation ... one */
6280 : /* scanlines worth of positions. */
6281 : /* -------------------------------------------------------------------- */
6282 :
6283 : // For x, 2 *, because we cache the precomputed values at the end.
6284 : double *padfX =
6285 1986 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
6286 : double *padfY =
6287 1986 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6288 : double *padfZ =
6289 1986 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6290 1986 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
6291 :
    // Scratch weight arrays sized from the kernel radii; they are only handed
    // to GWKResampleNoMasksT further down.
6292 1986 : const int nXRadius = poWK->nXRadius;
6293 : double *padfWeightsX =
6294 1986 : static_cast<double *>(CPLCalloc(1 + nXRadius * 2, sizeof(double)));
6295 : double *padfWeightsY = static_cast<double *>(
6296 1986 : CPLCalloc(1 + poWK->nYRadius * 2, sizeof(double)));
6297 1986 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
6298 1986 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
6299 1986 : const double dfErrorThreshold = CPLAtof(
6300 1986 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
6301 :
6302 : // Precompute values.
6303 493263 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6304 491277 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
6305 :
6306 : /* ==================================================================== */
6307 : /* Loop over output lines. */
6308 : /* ==================================================================== */
6309 313119 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
6310 : {
6311 : /* --------------------------------------------------------------------
6312 : */
6313 : /* Setup points to transform to source image space. */
6314 : /* --------------------------------------------------------------------
6315 : */
        // Restore the cached X pixel centres (second half of padfX), since the
        // transformer overwrites the first half in place.
6316 311135 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
6317 311135 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
6318 108893795 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6319 108582683 : padfY[iDstX] = dfY;
6320 311135 : memset(padfZ, 0, sizeof(double) * nDstXSize);
6321 :
6322 : /* --------------------------------------------------------------------
6323 : */
6324 : /* Transform the points from destination pixel/line coordinates */
6325 : /* to source pixel/line coordinates. */
6326 : /* --------------------------------------------------------------------
6327 : */
6328 311135 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
6329 : padfY, padfZ, pabSuccess);
6330 311135 : if (dfSrcCoordPrecision > 0.0)
6331 : {
6332 1000 : GWKRoundSourceCoordinates(
6333 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
6334 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6335 1000 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
6336 : }
6337 :
6338 : /* ====================================================================
6339 : */
6340 : /* Loop over pixels in output scanline. */
6341 : /* ====================================================================
6342 : */
6343 108893795 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6344 : {
6345 108582683 : GPtrDiff_t iSrcOffset = 0;
6346 108582683 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
6347 : padfX, padfY, nSrcXSize,
6348 : nSrcYSize, iSrcOffset))
6349 61444188 : continue;
6350 :
6351 : /* ====================================================================
6352 : */
6353 : /* Loop processing each band. */
6354 : /* ====================================================================
6355 : */
6356 95964607 : const GPtrDiff_t iDstOffset =
6357 95964607 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
6358 :
            // SSE2 shortcut: for 4-sample cubic on GByte/GUInt16 with several
            // bands and no vertical shift, one call handles every band of the
            // pixel (including the destination density), so the per-band loop
            // below is skipped.
6359 : #if defined(USE_SSE2)
6360 : if constexpr (bUse4SamplesFormula && eResample == GRA_Cubic &&
6361 : (std::is_same<T, GByte>::value ||
6362 : std::is_same<T, GUInt16>::value))
6363 : {
6364 49891741 : if (poWK->nBands > 1 && !poWK->bApplyVerticalShift)
6365 : {
6366 48826142 : GWKCubicResampleNoMasks4MultiBandT<T>(
6367 48826142 : poWK, padfX[iDstX] - poWK->nSrcXOff,
6368 48826142 : padfY[iDstX] - poWK->nSrcYOff, iDstOffset);
6369 :
6370 48826142 : continue;
6371 : }
6372 : }
6373 : #endif // defined(USE_SSE2)
6374 :
            // dfInvWeights is only passed to GWKResampleNoMasksT, hence
            // [[maybe_unused]] for the other instantiations.
6375 47138478 : [[maybe_unused]] double dfInvWeights = 0;
6376 127960918 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6377 : {
6378 80822844 : T value = 0;
6379 : if constexpr (eResample == GRA_NearestNeighbour)
6380 : {
6381 73098330 : value = reinterpret_cast<T *>(
6382 73098330 : poWK->papabySrcImage[iBand])[iSrcOffset];
6383 : }
6384 : else if constexpr (bUse4SamplesFormula)
6385 : {
6386 : if constexpr (eResample == GRA_Bilinear)
6387 4041401 : GWKBilinearResampleNoMasks4SampleT(
6388 4041401 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6389 4041401 : padfY[iDstX] - poWK->nSrcYOff, &value);
6390 : else
6391 2300964 : GWKCubicResampleNoMasks4SampleT(
6392 2300964 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6393 2300964 : padfY[iDstX] - poWK->nSrcYOff, &value);
6394 : }
6395 : else
6396 : {
6397 1382149 : GWKResampleNoMasksT(
6398 1382149 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6399 1382149 : padfY[iDstX] - poWK->nSrcYOff, &value, padfWeightsX,
6400 : padfWeightsY, dfInvWeights);
6401 : }
6402 :
6403 80822844 : if (poWK->bApplyVerticalShift)
6404 : {
                    // A non-finite Z leaves this band (and, for this band
                    // iteration, the density) untouched.
6405 818 : if (!std::isfinite(padfZ[iDstX]))
6406 0 : continue;
6407 : // Subtract padfZ[] since the coordinate transformation is
6408 : // from target to source
6409 818 : value = GWKClampValueT<T>(
6410 818 : double(value) * poWK->dfMultFactorVerticalShift -
6411 818 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
6412 : }
6413 :
6414 80822844 : if (poWK->pafDstDensity)
6415 8224397 : poWK->pafDstDensity[iDstOffset] = 1.0f;
6416 :
6417 80822844 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
6418 : value;
6419 : }
6420 : }
6421 :
6422 : /* --------------------------------------------------------------------
6423 : */
6424 : /* Report progress to the user, and optionally cancel out. */
6425 : /* --------------------------------------------------------------------
6426 : */
6427 311135 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
6428 2 : break;
6429 : }
6430 :
6431 : /* -------------------------------------------------------------------- */
6432 : /* Cleanup and return. */
6433 : /* -------------------------------------------------------------------- */
6434 1986 : CPLFree(padfX);
6435 1986 : CPLFree(padfY);
6436 1986 : CPLFree(padfZ);
6437 1986 : CPLFree(pabSuccess);
6438 1986 : CPLFree(padfWeightsX);
6439 1986 : CPLFree(padfWeightsY);
6440 1986 : }
6441 :
6442 : template <class T, GDALResampleAlg eResample>
6443 962 : static void GWKResampleNoMasksOrDstDensityOnlyThread(void *pData)
6444 : {
6445 962 : GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
6446 : pData);
6447 962 : }
6448 :
6449 : template <class T, GDALResampleAlg eResample>
6450 1024 : static void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread(void *pData)
6451 :
6452 : {
6453 1024 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6454 1024 : GDALWarpKernel *poWK = psJob->poWK;
6455 : static_assert(eResample == GRA_Bilinear || eResample == GRA_Cubic);
6456 1024 : const bool bUse4SamplesFormula = CanUse4SamplesFormula(poWK);
6457 1024 : if (bUse4SamplesFormula)
6458 969 : GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, TRUE>(
6459 : pData);
6460 : else
6461 55 : GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
6462 : pData);
6463 1024 : }
6464 :
6465 911 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6466 : {
6467 911 : return GWKRun(
6468 : poWK, "GWKNearestNoMasksOrDstDensityOnlyByte",
6469 911 : GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_NearestNeighbour>);
6470 : }
6471 :
6472 128 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6473 : {
6474 128 : return GWKRun(
6475 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyByte",
6476 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte,
6477 128 : GRA_Bilinear>);
6478 : }
6479 :
6480 850 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6481 : {
6482 850 : return GWKRun(
6483 : poWK, "GWKCubicNoMasksOrDstDensityOnlyByte",
6484 850 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte, GRA_Cubic>);
6485 : }
6486 :
6487 9 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
6488 : {
6489 9 : return GWKRun(
6490 : poWK, "GWKCubicNoMasksOrDstDensityOnlyFloat",
6491 9 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, GRA_Cubic>);
6492 : }
6493 :
6494 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
6495 :
6496 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
6497 : {
6498 : return GWKRun(
6499 : poWK, "GWKCubicNoMasksOrDstDensityOnlyDouble",
6500 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double, GRA_Cubic>);
6501 : }
6502 : #endif
6503 :
6504 12 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6505 : {
6506 12 : return GWKRun(
6507 : poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyByte",
6508 12 : GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_CubicSpline>);
6509 : }
6510 :
6511 : /************************************************************************/
6512 : /* GWKNearestThread() */
6513 : /* */
6514 : /* Case for nearest neighbour resampling using valid flags, */
6515 : /* templated on the pixel type. Should be as fast as possible */
6516 : /* for this particular transformation type. */
6517 : /************************************************************************/
6518 :
// Worker for one job: nearest-neighbour warp of destination rows
// [psJob->iYMin, psJob->iYMax), honouring the unified source validity and
// density masks.
//
// T is the pixel type passed to GWKGetPixelT / GWKSetPixelValueRealT.
// pData must point to a GWKJobStruct. For every destination row the pixel
// centres are run through poWK->pfnTransformer; each pixel accepted by
// GWKCheckAndComputeSrcOffsets and by the mask checks is copied band by band,
// then the destination density/validity masks are updated. The row loop stops
// early when psJob->pfnProgress returns non-zero.
6519 476 : template <class T> static void GWKNearestThread(void *pData)
6520 :
6521 : {
6522 476 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6523 476 : GDALWarpKernel *poWK = psJob->poWK;
6524 476 : const int iYMin = psJob->iYMin;
6525 476 : const int iYMax = psJob->iYMax;
    // Multiplier applied to the transformed Z when a vertical shift is
    // requested; the warp option is only parsed in that case.
6526 476 : const double dfMultFactorVerticalShiftPipeline =
6527 476 : poWK->bApplyVerticalShift
6528 0 : ? CPLAtof(CSLFetchNameValueDef(
6529 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
6530 : "1.0"))
6531 : : 0.0;
    // True for single-band kernels, or when the UNIFIED_SRC_NODATA warp option
    // is not set to a true value (it defaults to FALSE here).
6532 476 : const bool bAvoidNoDataSingleBand =
6533 545 : poWK->nBands == 1 ||
6534 69 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
6535 : "UNIFIED_SRC_NODATA", "FALSE"));
6536 :
6537 476 : const int nDstXSize = poWK->nDstXSize;
6538 476 : const int nSrcXSize = poWK->nSrcXSize;
6539 476 : const int nSrcYSize = poWK->nSrcYSize;
6540 :
6541 : /* -------------------------------------------------------------------- */
6542 : /* Allocate x,y,z coordinate arrays for transformation ... one */
6543 : /* scanlines worth of positions. */
6544 : /* -------------------------------------------------------------------- */
6545 :
6546 : // For x, 2 *, because we cache the precomputed values at the end.
6547 : double *padfX =
6548 476 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
6549 : double *padfY =
6550 476 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6551 : double *padfZ =
6552 476 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6553 476 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
6554 :
6555 476 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
6556 476 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
6557 476 : const double dfErrorThreshold = CPLAtof(
6558 476 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
6559 :
    // When set, a pixel flagged invalid in panUnifiedSrcValid gets a second
    // chance through GWKAdjustSrcOffsetOnEdge instead of being skipped.
6560 : const bool bOneSourceCornerFailsToReproject =
6561 476 : GWKOneSourceCornerFailsToReproject(psJob);
6562 :
6563 : // Precompute values.
6564 80555 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6565 80079 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
6566 :
6567 : /* ==================================================================== */
6568 : /* Loop over output lines. */
6569 : /* ==================================================================== */
6570 64711 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
6571 : {
6572 :
6573 : /* --------------------------------------------------------------------
6574 : */
6575 : /* Setup points to transform to source image space. */
6576 : /* --------------------------------------------------------------------
6577 : */
        // Restore the cached X pixel centres (second half of padfX), since the
        // transformer overwrites the first half in place.
6578 64235 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
6579 64235 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
6580 33836597 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6581 33772441 : padfY[iDstX] = dfY;
6582 64235 : memset(padfZ, 0, sizeof(double) * nDstXSize);
6583 :
6584 : /* --------------------------------------------------------------------
6585 : */
6586 : /* Transform the points from destination pixel/line coordinates */
6587 : /* to source pixel/line coordinates. */
6588 : /* --------------------------------------------------------------------
6589 : */
6590 64235 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
6591 : padfY, padfZ, pabSuccess);
6592 64235 : if (dfSrcCoordPrecision > 0.0)
6593 : {
6594 0 : GWKRoundSourceCoordinates(
6595 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
6596 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6597 0 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
6598 : }
6599 : /* ====================================================================
6600 : */
6601 : /* Loop over pixels in output scanline. */
6602 : /* ====================================================================
6603 : */
6604 33836597 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6605 : {
6606 33772441 : GPtrDiff_t iSrcOffset = 0;
6607 33772441 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
6608 : padfX, padfY, nSrcXSize,
6609 : nSrcYSize, iSrcOffset))
6610 21383643 : continue;
6611 :
6612 : /* --------------------------------------------------------------------
6613 : */
6614 : /* Do not try to apply invalid source pixels to the dest. */
6615 : /* --------------------------------------------------------------------
6616 : */
6617 25227005 : if (poWK->panUnifiedSrcValid != nullptr &&
6618 6714445 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
6619 : {
6620 5120982 : if (!bOneSourceCornerFailsToReproject)
6621 : {
6622 5113496 : continue;
6623 : }
6624 7485 : else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
6625 : {
6626 5224 : continue;
6627 : }
6628 : }
6629 :
6630 : /* --------------------------------------------------------------------
6631 : */
6632 : /* Do not try to apply transparent source pixels to the
6633 : * destination.*/
6634 : /* --------------------------------------------------------------------
6635 : */
6636 13393880 : double dfDensity = 1.0;
6637 :
6638 13393880 : if (poWK->pafUnifiedSrcDensity != nullptr)
6639 : {
6640 1557335 : dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
6641 1557335 : if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
6642 1005075 : continue;
6643 : }
6644 :
6645 : /* ====================================================================
6646 : */
6647 : /* Loop processing each band. */
6648 : /* ====================================================================
6649 : */
6650 :
6651 12388798 : const GPtrDiff_t iDstOffset =
6652 12388798 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
6653 :
6654 27339658 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6655 : {
6656 14950960 : T value = 0;
6657 14950960 : double dfBandDensity = 0.0;
6658 :
6659 : /* --------------------------------------------------------------------
6660 : */
6661 : /* Collect the source value. */
6662 : /* --------------------------------------------------------------------
6663 : */
                // The band is written only when GWKGetPixelT reports success.
6664 14950960 : if (GWKGetPixelT(poWK, iBand, iSrcOffset, &dfBandDensity,
6665 : &value))
6666 : {
6667 :
6668 14950860 : if (poWK->bApplyVerticalShift)
6669 : {
6670 0 : if (!std::isfinite(padfZ[iDstX]))
6671 0 : continue;
6672 : // Subtract padfZ[] since the coordinate transformation
6673 : // is from target to source
6674 0 : value = GWKClampValueT<T>(
6675 0 : double(value) * poWK->dfMultFactorVerticalShift -
6676 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
6677 : }
6678 :
6679 14950860 : GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
6680 : dfBandDensity, value,
6681 : bAvoidNoDataSingleBand);
6682 : }
6683 : }
6684 :
6685 : /* --------------------------------------------------------------------
6686 : */
6687 : /* Mark this pixel valid/opaque in the output. */
6688 : /* --------------------------------------------------------------------
6689 : */
6690 :
            // NOTE(review): these mask updates run even if no band was written
            // above; the code does not track that here - confirm intended.
6691 12388798 : if (!bAvoidNoDataSingleBand)
6692 : {
6693 424278 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
6694 : }
6695 :
6696 12388798 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
6697 :
6698 12388798 : if (poWK->panDstValid != nullptr)
6699 : {
6700 11118345 : CPLMaskSet(poWK->panDstValid, iDstOffset);
6701 : }
6702 : } /* Next iDstX */
6703 :
6704 : /* --------------------------------------------------------------------
6705 : */
6706 : /* Report progress to the user, and optionally cancel out. */
6707 : /* --------------------------------------------------------------------
6708 : */
6709 64235 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
6710 0 : break;
6711 : }
6712 :
6713 : /* -------------------------------------------------------------------- */
6714 : /* Cleanup and return. */
6715 : /* -------------------------------------------------------------------- */
6716 476 : CPLFree(padfX);
6717 476 : CPLFree(padfY);
6718 476 : CPLFree(padfZ);
6719 476 : CPLFree(pabSuccess);
6720 476 : }
6721 :
6722 363 : static CPLErr GWKNearestByte(GDALWarpKernel *poWK)
6723 : {
6724 363 : return GWKRun(poWK, "GWKNearestByte", GWKNearestThread<GByte>);
6725 : }
6726 :
6727 14 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6728 : {
6729 14 : return GWKRun(
6730 : poWK, "GWKNearestNoMasksOrDstDensityOnlyShort",
6731 14 : GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_NearestNeighbour>);
6732 : }
6733 :
6734 5 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6735 : {
6736 5 : return GWKRun(
6737 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyShort",
6738 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16,
6739 5 : GRA_Bilinear>);
6740 : }
6741 :
6742 6 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6743 : {
6744 6 : return GWKRun(
6745 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyUShort",
6746 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16,
6747 6 : GRA_Bilinear>);
6748 : }
6749 :
6750 4 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
6751 : {
6752 4 : return GWKRun(
6753 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyFloat",
6754 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float,
6755 4 : GRA_Bilinear>);
6756 : }
6757 :
6758 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
6759 :
6760 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
6761 : {
6762 : return GWKRun(
6763 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyDouble",
6764 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double,
6765 : GRA_Bilinear>);
6766 : }
6767 : #endif
6768 :
6769 5 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6770 : {
6771 5 : return GWKRun(
6772 : poWK, "GWKCubicNoMasksOrDstDensityOnlyShort",
6773 5 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16, GRA_Cubic>);
6774 : }
6775 :
6776 14 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6777 : {
6778 14 : return GWKRun(
6779 : poWK, "GWKCubicNoMasksOrDstDensityOnlyUShort",
6780 14 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16, GRA_Cubic>);
6781 : }
6782 :
6783 6 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6784 : {
6785 6 : return GWKRun(
6786 : poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyShort",
6787 6 : GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_CubicSpline>);
6788 : }
6789 :
6790 5 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6791 : {
6792 5 : return GWKRun(
6793 : poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyUShort",
6794 5 : GWKResampleNoMasksOrDstDensityOnlyThread<GUInt16, GRA_CubicSpline>);
6795 : }
6796 :
6797 9 : static CPLErr GWKNearestInt8(GDALWarpKernel *poWK)
6798 : {
6799 9 : return GWKRun(poWK, "GWKNearestInt8", GWKNearestThread<int8_t>);
6800 : }
6801 :
6802 40 : static CPLErr GWKNearestShort(GDALWarpKernel *poWK)
6803 : {
6804 40 : return GWKRun(poWK, "GWKNearestShort", GWKNearestThread<GInt16>);
6805 : }
6806 :
6807 10 : static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK)
6808 : {
6809 10 : return GWKRun(poWK, "GWKNearestUnsignedShort", GWKNearestThread<GUInt16>);
6810 : }
6811 :
6812 11 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
6813 : {
6814 11 : return GWKRun(
6815 : poWK, "GWKNearestNoMasksOrDstDensityOnlyFloat",
6816 11 : GWKResampleNoMasksOrDstDensityOnlyThread<float, GRA_NearestNeighbour>);
6817 : }
6818 :
6819 50 : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK)
6820 : {
6821 50 : return GWKRun(poWK, "GWKNearestFloat", GWKNearestThread<float>);
6822 : }
6823 :
6824 : /************************************************************************/
6825 : /* GWKAverageOrMode() */
6826 : /* */
6827 : /************************************************************************/
6828 :
/*
 * Weight of source row iSrcY inside the window [dfYMin, dfYMax]:
 *   - first row of the window: the covered fraction 1 - (dfYMin - iSrcYMin),
 *     or 1.0 when the window is a single row;
 *   - last row of the window: the covered fraction 1 - (iSrcYMax - dfYMax);
 *   - any other row: 1.0.
 * Relies on iSrcYMin, iSrcYMax, dfYMin and dfYMax being in scope at the point
 * of use. The argument is parenthesised so that any expression can be passed;
 * note that it is still evaluated more than once.
 */
#define COMPUTE_WEIGHT_Y(iSrcY)                                                \
    (((iSrcY) == iSrcYMin)                                                     \
         ? ((iSrcYMin + 1 == iSrcYMax) ? 1.0 : 1 - (dfYMin - iSrcYMin))        \
     : ((iSrcY) + 1 == iSrcYMax) ? 1 - (iSrcYMax - dfYMax)                     \
                                 : 1.0)

/*
 * Weight of source pixel (iSrcX, row) given the row weight dfWeightY: the row
 * weight scaled by the covered fraction of the column, computed the same way
 * as in COMPUTE_WEIGHT_Y but along X. Relies on iSrcXMin, iSrcXMax, dfXMin
 * and dfXMax being in scope at the point of use. Both arguments are
 * parenthesised; they may be evaluated more than once.
 */
#define COMPUTE_WEIGHT(iSrcX, dfWeightY)                                       \
    (((iSrcX) == iSrcXMin)                                                     \
         ? ((iSrcXMin + 1 == iSrcXMax)                                         \
                ? (dfWeightY)                                                  \
                : (dfWeightY) * (1 - (dfXMin - iSrcXMin)))                     \
     : ((iSrcX) + 1 == iSrcXMax) ? (dfWeightY) * (1 - (iSrcXMax - dfXMax))     \
                                 : (dfWeightY))
6841 :
6842 : static void GWKAverageOrModeThread(void *pData);
6843 :
6844 246 : static CPLErr GWKAverageOrMode(GDALWarpKernel *poWK)
6845 : {
6846 246 : return GWKRun(poWK, "GWKAverageOrMode", GWKAverageOrModeThread);
6847 : }
6848 :
6849 : /************************************************************************/
6850 : /* GWKAverageOrModeComputeLineCoords() */
6851 : /************************************************************************/
6852 :
6853 28663 : static void GWKAverageOrModeComputeLineCoords(
6854 : const GWKJobStruct *psJob, double *padfX, double *padfX2, double *padfY,
6855 : double *padfY2, double *padfZ, double *padfZ2, int *pabSuccess,
6856 : int *pabSuccess2, int iDstY, double dfSrcCoordPrecision,
6857 : double dfErrorThreshold)
6858 : {
6859 28663 : const GDALWarpKernel *poWK = psJob->poWK;
6860 28663 : const int nDstXSize = poWK->nDstXSize;
6861 :
6862 : // Setup points to transform to source image space.
6863 7360890 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6864 : {
6865 7332220 : padfX[iDstX] = iDstX + poWK->nDstXOff;
6866 7332220 : padfY[iDstX] = iDstY + poWK->nDstYOff;
6867 7332220 : padfZ[iDstX] = 0.0;
6868 7332220 : padfX2[iDstX] = iDstX + 1.0 + poWK->nDstXOff;
6869 7332220 : padfY2[iDstX] = iDstY + 1.0 + poWK->nDstYOff;
6870 7332220 : padfZ2[iDstX] = 0.0;
6871 : }
6872 :
6873 : /* ----------------------------------------------------------------- */
6874 : /* Transform the points from destination pixel/line coordinates */
6875 : /* to source pixel/line coordinates. */
6876 : /* ----------------------------------------------------------------- */
6877 28663 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX, padfY,
6878 : padfZ, pabSuccess);
6879 28663 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX2,
6880 : padfY2, padfZ2, pabSuccess2);
6881 :
6882 28663 : if (dfSrcCoordPrecision > 0.0)
6883 : {
6884 0 : GWKRoundSourceCoordinates(nDstXSize, padfX, padfY, padfZ, pabSuccess,
6885 : dfSrcCoordPrecision, dfErrorThreshold,
6886 0 : poWK->pfnTransformer, psJob->pTransformerArg,
6887 0 : poWK->nDstXOff, iDstY + poWK->nDstYOff);
6888 0 : GWKRoundSourceCoordinates(
6889 : nDstXSize, padfX2, padfY2, padfZ2, pabSuccess2, dfSrcCoordPrecision,
6890 0 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6891 0 : 1.0 + poWK->nDstXOff, iDstY + 1.0 + poWK->nDstYOff);
6892 : }
6893 28663 : }
6894 :
6895 : /************************************************************************/
6896 : /* GWKAverageOrModeComputeSourceCoords() */
6897 : /************************************************************************/
6898 :
6899 7332220 : static bool GWKAverageOrModeComputeSourceCoords(
6900 : const GWKJobStruct *psJob, double *padfX, double *padfX2, double *padfY,
6901 : double *padfY2, int iDstX, int iDstY, int nXMargin, int nYMargin,
6902 : // Output:
6903 : bool &bWrapOverX, double &dfXMin, double &dfYMin, double &dfXMax,
6904 : double &dfYMax, int &iSrcXMin, int &iSrcYMin, int &iSrcXMax, int &iSrcYMax)
6905 : {
6906 7332220 : const GDALWarpKernel *poWK = psJob->poWK;
6907 7332220 : const int nSrcXSize = poWK->nSrcXSize;
6908 7332220 : const int nSrcYSize = poWK->nSrcYSize;
6909 :
6910 : // Add some checks so that padfX[iDstX] - poWK->nSrcXOff is in
6911 : // reasonable range (https://github.com/OSGeo/gdal/issues/2365)
6912 7332220 : if (!(padfX[iDstX] - poWK->nSrcXOff >= -nXMargin &&
6913 6814810 : padfX2[iDstX] - poWK->nSrcXOff >= -nXMargin &&
6914 6814810 : padfY[iDstX] - poWK->nSrcYOff >= -nYMargin &&
6915 6532210 : padfY2[iDstX] - poWK->nSrcYOff >= -nYMargin &&
6916 6532210 : padfX[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
6917 5870420 : padfX2[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
6918 5865780 : padfY[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin &&
6919 5350790 : padfY2[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin))
6920 : {
6921 1985190 : return false;
6922 : }
6923 :
6924 : // Compute corners in source crs.
6925 :
6926 : // The transformation might not have preserved ordering of
6927 : // coordinates so do the necessary swapping (#5433).
6928 : // NOTE: this is really an approximative fix. To do something
6929 : // more precise we would for example need to compute the
6930 : // transformation of coordinates in the
6931 : // [iDstX,iDstY]x[iDstX+1,iDstY+1] square back to source
6932 : // coordinates, and take the bounding box of the got source
6933 : // coordinates.
6934 :
6935 5347040 : if (padfX[iDstX] > padfX2[iDstX])
6936 269148 : std::swap(padfX[iDstX], padfX2[iDstX]);
6937 :
6938 : // Detect situations where the target pixel is close to the
6939 : // antimeridian and when padfX[iDstX] and padfX2[iDstX] are very
6940 : // close to the left-most and right-most columns of the source
6941 : // raster. The 2 value below was experimentally determined to
6942 : // avoid false-positives and false-negatives.
6943 : // Addresses https://github.com/OSGeo/gdal/issues/6478
6944 5347040 : bWrapOverX = false;
6945 5347040 : const int nThresholdWrapOverX = std::min(2, nSrcXSize / 10);
6946 5347040 : if (poWK->nSrcXOff == 0 && iDstX + 1 < poWK->nDstXSize &&
6947 3298690 : 2 * padfX[iDstX] - padfX[iDstX + 1] < nThresholdWrapOverX &&
6948 55362 : nSrcXSize - padfX2[iDstX] < nThresholdWrapOverX)
6949 : {
6950 : // Check there is a discontinuity by checking at mid-pixel.
6951 : // NOTE: all this remains fragile. To confidently
6952 : // detect antimeridian warping we should probably try to access
6953 : // georeferenced coordinates, and not rely only on tests on
6954 : // image space coordinates. But accessing georeferenced
6955 : // coordinates from here is not trivial, and we would for example
6956 : // have to handle both geographic, Mercator, etc.
6957 : // Let's hope this heuristics is good enough for now.
6958 1610 : double x = iDstX + 0.5 + poWK->nDstXOff;
6959 1610 : double y = iDstY + poWK->nDstYOff;
6960 1610 : double z = 0;
6961 1610 : int bSuccess = FALSE;
6962 1610 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1, &x, &y, &z,
6963 : &bSuccess);
6964 1610 : if (bSuccess && x < padfX[iDstX])
6965 : {
6966 1596 : bWrapOverX = true;
6967 1596 : std::swap(padfX[iDstX], padfX2[iDstX]);
6968 1596 : padfX2[iDstX] += nSrcXSize;
6969 : }
6970 : }
6971 :
6972 5347040 : dfXMin = padfX[iDstX] - poWK->nSrcXOff;
6973 5347040 : dfXMax = padfX2[iDstX] - poWK->nSrcXOff;
6974 5347040 : constexpr double EPSILON = 1e-10;
6975 : // Check that [dfXMin, dfXMax] intersect with [0,nSrcXSize] with a tolerance
6976 5347040 : if (!(dfXMax > -EPSILON && dfXMin < nSrcXSize + EPSILON))
6977 15528 : return false;
6978 5331510 : iSrcXMin = static_cast<int>(std::max(floor(dfXMin + EPSILON), 0.0));
6979 5331510 : iSrcXMax = static_cast<int>(
6980 5331510 : std::min(ceil(dfXMax - EPSILON), static_cast<double>(INT_MAX)));
6981 5331510 : if (!bWrapOverX)
6982 5329910 : iSrcXMax = std::min(iSrcXMax, nSrcXSize);
6983 5331510 : if (iSrcXMin == iSrcXMax && iSrcXMax < nSrcXSize)
6984 472 : iSrcXMax++;
6985 :
6986 5331510 : if (padfY[iDstX] > padfY2[iDstX])
6987 270117 : std::swap(padfY[iDstX], padfY2[iDstX]);
6988 5331510 : dfYMin = padfY[iDstX] - poWK->nSrcYOff;
6989 5331510 : dfYMax = padfY2[iDstX] - poWK->nSrcYOff;
6990 : // Check that [dfYMin, dfYMax] intersect with [0,nSrcYSize] with a tolerance
6991 5331510 : if (!(dfYMax > -EPSILON && dfYMin < nSrcYSize + EPSILON))
6992 13334 : return false;
6993 5318180 : iSrcYMin = static_cast<int>(std::max(floor(dfYMin + EPSILON), 0.0));
6994 5318180 : iSrcYMax = std::min(static_cast<int>(ceil(dfYMax - EPSILON)), nSrcYSize);
6995 5318180 : if (iSrcYMin == iSrcYMax && iSrcYMax < nSrcYSize)
6996 0 : iSrcYMax++;
6997 :
6998 5318180 : return true;
6999 : }
7000 :
7001 : /************************************************************************/
7002 : /* GWKModeRealType() */
7003 : /************************************************************************/
7004 :
7005 17780 : template <class T> static inline bool IsSame(T a, T b)
7006 : {
7007 17780 : return a == b;
7008 : }
7009 :
7010 0 : template <> bool IsSame<GFloat16>(GFloat16 a, GFloat16 b)
7011 : {
7012 0 : return a == b || (CPLIsNan(a) && CPLIsNan(b));
7013 : }
7014 :
7015 18 : template <> bool IsSame<float>(float a, float b)
7016 : {
7017 18 : return a == b || (std::isnan(a) && std::isnan(b));
7018 : }
7019 :
7020 56 : template <> bool IsSame<double>(double a, double b)
7021 : {
7022 56 : return a == b || (std::isnan(a) && std::isnan(b));
7023 : }
7024 :
7025 19 : template <class T> static void GWKModeRealType(GWKJobStruct *psJob)
7026 : {
7027 19 : const GDALWarpKernel *poWK = psJob->poWK;
7028 19 : const int iYMin = psJob->iYMin;
7029 19 : const int iYMax = psJob->iYMax;
7030 19 : const int nDstXSize = poWK->nDstXSize;
7031 19 : const int nSrcXSize = poWK->nSrcXSize;
7032 19 : const int nSrcYSize = poWK->nSrcYSize;
7033 19 : const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
7034 :
7035 19 : T *pVals = nullptr;
7036 19 : float *pafCounts = nullptr;
7037 :
7038 19 : if (nSrcXSize > 0 && nSrcYSize > 0)
7039 : {
7040 : pVals = static_cast<T *>(
7041 19 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(T)));
7042 : pafCounts = static_cast<float *>(
7043 19 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
7044 19 : if (pVals == nullptr || pafCounts == nullptr)
7045 : {
7046 0 : VSIFree(pVals);
7047 0 : VSIFree(pafCounts);
7048 0 : return;
7049 : }
7050 : }
7051 :
7052 : /* -------------------------------------------------------------------- */
7053 : /* Allocate x,y,z coordinate arrays for transformation ... two */
7054 : /* scanlines worth of positions. */
7055 : /* -------------------------------------------------------------------- */
7056 :
7057 : double *padfX =
7058 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7059 : double *padfY =
7060 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7061 : double *padfZ =
7062 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7063 : double *padfX2 =
7064 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7065 : double *padfY2 =
7066 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7067 : double *padfZ2 =
7068 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7069 19 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7070 19 : int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7071 :
7072 19 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
7073 19 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
7074 19 : const double dfErrorThreshold = CPLAtof(
7075 19 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
7076 19 : const bool bAvoidNoDataSingleBand =
7077 19 : poWK->nBands == 1 ||
7078 0 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
7079 : "UNIFIED_SRC_NODATA", "FALSE"));
7080 :
7081 19 : const int nXMargin =
7082 19 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
7083 19 : const int nYMargin =
7084 19 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
7085 :
7086 : /* ==================================================================== */
7087 : /* Loop over output lines. */
7088 : /* ==================================================================== */
7089 116 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
7090 : {
7091 97 : GWKAverageOrModeComputeLineCoords(
7092 : psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
7093 : pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
7094 :
7095 : // Loop over pixels in output scanline.
7096 3514 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
7097 : {
7098 3417 : GPtrDiff_t iSrcOffset = 0;
7099 3417 : double dfDensity = 1.0;
7100 3417 : bool bHasFoundDensity = false;
7101 :
7102 3417 : bool bWrapOverX = false;
7103 3417 : double dfXMin = 0;
7104 3417 : double dfYMin = 0;
7105 3417 : double dfXMax = 0;
7106 3417 : double dfYMax = 0;
7107 3417 : int iSrcXMin = 0;
7108 3417 : int iSrcYMin = 0;
7109 3417 : int iSrcXMax = 0;
7110 3417 : int iSrcYMax = 0;
7111 3417 : if (!GWKAverageOrModeComputeSourceCoords(
7112 : psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
7113 : nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
7114 : iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
7115 : {
7116 0 : continue;
7117 : }
7118 :
7119 3417 : const GPtrDiff_t iDstOffset =
7120 3417 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
7121 :
7122 : // Loop processing each band.
7123 6834 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7124 : {
7125 3417 : double dfBandDensity = 0.0;
7126 :
7127 3417 : int nBins = 0;
7128 3417 : int iModeIndex = -1;
7129 3417 : T nVal{};
7130 :
7131 10248 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7132 : {
7133 6831 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7134 6831 : iSrcOffset =
7135 6831 : iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7136 20530 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7137 : iSrcX++, iSrcOffset++)
7138 : {
7139 13699 : if (bWrapOverX)
7140 0 : iSrcOffset =
7141 0 : (iSrcX % nSrcXSize) +
7142 0 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7143 :
7144 13699 : if (poWK->panUnifiedSrcValid != nullptr &&
7145 0 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
7146 0 : continue;
7147 :
7148 13699 : if (GWKGetPixelT(poWK, iBand, iSrcOffset,
7149 27398 : &dfBandDensity, &nVal) &&
7150 13699 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7151 : {
7152 13699 : const double dfWeight =
7153 13699 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7154 :
7155 : // Check array for existing entry.
7156 13699 : int i = 0;
7157 29194 : for (i = 0; i < nBins; ++i)
7158 : {
7159 17807 : if (IsSame(pVals[i], nVal))
7160 : {
7161 :
7162 2312 : pafCounts[i] +=
7163 2312 : static_cast<float>(dfWeight);
7164 2312 : bool bValIsMaxCount =
7165 2312 : (pafCounts[i] > pafCounts[iModeIndex]);
7166 :
7167 2312 : if (!bValIsMaxCount &&
7168 1498 : pafCounts[i] == pafCounts[iModeIndex])
7169 : {
7170 1490 : switch (eTieStrategy)
7171 : {
7172 1477 : case GWKTS_First:
7173 1477 : break;
7174 6 : case GWKTS_Min:
7175 6 : bValIsMaxCount =
7176 6 : nVal < pVals[iModeIndex];
7177 6 : break;
7178 7 : case GWKTS_Max:
7179 7 : bValIsMaxCount =
7180 7 : nVal > pVals[iModeIndex];
7181 7 : break;
7182 : }
7183 : }
7184 :
7185 2312 : if (bValIsMaxCount)
7186 : {
7187 817 : iModeIndex = i;
7188 : }
7189 :
7190 2312 : break;
7191 : }
7192 : }
7193 :
7194 : // Add to arr if entry not already there.
7195 13699 : if (i == nBins)
7196 : {
7197 11387 : pVals[i] = nVal;
7198 11387 : pafCounts[i] = static_cast<float>(dfWeight);
7199 :
7200 11387 : if (iModeIndex < 0)
7201 3417 : iModeIndex = i;
7202 :
7203 11387 : ++nBins;
7204 : }
7205 : }
7206 : }
7207 : }
7208 :
7209 3417 : if (iModeIndex != -1)
7210 : {
7211 3417 : nVal = pVals[iModeIndex];
7212 3417 : dfBandDensity = 1;
7213 3417 : bHasFoundDensity = true;
7214 : }
7215 :
7216 : // We have a computed value from the source. Now apply it
7217 : // to the destination pixel
7218 3417 : if (bHasFoundDensity)
7219 : {
7220 3417 : GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
7221 : dfBandDensity, nVal,
7222 : bAvoidNoDataSingleBand);
7223 : }
7224 : }
7225 :
7226 3417 : if (!bHasFoundDensity)
7227 0 : continue;
7228 :
7229 3417 : if (!bAvoidNoDataSingleBand)
7230 : {
7231 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
7232 : }
7233 :
7234 : /* --------------------------------------------------------------------
7235 : */
7236 : /* Update destination density/validity masks. */
7237 : /* --------------------------------------------------------------------
7238 : */
7239 3417 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
7240 :
7241 3417 : if (poWK->panDstValid != nullptr)
7242 : {
7243 0 : CPLMaskSet(poWK->panDstValid, iDstOffset);
7244 : }
7245 : } /* Next iDstX */
7246 :
7247 : /* --------------------------------------------------------------------
7248 : */
7249 : /* Report progress to the user, and optionally cancel out. */
7250 : /* --------------------------------------------------------------------
7251 : */
7252 97 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
7253 0 : break;
7254 : }
7255 :
7256 : /* -------------------------------------------------------------------- */
7257 : /* Cleanup and return. */
7258 : /* -------------------------------------------------------------------- */
7259 19 : CPLFree(padfX);
7260 19 : CPLFree(padfY);
7261 19 : CPLFree(padfZ);
7262 19 : CPLFree(padfX2);
7263 19 : CPLFree(padfY2);
7264 19 : CPLFree(padfZ2);
7265 19 : CPLFree(pabSuccess);
7266 19 : CPLFree(pabSuccess2);
7267 19 : VSIFree(pVals);
7268 19 : VSIFree(pafCounts);
7269 : }
7270 :
7271 : /************************************************************************/
7272 : /* GWKModeComplexType() */
7273 : /************************************************************************/
7274 :
7275 8 : static void GWKModeComplexType(GWKJobStruct *psJob)
7276 : {
7277 8 : const GDALWarpKernel *poWK = psJob->poWK;
7278 8 : const int iYMin = psJob->iYMin;
7279 8 : const int iYMax = psJob->iYMax;
7280 8 : const int nDstXSize = poWK->nDstXSize;
7281 8 : const int nSrcXSize = poWK->nSrcXSize;
7282 8 : const int nSrcYSize = poWK->nSrcYSize;
7283 8 : const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
7284 : const double dfMultFactorVerticalShiftPipeline =
7285 8 : poWK->bApplyVerticalShift
7286 8 : ? CPLAtof(CSLFetchNameValueDef(
7287 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
7288 : "1.0"))
7289 8 : : 0.0;
7290 : const bool bAvoidNoDataSingleBand =
7291 8 : poWK->nBands == 1 ||
7292 0 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
7293 8 : "UNIFIED_SRC_NODATA", "FALSE"));
7294 :
7295 8 : double *padfRealVals = nullptr;
7296 8 : double *padfImagVals = nullptr;
7297 8 : float *pafCounts = nullptr;
7298 :
7299 8 : if (nSrcXSize > 0 && nSrcYSize > 0)
7300 : {
7301 : padfRealVals = static_cast<double *>(
7302 8 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(double)));
7303 : padfImagVals = static_cast<double *>(
7304 8 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(double)));
7305 : pafCounts = static_cast<float *>(
7306 8 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
7307 8 : if (padfRealVals == nullptr || padfImagVals == nullptr ||
7308 : pafCounts == nullptr)
7309 : {
7310 0 : VSIFree(padfRealVals);
7311 0 : VSIFree(padfImagVals);
7312 0 : VSIFree(pafCounts);
7313 0 : return;
7314 : }
7315 : }
7316 :
7317 : /* -------------------------------------------------------------------- */
7318 : /* Allocate x,y,z coordinate arrays for transformation ... two */
7319 : /* scanlines worth of positions. */
7320 : /* -------------------------------------------------------------------- */
7321 :
7322 : double *padfX =
7323 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7324 : double *padfY =
7325 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7326 : double *padfZ =
7327 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7328 : double *padfX2 =
7329 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7330 : double *padfY2 =
7331 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7332 : double *padfZ2 =
7333 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7334 8 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7335 8 : int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7336 :
7337 8 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
7338 8 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
7339 8 : const double dfErrorThreshold = CPLAtof(
7340 8 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
7341 :
7342 : const int nXMargin =
7343 8 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
7344 : const int nYMargin =
7345 8 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
7346 :
7347 : /* ==================================================================== */
7348 : /* Loop over output lines. */
7349 : /* ==================================================================== */
7350 16 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
7351 : {
7352 8 : GWKAverageOrModeComputeLineCoords(
7353 : psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
7354 : pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
7355 :
7356 : // Loop over pixels in output scanline.
7357 16 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
7358 : {
7359 8 : GPtrDiff_t iSrcOffset = 0;
7360 8 : double dfDensity = 1.0;
7361 8 : bool bHasFoundDensity = false;
7362 :
7363 8 : bool bWrapOverX = false;
7364 8 : double dfXMin = 0;
7365 8 : double dfYMin = 0;
7366 8 : double dfXMax = 0;
7367 8 : double dfYMax = 0;
7368 8 : int iSrcXMin = 0;
7369 8 : int iSrcYMin = 0;
7370 8 : int iSrcXMax = 0;
7371 8 : int iSrcYMax = 0;
7372 8 : if (!GWKAverageOrModeComputeSourceCoords(
7373 : psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
7374 : nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
7375 : iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
7376 : {
7377 0 : continue;
7378 : }
7379 :
7380 8 : const GPtrDiff_t iDstOffset =
7381 8 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
7382 :
7383 : // Loop processing each band.
7384 16 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7385 : {
7386 8 : double dfBandDensity = 0.0;
7387 :
7388 8 : int nBins = 0;
7389 8 : int iModeIndex = -1;
7390 8 : double dfValueReal = 0;
7391 8 : double dfValueImag = 0;
7392 :
7393 16 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7394 : {
7395 8 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7396 8 : iSrcOffset =
7397 8 : iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7398 38 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7399 : iSrcX++, iSrcOffset++)
7400 : {
7401 30 : if (bWrapOverX)
7402 0 : iSrcOffset =
7403 0 : (iSrcX % nSrcXSize) +
7404 0 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7405 :
7406 30 : if (poWK->panUnifiedSrcValid != nullptr &&
7407 0 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
7408 0 : continue;
7409 :
7410 30 : if (GWKGetPixelValue(poWK, iBand, iSrcOffset,
7411 : &dfBandDensity, &dfValueReal,
7412 60 : &dfValueImag) &&
7413 30 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7414 : {
7415 30 : const double dfWeight =
7416 30 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7417 :
7418 : // Check array for existing entry.
7419 30 : int i = 0;
7420 49 : for (i = 0; i < nBins; ++i)
7421 : {
7422 47 : if (IsSame(padfRealVals[i], dfValueReal) &&
7423 14 : IsSame(padfImagVals[i], dfValueImag))
7424 : {
7425 :
7426 14 : pafCounts[i] +=
7427 14 : static_cast<float>(dfWeight);
7428 14 : bool bValIsMaxCount =
7429 14 : (pafCounts[i] > pafCounts[iModeIndex]);
7430 :
7431 14 : if (!bValIsMaxCount &&
7432 6 : pafCounts[i] == pafCounts[iModeIndex])
7433 : {
7434 3 : switch (eTieStrategy)
7435 : {
7436 3 : case GWKTS_First:
7437 3 : break;
7438 0 : case GWKTS_Min:
7439 0 : bValIsMaxCount =
7440 0 : dfValueReal <
7441 0 : padfRealVals[iModeIndex];
7442 0 : break;
7443 0 : case GWKTS_Max:
7444 0 : bValIsMaxCount =
7445 0 : dfValueReal >
7446 0 : padfRealVals[iModeIndex];
7447 0 : break;
7448 : }
7449 : }
7450 :
7451 14 : if (bValIsMaxCount)
7452 : {
7453 8 : iModeIndex = i;
7454 : }
7455 :
7456 14 : break;
7457 : }
7458 : }
7459 :
7460 : // Add to arr if entry not already there.
7461 30 : if (i == nBins)
7462 : {
7463 16 : padfRealVals[i] = dfValueReal;
7464 16 : padfImagVals[i] = dfValueImag;
7465 16 : pafCounts[i] = static_cast<float>(dfWeight);
7466 :
7467 16 : if (iModeIndex < 0)
7468 8 : iModeIndex = i;
7469 :
7470 16 : ++nBins;
7471 : }
7472 : }
7473 : }
7474 : }
7475 :
7476 8 : if (iModeIndex != -1)
7477 : {
7478 8 : dfValueReal = padfRealVals[iModeIndex];
7479 8 : dfValueImag = padfImagVals[iModeIndex];
7480 8 : dfBandDensity = 1;
7481 :
7482 8 : if (poWK->bApplyVerticalShift)
7483 : {
7484 0 : if (!std::isfinite(padfZ[iDstX]))
7485 0 : continue;
7486 : // Subtract padfZ[] since the coordinate
7487 : // transformation is from target to source
7488 0 : dfValueReal =
7489 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
7490 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
7491 : }
7492 :
7493 8 : bHasFoundDensity = true;
7494 : }
7495 :
7496 : // We have a computed value from the source. Now apply it
7497 : // to the destination pixel
7498 8 : if (bHasFoundDensity)
7499 : {
7500 8 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
7501 : dfValueReal, dfValueImag,
7502 : bAvoidNoDataSingleBand);
7503 : }
7504 : }
7505 :
7506 8 : if (!bHasFoundDensity)
7507 0 : continue;
7508 :
7509 8 : if (!bAvoidNoDataSingleBand)
7510 : {
7511 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
7512 : }
7513 :
7514 : /* --------------------------------------------------------------------
7515 : */
7516 : /* Update destination density/validity masks. */
7517 : /* --------------------------------------------------------------------
7518 : */
7519 8 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
7520 :
7521 8 : if (poWK->panDstValid != nullptr)
7522 : {
7523 0 : CPLMaskSet(poWK->panDstValid, iDstOffset);
7524 : }
7525 : } /* Next iDstX */
7526 :
7527 : /* --------------------------------------------------------------------
7528 : */
7529 : /* Report progress to the user, and optionally cancel out. */
7530 : /* --------------------------------------------------------------------
7531 : */
7532 8 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
7533 0 : break;
7534 : }
7535 :
7536 : /* -------------------------------------------------------------------- */
7537 : /* Cleanup and return. */
7538 : /* -------------------------------------------------------------------- */
7539 8 : CPLFree(padfX);
7540 8 : CPLFree(padfY);
7541 8 : CPLFree(padfZ);
7542 8 : CPLFree(padfX2);
7543 8 : CPLFree(padfY2);
7544 8 : CPLFree(padfZ2);
7545 8 : CPLFree(pabSuccess);
7546 8 : CPLFree(pabSuccess2);
7547 8 : VSIFree(padfRealVals);
7548 8 : VSIFree(padfImagVals);
7549 8 : VSIFree(pafCounts);
7550 : }
7551 :
7552 : /************************************************************************/
7553 : /* GWKAverageOrModeThread() */
7554 : /************************************************************************/
7555 :
7556 : // Overall logic based on GWKGeneralCaseThread().
7557 246 : static void GWKAverageOrModeThread(void *pData)
7558 : {
7559 246 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
7560 246 : const GDALWarpKernel *poWK = psJob->poWK;
7561 246 : const int iYMin = psJob->iYMin;
7562 246 : const int iYMax = psJob->iYMax;
7563 : const double dfMultFactorVerticalShiftPipeline =
7564 246 : poWK->bApplyVerticalShift
7565 246 : ? CPLAtof(CSLFetchNameValueDef(
7566 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
7567 : "1.0"))
7568 246 : : 0.0;
7569 : const bool bAvoidNoDataSingleBand =
7570 342 : poWK->nBands == 1 ||
7571 96 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
7572 246 : "UNIFIED_SRC_NODATA", "FALSE"));
7573 :
7574 246 : const int nDstXSize = poWK->nDstXSize;
7575 246 : const int nSrcXSize = poWK->nSrcXSize;
7576 :
7577 : /* -------------------------------------------------------------------- */
7578 : /* Find out which algorithm to use (small optim.) */
7579 : /* -------------------------------------------------------------------- */
7580 :
7581 : // Only used for GRA_Mode
7582 246 : float *pafCounts = nullptr;
7583 246 : int nBins = 0;
7584 246 : int nBinsOffset = 0;
7585 246 : const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
7586 :
7587 : // Only used with Q1, Med and Q3
7588 246 : float quant = 0.0f;
7589 :
7590 : // To control array allocation only when data type is complex
7591 246 : const bool bIsComplex = GDALDataTypeIsComplex(poWK->eWorkingDataType) != 0;
7592 :
7593 246 : if (poWK->eResample == GRA_Mode)
7594 : {
7595 45 : if (poWK->bApplyVerticalShift)
7596 : {
7597 0 : return GWKModeComplexType(psJob);
7598 : }
7599 :
7600 45 : switch (poWK->eWorkingDataType)
7601 : {
7602 7 : case GDT_UInt8:
7603 7 : nBins = 256;
7604 7 : break;
7605 :
7606 1 : case GDT_Int8:
7607 1 : nBins = 256;
7608 1 : nBinsOffset = nBins / 2;
7609 1 : break;
7610 :
7611 1 : case GDT_UInt16:
7612 1 : nBins = 65536;
7613 1 : break;
7614 :
7615 9 : case GDT_Int16:
7616 9 : nBins = 65536;
7617 9 : nBinsOffset = nBins / 2;
7618 9 : break;
7619 :
7620 10 : case GDT_Int32:
7621 10 : return GWKModeRealType<int32_t>(psJob);
7622 :
7623 1 : case GDT_UInt32:
7624 1 : return GWKModeRealType<uint32_t>(psJob);
7625 :
7626 1 : case GDT_Int64:
7627 1 : return GWKModeRealType<int64_t>(psJob);
7628 :
7629 1 : case GDT_UInt64:
7630 1 : return GWKModeRealType<uint64_t>(psJob);
7631 :
7632 0 : case GDT_Float16:
7633 0 : return GWKModeRealType<GFloat16>(psJob);
7634 :
7635 4 : case GDT_Float32:
7636 4 : return GWKModeRealType<float>(psJob);
7637 :
7638 2 : case GDT_Float64:
7639 2 : return GWKModeRealType<double>(psJob);
7640 :
7641 8 : case GDT_CInt16:
7642 : case GDT_CInt32:
7643 : case GDT_CFloat16:
7644 : case GDT_CFloat32:
7645 : case GDT_CFloat64:
7646 8 : return GWKModeComplexType(psJob);
7647 :
7648 0 : case GDT_Unknown:
7649 : case GDT_TypeCount:
7650 0 : CPLAssert(false);
7651 : return;
7652 : }
7653 :
7654 18 : if (nBins)
7655 : {
7656 : pafCounts =
7657 18 : static_cast<float *>(VSI_MALLOC_VERBOSE(nBins * sizeof(float)));
7658 18 : if (pafCounts == nullptr)
7659 0 : return;
7660 : }
7661 : }
7662 201 : else if (poWK->eResample == GRA_Med)
7663 : {
7664 6 : quant = 0.5f;
7665 : }
7666 195 : else if (poWK->eResample == GRA_Q1)
7667 : {
7668 10 : quant = 0.25f;
7669 : }
7670 185 : else if (poWK->eResample == GRA_Q3)
7671 : {
7672 5 : quant = 0.75f;
7673 : }
7674 180 : else if (poWK->eResample != GRA_Average && poWK->eResample != GRA_RMS &&
7675 11 : poWK->eResample != GRA_Min && poWK->eResample != GRA_Max)
7676 : {
7677 : // Other resample algorithms not permitted here.
7678 0 : CPLError(CE_Fatal, CPLE_AppDefined,
7679 : "GDALWarpKernel():GWKAverageOrModeThread() ERROR, "
7680 : "illegal resample");
7681 : }
7682 :
7683 219 : CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread()");
7684 :
7685 : /* -------------------------------------------------------------------- */
7686 : /* Allocate x,y,z coordinate arrays for transformation ... two */
7687 : /* scanlines worth of positions. */
7688 : /* -------------------------------------------------------------------- */
7689 :
7690 : double *padfX =
7691 219 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7692 : double *padfY =
7693 219 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7694 : double *padfZ =
7695 219 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7696 : double *padfX2 =
7697 219 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7698 : double *padfY2 =
7699 219 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7700 : double *padfZ2 =
7701 219 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7702 219 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7703 219 : int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7704 :
7705 219 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
7706 219 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
7707 219 : const double dfErrorThreshold = CPLAtof(
7708 219 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
7709 :
7710 : const double dfExcludedValuesThreshold =
7711 219 : CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
7712 : "EXCLUDED_VALUES_PCT_THRESHOLD", "50")) /
7713 219 : 100.0;
7714 : const double dfNodataValuesThreshold =
7715 219 : CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
7716 : "NODATA_VALUES_PCT_THRESHOLD", "100")) /
7717 219 : 100.0;
7718 :
7719 : const int nXMargin =
7720 219 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
7721 : const int nYMargin =
7722 219 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
7723 :
7724 : /* ==================================================================== */
7725 : /* Loop over output lines. */
7726 : /* ==================================================================== */
7727 28777 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
7728 : {
7729 28558 : GWKAverageOrModeComputeLineCoords(
7730 : psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
7731 : pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
7732 :
7733 : /* ====================================================================
7734 : */
7735 : /* Loop over pixels in output scanline. */
7736 : /* ====================================================================
7737 : */
7738 7357360 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
7739 : {
7740 7328800 : GPtrDiff_t iSrcOffset = 0;
7741 7328800 : double dfDensity = 1.0;
7742 7328800 : bool bHasFoundDensity = false;
7743 :
7744 7328800 : bool bWrapOverX = false;
7745 7328800 : double dfXMin = 0;
7746 7328800 : double dfYMin = 0;
7747 7328800 : double dfXMax = 0;
7748 7328800 : double dfYMax = 0;
7749 7328800 : int iSrcXMin = 0;
7750 7328800 : int iSrcYMin = 0;
7751 7328800 : int iSrcXMax = 0;
7752 7328800 : int iSrcYMax = 0;
7753 7328800 : if (!GWKAverageOrModeComputeSourceCoords(
7754 : psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
7755 : nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
7756 : iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
7757 : {
7758 3158560 : continue;
7759 : }
7760 :
7761 5314750 : const GPtrDiff_t iDstOffset =
7762 5314750 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
7763 :
7764 5314750 : bool bDone = false;
7765 :
7766 : // Special Average mode where we process all bands together,
7767 : // to avoid averaging tuples that match an entry of m_aadfExcludedValues
7768 5314750 : constexpr double EPSILON = 1e-10;
7769 14838200 : if (poWK->eResample == GRA_Average &&
7770 4208720 : (!poWK->m_aadfExcludedValues.empty() ||
7771 393224 : dfNodataValuesThreshold < 1 - EPSILON) &&
7772 9523480 : !poWK->bApplyVerticalShift && !bIsComplex)
7773 : {
7774 393224 : double dfTotalWeightInvalid = 0.0;
7775 393224 : double dfTotalWeightExcluded = 0.0;
7776 393224 : double dfTotalWeightRegular = 0.0;
7777 786448 : std::vector<double> adfValueReal(poWK->nBands, 0);
7778 786448 : std::vector<double> adfValueAveraged(poWK->nBands, 0);
7779 : std::vector<int> anCountExcludedValues(
7780 393224 : poWK->m_aadfExcludedValues.size(), 0);
7781 :
7782 1179670 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7783 : {
7784 786448 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7785 786448 : iSrcOffset =
7786 786448 : iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7787 2359340 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7788 : iSrcX++, iSrcOffset++)
7789 : {
7790 1572900 : if (bWrapOverX)
7791 0 : iSrcOffset =
7792 0 : (iSrcX % nSrcXSize) +
7793 0 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7794 :
7795 1572900 : const double dfWeight =
7796 1572900 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7797 1572900 : if (dfWeight <= 0)
7798 0 : continue;
7799 :
7800 1572910 : if (poWK->panUnifiedSrcValid != nullptr &&
7801 12 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
7802 : {
7803 3 : dfTotalWeightInvalid += dfWeight;
7804 3 : continue;
7805 : }
7806 :
7807 1572890 : bool bAllValid = true;
7808 2359410 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7809 : {
7810 2097230 : double dfBandDensity = 0;
7811 2097230 : double dfValueImagTmp = 0;
7812 2883740 : if (!(GWKGetPixelValue(
7813 : poWK, iBand, iSrcOffset, &dfBandDensity,
7814 2097230 : &adfValueReal[iBand], &dfValueImagTmp) &&
7815 786513 : dfBandDensity > BAND_DENSITY_THRESHOLD))
7816 : {
7817 1310720 : bAllValid = false;
7818 1310720 : break;
7819 : }
7820 : }
7821 :
7822 1572890 : if (!bAllValid)
7823 : {
7824 1310720 : dfTotalWeightInvalid += dfWeight;
7825 1310720 : continue;
7826 : }
7827 :
7828 262177 : bool bExcludedValueFound = false;
7829 393263 : for (size_t i = 0;
7830 393263 : i < poWK->m_aadfExcludedValues.size(); ++i)
7831 : {
7832 131092 : if (poWK->m_aadfExcludedValues[i] == adfValueReal)
7833 : {
7834 6 : bExcludedValueFound = true;
7835 6 : ++anCountExcludedValues[i];
7836 6 : dfTotalWeightExcluded += dfWeight;
7837 6 : break;
7838 : }
7839 : }
7840 262177 : if (!bExcludedValueFound)
7841 : {
7842 : // Weighted incremental algorithm mean
7843 : // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
7844 262171 : dfTotalWeightRegular += dfWeight;
7845 1048670 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7846 : {
7847 786495 : adfValueAveraged[iBand] +=
7848 1572990 : (dfWeight / dfTotalWeightRegular) *
7849 1572990 : (adfValueReal[iBand] -
7850 786495 : adfValueAveraged[iBand]);
7851 : }
7852 : }
7853 : }
7854 : }
7855 :
7856 393224 : const double dfTotalWeight = dfTotalWeightInvalid +
7857 : dfTotalWeightExcluded +
7858 : dfTotalWeightRegular;
7859 393224 : if (dfTotalWeightInvalid > 0 &&
7860 : dfTotalWeightInvalid >=
7861 327685 : dfNodataValuesThreshold * dfTotalWeight)
7862 : {
7863 : // Do nothing. Let bHasFoundDensity to false.
7864 : }
7865 65543 : else if (dfTotalWeightExcluded > 0 &&
7866 : dfTotalWeightExcluded >=
7867 6 : dfExcludedValuesThreshold * dfTotalWeight)
7868 : {
7869 : // Find the most represented excluded value tuple
7870 2 : size_t iExcludedValue = 0;
7871 2 : int nExcludedValueCount = 0;
7872 4 : for (size_t i = 0; i < poWK->m_aadfExcludedValues.size();
7873 : ++i)
7874 : {
7875 2 : if (anCountExcludedValues[i] > nExcludedValueCount)
7876 : {
7877 2 : iExcludedValue = i;
7878 2 : nExcludedValueCount = anCountExcludedValues[i];
7879 : }
7880 : }
7881 :
7882 2 : bHasFoundDensity = true;
7883 :
7884 8 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7885 : {
7886 6 : GWKSetPixelValue(
7887 : poWK, iBand, iDstOffset, /* dfBandDensity = */ 1.0,
7888 6 : poWK->m_aadfExcludedValues[iExcludedValue][iBand],
7889 : 0, bAvoidNoDataSingleBand);
7890 : }
7891 :
7892 2 : if (!bAvoidNoDataSingleBand)
7893 : {
7894 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
7895 2 : }
7896 : }
7897 65541 : else if (dfTotalWeightRegular > 0)
7898 : {
7899 65541 : bHasFoundDensity = true;
7900 :
7901 262160 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7902 : {
7903 196619 : GWKSetPixelValue(poWK, iBand, iDstOffset,
7904 : /* dfBandDensity = */ 1.0,
7905 196619 : adfValueAveraged[iBand], 0,
7906 : bAvoidNoDataSingleBand);
7907 : }
7908 :
7909 65541 : if (!bAvoidNoDataSingleBand)
7910 : {
7911 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
7912 : }
7913 : }
7914 :
7915 : // Skip below loop on bands
7916 393224 : bDone = true;
7917 : }
7918 :
7919 : /* ====================================================================
7920 : */
7921 : /* Loop processing each band. */
7922 : /* ====================================================================
7923 : */
7924 :
7925 17670500 : for (int iBand = 0; !bDone && iBand < poWK->nBands; iBand++)
7926 : {
7927 12355700 : double dfBandDensity = 0.0;
7928 12355700 : double dfValueReal = 0.0;
7929 12355700 : double dfValueImag = 0.0;
7930 12355700 : double dfValueRealTmp = 0.0;
7931 12355700 : double dfValueImagTmp = 0.0;
7932 :
7933 : /* --------------------------------------------------------------------
7934 : */
7935 : /* Collect the source value. */
7936 : /* --------------------------------------------------------------------
7937 : */
7938 :
7939 : // Loop over source lines and pixels - 3 possible algorithms.
7940 :
7941 12355700 : if (poWK->eResample == GRA_Average)
7942 : {
7943 9833240 : double dfTotalWeight = 0.0;
7944 :
7945 : // This code adapted from GDALDownsampleChunk32R_AverageT()
7946 : // in gcore/overview.cpp.
7947 25243600 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7948 : {
7949 15410300 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7950 15410300 : iSrcOffset = iSrcXMin +
7951 15410300 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7952 44761400 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7953 : iSrcX++, iSrcOffset++)
7954 : {
7955 29351100 : if (bWrapOverX)
7956 2571 : iSrcOffset =
7957 2571 : (iSrcX % nSrcXSize) +
7958 2571 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7959 :
7960 29351100 : if (poWK->panUnifiedSrcValid != nullptr &&
7961 4 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7962 : iSrcOffset))
7963 : {
7964 1 : continue;
7965 : }
7966 :
7967 29351100 : if (GWKGetPixelValue(
7968 : poWK, iBand, iSrcOffset, &dfBandDensity,
7969 48239400 : &dfValueRealTmp, &dfValueImagTmp) &&
7970 18888400 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7971 : {
7972 18888400 : const double dfWeight =
7973 18888400 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7974 18888400 : if (dfWeight > 0)
7975 : {
7976 : // Weighted incremental algorithm mean
7977 : // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
7978 18888400 : dfTotalWeight += dfWeight;
7979 18888400 : dfValueReal +=
7980 18888400 : (dfWeight / dfTotalWeight) *
7981 18888400 : (dfValueRealTmp - dfValueReal);
7982 18888400 : if (bIsComplex)
7983 : {
7984 252 : dfValueImag +=
7985 252 : (dfWeight / dfTotalWeight) *
7986 252 : (dfValueImagTmp - dfValueImag);
7987 : }
7988 : }
7989 : }
7990 : }
7991 : }
7992 :
7993 9833240 : if (dfTotalWeight > 0)
7994 : {
7995 7530420 : if (poWK->bApplyVerticalShift)
7996 : {
7997 0 : if (!std::isfinite(padfZ[iDstX]))
7998 0 : continue;
7999 : // Subtract padfZ[] since the coordinate
8000 : // transformation is from target to source
8001 0 : dfValueReal =
8002 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
8003 0 : padfZ[iDstX] *
8004 : dfMultFactorVerticalShiftPipeline;
8005 : }
8006 :
8007 7530420 : dfBandDensity = 1;
8008 7530420 : bHasFoundDensity = true;
8009 : }
8010 : } // GRA_Average.
8011 :
8012 2522460 : else if (poWK->eResample == GRA_RMS)
8013 : {
8014 300416 : double dfTotalReal = 0.0;
8015 300416 : double dfTotalImag = 0.0;
8016 300416 : double dfTotalWeight = 0.0;
8017 : // This code adapted from GDALDownsampleChunk32R_AverageT()
8018 : // in gcore/overview.cpp.
8019 630578 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
8020 : {
8021 330162 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
8022 330162 : iSrcOffset = iSrcXMin +
8023 330162 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8024 772930 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
8025 : iSrcX++, iSrcOffset++)
8026 : {
8027 442768 : if (bWrapOverX)
8028 1371 : iSrcOffset =
8029 1371 : (iSrcX % nSrcXSize) +
8030 1371 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8031 :
8032 442768 : if (poWK->panUnifiedSrcValid != nullptr &&
8033 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
8034 : iSrcOffset))
8035 : {
8036 0 : continue;
8037 : }
8038 :
8039 442768 : if (GWKGetPixelValue(
8040 : poWK, iBand, iSrcOffset, &dfBandDensity,
8041 885536 : &dfValueRealTmp, &dfValueImagTmp) &&
8042 442768 : dfBandDensity > BAND_DENSITY_THRESHOLD)
8043 : {
8044 442768 : const double dfWeight =
8045 442768 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
8046 442768 : dfTotalWeight += dfWeight;
8047 442768 : dfTotalReal +=
8048 442768 : dfValueRealTmp * dfValueRealTmp * dfWeight;
8049 442768 : if (bIsComplex)
8050 48 : dfTotalImag += dfValueImagTmp *
8051 48 : dfValueImagTmp * dfWeight;
8052 : }
8053 : }
8054 : }
8055 :
8056 300416 : if (dfTotalWeight > 0)
8057 : {
8058 300416 : dfValueReal = sqrt(dfTotalReal / dfTotalWeight);
8059 :
8060 300416 : if (poWK->bApplyVerticalShift)
8061 : {
8062 0 : if (!std::isfinite(padfZ[iDstX]))
8063 0 : continue;
8064 : // Subtract padfZ[] since the coordinate
8065 : // transformation is from target to source
8066 0 : dfValueReal =
8067 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
8068 0 : padfZ[iDstX] *
8069 : dfMultFactorVerticalShiftPipeline;
8070 : }
8071 :
8072 300416 : if (bIsComplex)
8073 12 : dfValueImag = sqrt(dfTotalImag / dfTotalWeight);
8074 :
8075 300416 : dfBandDensity = 1;
8076 300416 : bHasFoundDensity = true;
8077 : }
8078 : } // GRA_RMS.
8079 :
8080 2222040 : else if (poWK->eResample == GRA_Mode)
8081 : {
8082 496623 : float fMaxCount = 0.0f;
8083 496623 : int nMode = -1;
8084 496623 : bool bHasSourceValues = false;
8085 :
8086 496623 : memset(pafCounts, 0, nBins * sizeof(float));
8087 :
8088 1167120 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
8089 : {
8090 670495 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
8091 670495 : iSrcOffset = iSrcXMin +
8092 670495 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8093 1964680 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
8094 : iSrcX++, iSrcOffset++)
8095 : {
8096 1294190 : if (bWrapOverX)
8097 1371 : iSrcOffset =
8098 1371 : (iSrcX % nSrcXSize) +
8099 1371 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8100 :
8101 1294190 : if (poWK->panUnifiedSrcValid != nullptr &&
8102 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
8103 : iSrcOffset))
8104 0 : continue;
8105 :
8106 1294190 : if (GWKGetPixelValue(
8107 : poWK, iBand, iSrcOffset, &dfBandDensity,
8108 2588370 : &dfValueRealTmp, &dfValueImagTmp) &&
8109 1294190 : dfBandDensity > BAND_DENSITY_THRESHOLD)
8110 : {
8111 1294190 : bHasSourceValues = true;
8112 1294190 : const int nVal =
8113 1294190 : static_cast<int>(dfValueRealTmp);
8114 1294190 : const int iBin = nVal + nBinsOffset;
8115 1294190 : const double dfWeight =
8116 1294190 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
8117 :
8118 : // Sum the density.
8119 1294190 : pafCounts[iBin] += static_cast<float>(dfWeight);
8120 : // Is it the most common value so far?
8121 1294190 : bool bUpdateMode = pafCounts[iBin] > fMaxCount;
8122 1294190 : if (!bUpdateMode &&
8123 227545 : pafCounts[iBin] == fMaxCount)
8124 : {
8125 15866 : switch (eTieStrategy)
8126 : {
8127 15858 : case GWKTS_First:
8128 15858 : break;
8129 4 : case GWKTS_Min:
8130 4 : bUpdateMode = nVal < nMode;
8131 4 : break;
8132 4 : case GWKTS_Max:
8133 4 : bUpdateMode = nVal > nMode;
8134 4 : break;
8135 : }
8136 : }
8137 1294190 : if (bUpdateMode)
8138 : {
8139 1066640 : nMode = nVal;
8140 1066640 : fMaxCount = pafCounts[iBin];
8141 : }
8142 : }
8143 : }
8144 : }
8145 :
8146 496623 : if (bHasSourceValues)
8147 : {
8148 496623 : dfValueReal = nMode;
8149 496623 : dfBandDensity = 1;
8150 496623 : bHasFoundDensity = true;
8151 : }
8152 : } // GRA_Mode.
8153 :
8154 1725420 : else if (poWK->eResample == GRA_Max)
8155 : {
8156 335037 : bool bFoundValid = false;
8157 335037 : double dfTotalReal = cpl::NumericLimits<double>::lowest();
8158 : // This code adapted from nAlgo 1 method, GRA_Average.
8159 842572 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
8160 : {
8161 507535 : iSrcOffset = iSrcXMin +
8162 507535 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8163 1638060 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
8164 : iSrcX++, iSrcOffset++)
8165 : {
8166 1130520 : if (bWrapOverX)
8167 1371 : iSrcOffset =
8168 1371 : (iSrcX % nSrcXSize) +
8169 1371 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8170 :
8171 1133330 : if (poWK->panUnifiedSrcValid != nullptr &&
8172 2809 : !CPLMaskGet(poWK->panUnifiedSrcValid,
8173 : iSrcOffset))
8174 : {
8175 2446 : continue;
8176 : }
8177 :
8178 : // Returns pixel value if it is not no data.
8179 1128070 : if (GWKGetPixelValue(
8180 : poWK, iBand, iSrcOffset, &dfBandDensity,
8181 2256150 : &dfValueRealTmp, &dfValueImagTmp) &&
8182 1128070 : dfBandDensity > BAND_DENSITY_THRESHOLD)
8183 : {
8184 1128070 : bFoundValid = true;
8185 1128070 : if (dfTotalReal < dfValueRealTmp)
8186 : {
8187 463372 : dfTotalReal = dfValueRealTmp;
8188 : }
8189 : }
8190 : }
8191 : }
8192 :
8193 335037 : if (bFoundValid)
8194 : {
8195 335037 : dfValueReal = dfTotalReal;
8196 :
8197 335037 : if (poWK->bApplyVerticalShift)
8198 : {
8199 0 : if (!std::isfinite(padfZ[iDstX]))
8200 0 : continue;
8201 : // Subtract padfZ[] since the coordinate
8202 : // transformation is from target to source
8203 0 : dfValueReal =
8204 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
8205 0 : padfZ[iDstX] *
8206 : dfMultFactorVerticalShiftPipeline;
8207 : }
8208 :
8209 335037 : dfBandDensity = 1;
8210 335037 : bHasFoundDensity = true;
8211 : }
8212 : }
8213 :
8214 1390380 : else if (poWK->eResample == GRA_Min)
8215 : {
8216 335012 : bool bFoundValid = false;
8217 335012 : double dfTotalReal = cpl::NumericLimits<double>::max();
8218 : // This code adapted from nAlgo 1 method, GRA_Average.
8219 842282 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
8220 : {
8221 507270 : iSrcOffset = iSrcXMin +
8222 507270 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8223 1634980 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
8224 : iSrcX++, iSrcOffset++)
8225 : {
8226 1127710 : if (bWrapOverX)
8227 1371 : iSrcOffset =
8228 1371 : (iSrcX % nSrcXSize) +
8229 1371 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8230 :
8231 1127710 : if (poWK->panUnifiedSrcValid != nullptr &&
8232 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
8233 : iSrcOffset))
8234 : {
8235 0 : continue;
8236 : }
8237 :
8238 : // Returns pixel value if it is not no data.
8239 1127710 : if (GWKGetPixelValue(
8240 : poWK, iBand, iSrcOffset, &dfBandDensity,
8241 2255420 : &dfValueRealTmp, &dfValueImagTmp) &&
8242 1127710 : dfBandDensity > BAND_DENSITY_THRESHOLD)
8243 : {
8244 1127710 : bFoundValid = true;
8245 1127710 : if (dfTotalReal > dfValueRealTmp)
8246 : {
8247 464157 : dfTotalReal = dfValueRealTmp;
8248 : }
8249 : }
8250 : }
8251 : }
8252 :
8253 335012 : if (bFoundValid)
8254 : {
8255 335012 : dfValueReal = dfTotalReal;
8256 :
8257 335012 : if (poWK->bApplyVerticalShift)
8258 : {
8259 0 : if (!std::isfinite(padfZ[iDstX]))
8260 0 : continue;
8261 : // Subtract padfZ[] since the coordinate
8262 : // transformation is from target to source
8263 0 : dfValueReal =
8264 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
8265 0 : padfZ[iDstX] *
8266 : dfMultFactorVerticalShiftPipeline;
8267 : }
8268 :
8269 335012 : dfBandDensity = 1;
8270 335012 : bHasFoundDensity = true;
8271 : }
8272 : } // GRA_Min.
8273 :
8274 : else
8275 : // poWK->eResample == GRA_Med | GRA_Q1 | GRA_Q3.
8276 : {
8277 1055370 : CPLAssert(quant > 0.0f);
8278 :
8279 1055370 : bool bFoundValid = false;
8280 1055370 : std::vector<double> dfRealValuesTmp;
8281 :
8282 : // This code adapted from nAlgo 1 method, GRA_Average.
8283 2677810 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
8284 : {
8285 1622440 : iSrcOffset = iSrcXMin +
8286 1622440 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8287 5205220 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
8288 : iSrcX++, iSrcOffset++)
8289 : {
8290 3582770 : if (bWrapOverX)
8291 4113 : iSrcOffset =
8292 4113 : (iSrcX % nSrcXSize) +
8293 4113 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8294 :
8295 3779380 : if (poWK->panUnifiedSrcValid != nullptr &&
8296 196608 : !CPLMaskGet(poWK->panUnifiedSrcValid,
8297 : iSrcOffset))
8298 : {
8299 195449 : continue;
8300 : }
8301 :
8302 : // Returns pixel value if it is not no data.
8303 3387320 : if (GWKGetPixelValue(
8304 : poWK, iBand, iSrcOffset, &dfBandDensity,
8305 6774650 : &dfValueRealTmp, &dfValueImagTmp) &&
8306 3387320 : dfBandDensity > BAND_DENSITY_THRESHOLD)
8307 : {
8308 3387320 : bFoundValid = true;
8309 3387320 : dfRealValuesTmp.push_back(dfValueRealTmp);
8310 : }
8311 : }
8312 : }
8313 :
8314 1055370 : if (bFoundValid)
8315 : {
8316 1006150 : std::sort(dfRealValuesTmp.begin(),
8317 : dfRealValuesTmp.end());
8318 : int quantIdx = static_cast<int>(
8319 1006150 : std::ceil(quant * dfRealValuesTmp.size() - 1));
8320 1006150 : dfValueReal = dfRealValuesTmp[quantIdx];
8321 :
8322 1006150 : if (poWK->bApplyVerticalShift)
8323 : {
8324 0 : if (!std::isfinite(padfZ[iDstX]))
8325 0 : continue;
8326 : // Subtract padfZ[] since the coordinate
8327 : // transformation is from target to source
8328 0 : dfValueReal =
8329 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
8330 0 : padfZ[iDstX] *
8331 : dfMultFactorVerticalShiftPipeline;
8332 : }
8333 :
8334 1006150 : dfBandDensity = 1;
8335 1006150 : bHasFoundDensity = true;
8336 1006150 : dfRealValuesTmp.clear();
8337 : }
8338 : } // Quantile.
8339 :
8340 : /* --------------------------------------------------------------------
8341 : */
8342 : /* We have a computed value from the source. Now apply it
8343 : * to */
8344 : /* the destination pixel. */
8345 : /* --------------------------------------------------------------------
8346 : */
8347 12355700 : if (bHasFoundDensity)
8348 : {
8349 : // TODO: Should we compute dfBandDensity in fct of
8350 : // nCount/nCount2, or use as a threshold to set the dest
8351 : // value?
8352 : // dfBandDensity = (float) nCount / nCount2;
8353 : // if( (float) nCount / nCount2 > 0.1 )
8354 : // or fix gdalwarp crop_to_cutline to crop partially
8355 : // overlapping pixels.
8356 10003600 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
8357 : dfValueReal, dfValueImag,
8358 : bAvoidNoDataSingleBand);
8359 : }
8360 : }
8361 :
8362 5314750 : if (!bHasFoundDensity)
8363 1144510 : continue;
8364 :
8365 4170240 : if (!bAvoidNoDataSingleBand)
8366 : {
8367 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
8368 : }
8369 :
8370 : /* --------------------------------------------------------------------
8371 : */
8372 : /* Update destination density/validity masks. */
8373 : /* --------------------------------------------------------------------
8374 : */
8375 4170240 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
8376 :
8377 4170240 : if (poWK->panDstValid != nullptr)
8378 : {
8379 1184 : CPLMaskSet(poWK->panDstValid, iDstOffset);
8380 : }
8381 : } /* Next iDstX */
8382 :
8383 : /* --------------------------------------------------------------------
8384 : */
8385 : /* Report progress to the user, and optionally cancel out. */
8386 : /* --------------------------------------------------------------------
8387 : */
8388 28558 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
8389 0 : break;
8390 : }
8391 :
8392 : /* -------------------------------------------------------------------- */
8393 : /* Cleanup and return. */
8394 : /* -------------------------------------------------------------------- */
8395 219 : CPLFree(padfX);
8396 219 : CPLFree(padfY);
8397 219 : CPLFree(padfZ);
8398 219 : CPLFree(padfX2);
8399 219 : CPLFree(padfY2);
8400 219 : CPLFree(padfZ2);
8401 219 : CPLFree(pabSuccess);
8402 219 : CPLFree(pabSuccess2);
8403 219 : VSIFree(pafCounts);
8404 : }
8405 :
8406 : /************************************************************************/
8407 : /* getOrientation() */
8408 : /************************************************************************/
8409 :
8410 : // Returns 1 whether (p1,p2,p3) is clockwise oriented,
8411 : // -1 if it is counter-clockwise oriented,
8412 : // or 0 if it is colinear.
8413 2355910 : static int getOrientation(const XYPair &p1, const XYPair &p2, const XYPair &p3)
8414 : {
8415 2355910 : const double p1x = p1.first;
8416 2355910 : const double p1y = p1.second;
8417 2355910 : const double p2x = p2.first;
8418 2355910 : const double p2y = p2.second;
8419 2355910 : const double p3x = p3.first;
8420 2355910 : const double p3y = p3.second;
8421 2355910 : const double val = (p2y - p1y) * (p3x - p2x) - (p2x - p1x) * (p3y - p2y);
8422 2355910 : if (std::abs(val) < 1e-20)
8423 2690 : return 0;
8424 2353220 : else if (val > 0)
8425 0 : return 1;
8426 : else
8427 2353220 : return -1;
8428 : }
8429 :
8430 : /************************************************************************/
8431 : /* isConvex() */
8432 : /************************************************************************/
8433 :
8434 : // poly must be closed
8435 785302 : static bool isConvex(const XYPoly &poly)
8436 : {
8437 785302 : const size_t n = poly.size();
8438 785302 : size_t i = 0;
8439 785302 : int last_orientation = getOrientation(poly[i], poly[i + 1], poly[i + 2]);
8440 785302 : ++i;
8441 2355910 : for (; i < n - 2; ++i)
8442 : {
8443 : const int orientation =
8444 1570600 : getOrientation(poly[i], poly[i + 1], poly[i + 2]);
8445 1570600 : if (orientation != 0)
8446 : {
8447 1567910 : if (last_orientation == 0)
8448 0 : last_orientation = orientation;
8449 1567910 : else if (orientation != last_orientation)
8450 0 : return false;
8451 : }
8452 : }
8453 785302 : return true;
8454 : }
8455 :
8456 : /************************************************************************/
8457 : /* pointIntersectsConvexPoly() */
8458 : /************************************************************************/
8459 :
8460 : // Returns whether xy intersects poly, that must be closed and convex.
8461 6049100 : static bool pointIntersectsConvexPoly(const XYPair &xy, const XYPoly &poly)
8462 : {
8463 6049100 : const size_t n = poly.size();
8464 6049100 : double dx1 = xy.first - poly[0].first;
8465 6049100 : double dy1 = xy.second - poly[0].second;
8466 6049100 : double dx2 = poly[1].first - poly[0].first;
8467 6049100 : double dy2 = poly[1].second - poly[0].second;
8468 6049100 : double prevCrossProduct = dx1 * dy2 - dx2 * dy1;
8469 :
8470 : // Check if the point remains on the same side (left/right) of all edges
8471 14556400 : for (size_t i = 2; i < n; i++)
8472 : {
8473 12793100 : dx1 = xy.first - poly[i - 1].first;
8474 12793100 : dy1 = xy.second - poly[i - 1].second;
8475 :
8476 12793100 : dx2 = poly[i].first - poly[i - 1].first;
8477 12793100 : dy2 = poly[i].second - poly[i - 1].second;
8478 :
8479 12793100 : double crossProduct = dx1 * dy2 - dx2 * dy1;
8480 12793100 : if (std::abs(prevCrossProduct) < 1e-20)
8481 725558 : prevCrossProduct = crossProduct;
8482 12067500 : else if (prevCrossProduct * crossProduct < 0)
8483 4285760 : return false;
8484 : }
8485 :
8486 1763340 : return true;
8487 : }
8488 :
8489 : /************************************************************************/
8490 : /* getIntersection() */
8491 : /************************************************************************/
8492 :
8493 : /* Returns intersection of [p1,p2] with [p3,p4], if
8494 : * it is a single point, and the 2 segments are not colinear.
8495 : */
8496 11811000 : static bool getIntersection(const XYPair &p1, const XYPair &p2,
8497 : const XYPair &p3, const XYPair &p4, XYPair &xy)
8498 : {
8499 11811000 : const double x1 = p1.first;
8500 11811000 : const double y1 = p1.second;
8501 11811000 : const double x2 = p2.first;
8502 11811000 : const double y2 = p2.second;
8503 11811000 : const double x3 = p3.first;
8504 11811000 : const double y3 = p3.second;
8505 11811000 : const double x4 = p4.first;
8506 11811000 : const double y4 = p4.second;
8507 11811000 : const double t_num = (x1 - x3) * (y3 - y4) - (y1 - y3) * (x3 - x4);
8508 11811000 : const double denom = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4);
8509 11811000 : if (t_num * denom < 0 || std::abs(t_num) > std::abs(denom) || denom == 0)
8510 9260780 : return false;
8511 :
8512 2550260 : const double u_num = (x1 - x3) * (y1 - y2) - (y1 - y3) * (x1 - x2);
8513 2550260 : if (u_num * denom < 0 || std::abs(u_num) > std::abs(denom))
8514 973924 : return false;
8515 :
8516 1576340 : const double t = t_num / denom;
8517 1576340 : xy.first = x1 + t * (x2 - x1);
8518 1576340 : xy.second = y1 + t * (y2 - y1);
8519 1576340 : return true;
8520 : }
8521 :
8522 : /************************************************************************/
8523 : /* getConvexPolyIntersection() */
8524 : /************************************************************************/
8525 :
8526 : // poly1 and poly2 must be closed and convex.
8527 : // The returned intersection will not necessary be closed.
8528 785302 : static void getConvexPolyIntersection(const XYPoly &poly1, const XYPoly &poly2,
8529 : XYPoly &intersection)
8530 : {
8531 785302 : intersection.clear();
8532 :
8533 : // Add all points of poly1 inside poly2
8534 3926510 : for (size_t i = 0; i < poly1.size() - 1; ++i)
8535 : {
8536 3141210 : if (pointIntersectsConvexPoly(poly1[i], poly2))
8537 1187430 : intersection.push_back(poly1[i]);
8538 : }
8539 785302 : if (intersection.size() == poly1.size() - 1)
8540 : {
8541 : // poly1 is inside poly2
8542 119100 : return;
8543 : }
8544 :
8545 : // Add all points of poly2 inside poly1
8546 3634860 : for (size_t i = 0; i < poly2.size() - 1; ++i)
8547 : {
8548 2907890 : if (pointIntersectsConvexPoly(poly2[i], poly1))
8549 575904 : intersection.push_back(poly2[i]);
8550 : }
8551 :
8552 : // Compute the intersection of all edges of both polygons
8553 726972 : XYPair xy;
8554 3634860 : for (size_t i1 = 0; i1 < poly1.size() - 1; ++i1)
8555 : {
8556 14539400 : for (size_t i2 = 0; i2 < poly2.size() - 1; ++i2)
8557 : {
8558 11631600 : if (getIntersection(poly1[i1], poly1[i1 + 1], poly2[i2],
8559 11631600 : poly2[i2 + 1], xy))
8560 : {
8561 1576230 : intersection.push_back(xy);
8562 : }
8563 : }
8564 : }
8565 :
8566 726972 : if (intersection.empty())
8567 60770 : return;
8568 :
8569 : // Find lowest-left point in intersection set
8570 666202 : double lowest_x = cpl::NumericLimits<double>::max();
8571 666202 : double lowest_y = cpl::NumericLimits<double>::max();
8572 3772450 : for (const auto &pair : intersection)
8573 : {
8574 3106240 : const double x = pair.first;
8575 3106240 : const double y = pair.second;
8576 3106240 : if (y < lowest_y || (y == lowest_y && x < lowest_x))
8577 : {
8578 1096040 : lowest_x = x;
8579 1096040 : lowest_y = y;
8580 : }
8581 : }
8582 :
8583 5737980 : const auto sortFunc = [&](const XYPair &p1, const XYPair &p2)
8584 : {
8585 5737980 : const double p1x_diff = p1.first - lowest_x;
8586 5737980 : const double p1y_diff = p1.second - lowest_y;
8587 5737980 : const double p2x_diff = p2.first - lowest_x;
8588 5737980 : const double p2y_diff = p2.second - lowest_y;
8589 5737980 : if (p2y_diff == 0.0 && p1y_diff == 0.0)
8590 : {
8591 2655420 : if (p1x_diff >= 0)
8592 : {
8593 2655420 : if (p2x_diff >= 0)
8594 2655420 : return p1.first < p2.first;
8595 0 : return true;
8596 : }
8597 : else
8598 : {
8599 0 : if (p2x_diff >= 0)
8600 0 : return false;
8601 0 : return p1.first < p2.first;
8602 : }
8603 : }
8604 :
8605 3082560 : if (p2x_diff == 0.0 && p1x_diff == 0.0)
8606 1046960 : return p1.second < p2.second;
8607 :
8608 : double tan_p1;
8609 2035600 : if (p1x_diff == 0.0)
8610 464622 : tan_p1 = p1y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
8611 : else
8612 1570980 : tan_p1 = p1y_diff / p1x_diff;
8613 :
8614 : double tan_p2;
8615 2035600 : if (p2x_diff == 0.0)
8616 839515 : tan_p2 = p2y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
8617 : else
8618 1196080 : tan_p2 = p2y_diff / p2x_diff;
8619 :
8620 2035600 : if (tan_p1 >= 0)
8621 : {
8622 1904790 : if (tan_p2 >= 0)
8623 1881590 : return tan_p1 < tan_p2;
8624 : else
8625 23199 : return true;
8626 : }
8627 : else
8628 : {
8629 130806 : if (tan_p2 >= 0)
8630 103900 : return false;
8631 : else
8632 26906 : return tan_p1 < tan_p2;
8633 : }
8634 666202 : };
8635 :
8636 : // Sort points by increasing atan2(y-lowest_y, x-lowest_x) to form a convex
8637 : // hull
8638 666202 : std::sort(intersection.begin(), intersection.end(), sortFunc);
8639 :
8640 : // Remove duplicated points
8641 666202 : size_t j = 1;
8642 3106240 : for (size_t i = 1; i < intersection.size(); ++i)
8643 : {
8644 2440040 : if (intersection[i] != intersection[i - 1])
8645 : {
8646 1452560 : if (j < i)
8647 545275 : intersection[j] = intersection[i];
8648 1452560 : ++j;
8649 : }
8650 : }
8651 666202 : intersection.resize(j);
8652 : }
8653 :
8654 : /************************************************************************/
8655 : /* GWKSumPreserving() */
8656 : /************************************************************************/
8657 :
8658 : static void GWKSumPreservingThread(void *pData);
8659 :
8660 19 : static CPLErr GWKSumPreserving(GDALWarpKernel *poWK)
8661 : {
8662 19 : return GWKRun(poWK, "GWKSumPreserving", GWKSumPreservingThread);
8663 : }
8664 :
8665 19 : static void GWKSumPreservingThread(void *pData)
8666 : {
8667 19 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
8668 19 : GDALWarpKernel *poWK = psJob->poWK;
8669 19 : const int iYMin = psJob->iYMin;
8670 19 : const int iYMax = psJob->iYMax;
8671 : const bool bIsAffineNoRotation =
8672 19 : GDALTransformIsAffineNoRotation(poWK->pfnTransformer,
8673 28 : poWK->pTransformerArg) &&
8674 : // for debug/testing purposes
8675 9 : CPLTestBool(
8676 19 : CPLGetConfigOption("GDAL_WARP_USE_AFFINE_OPTIMIZATION", "YES"));
8677 : const bool bAvoidNoDataSingleBand =
8678 21 : poWK->nBands == 1 ||
8679 2 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
8680 19 : "UNIFIED_SRC_NODATA", "FALSE"));
8681 :
8682 19 : const int nDstXSize = poWK->nDstXSize;
8683 19 : const int nSrcXSize = poWK->nSrcXSize;
8684 19 : const int nSrcYSize = poWK->nSrcYSize;
8685 :
8686 38 : std::vector<double> adfX0(nSrcXSize + 1);
8687 38 : std::vector<double> adfY0(nSrcXSize + 1);
8688 38 : std::vector<double> adfZ0(nSrcXSize + 1);
8689 38 : std::vector<double> adfX1(nSrcXSize + 1);
8690 38 : std::vector<double> adfY1(nSrcXSize + 1);
8691 38 : std::vector<double> adfZ1(nSrcXSize + 1);
8692 38 : std::vector<int> abSuccess0(nSrcXSize + 1);
8693 38 : std::vector<int> abSuccess1(nSrcXSize + 1);
8694 :
8695 : CPLRectObj sGlobalBounds;
8696 19 : sGlobalBounds.minx = -2 * poWK->dfXScale;
8697 19 : sGlobalBounds.miny = iYMin - 2 * poWK->dfYScale;
8698 19 : sGlobalBounds.maxx = nDstXSize + 2 * poWK->dfXScale;
8699 19 : sGlobalBounds.maxy = iYMax + 2 * poWK->dfYScale;
8700 19 : CPLQuadTree *hQuadTree = CPLQuadTreeCreate(&sGlobalBounds, nullptr);
8701 :
8702 : struct SourcePixel
8703 : {
8704 : int iSrcX;
8705 : int iSrcY;
8706 :
8707 : // Coordinates of source pixel in target pixel coordinates
8708 : double dfDstX0;
8709 : double dfDstY0;
8710 : double dfDstX1;
8711 : double dfDstY1;
8712 : double dfDstX2;
8713 : double dfDstY2;
8714 : double dfDstX3;
8715 : double dfDstY3;
8716 :
8717 : // Source pixel total area (might be larger than the one described
8718 : // by above coordinates, if the pixel was crossing the antimeridian
8719 : // and split)
8720 : double dfArea;
8721 : };
8722 :
8723 38 : std::vector<SourcePixel> sourcePixels;
8724 :
8725 38 : XYPoly discontinuityLeft(5);
8726 38 : XYPoly discontinuityRight(5);
8727 :
8728 : /* ==================================================================== */
8729 : /* First pass: transform the 4 corners of each potential */
8730 : /* contributing source pixel to target pixel coordinates. */
8731 : /* ==================================================================== */
8732 :
8733 : // Special case for top line
8734 : {
8735 19 : int iY = 0;
8736 3364 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8737 : {
8738 3345 : adfX1[iX] = iX + poWK->nSrcXOff;
8739 3345 : adfY1[iX] = iY + poWK->nSrcYOff;
8740 3345 : adfZ1[iX] = 0;
8741 : }
8742 :
8743 19 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
8744 : adfX1.data(), adfY1.data(), adfZ1.data(),
8745 : abSuccess1.data());
8746 :
8747 3364 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8748 : {
8749 3345 : if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
8750 0 : abSuccess1[iX] = FALSE;
8751 : else
8752 : {
8753 3345 : adfX1[iX] -= poWK->nDstXOff;
8754 3345 : adfY1[iX] -= poWK->nDstYOff;
8755 : }
8756 : }
8757 : }
8758 :
8759 2032 : const auto getInsideXSign = [poWK, nDstXSize](double dfX)
8760 : {
8761 2032 : return dfX - poWK->nDstXOff >= -2 * poWK->dfXScale &&
8762 872 : dfX - poWK->nDstXOff <= nDstXSize + 2 * poWK->dfXScale
8763 2032 : ? 1
8764 1160 : : -1;
8765 19 : };
8766 :
8767 : const auto FindDiscontinuity =
8768 80 : [poWK, psJob, getInsideXSign](
8769 : double dfXLeft, double dfXRight, double dfY,
8770 : int XLeftReprojectedInsideSign, double &dfXMidReprojectedLeft,
8771 800 : double &dfXMidReprojectedRight, double &dfYMidReprojected)
8772 : {
8773 880 : for (int i = 0; i < 10 && dfXRight - dfXLeft > 1e-8; ++i)
8774 : {
8775 800 : double dfXMid = (dfXLeft + dfXRight) / 2;
8776 800 : double dfXMidReprojected = dfXMid;
8777 800 : dfYMidReprojected = dfY;
8778 800 : double dfZ = 0;
8779 800 : int nSuccess = 0;
8780 800 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1,
8781 : &dfXMidReprojected, &dfYMidReprojected, &dfZ,
8782 : &nSuccess);
8783 800 : if (XLeftReprojectedInsideSign != getInsideXSign(dfXMidReprojected))
8784 : {
8785 456 : dfXRight = dfXMid;
8786 456 : dfXMidReprojectedRight = dfXMidReprojected;
8787 : }
8788 : else
8789 : {
8790 344 : dfXLeft = dfXMid;
8791 344 : dfXMidReprojectedLeft = dfXMidReprojected;
8792 : }
8793 : }
8794 80 : };
8795 :
8796 2685 : for (int iY = 0; iY < nSrcYSize; ++iY)
8797 : {
8798 2666 : std::swap(adfX0, adfX1);
8799 2666 : std::swap(adfY0, adfY1);
8800 2666 : std::swap(adfZ0, adfZ1);
8801 2666 : std::swap(abSuccess0, abSuccess1);
8802 :
8803 4836120 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8804 : {
8805 4833460 : adfX1[iX] = iX + poWK->nSrcXOff;
8806 4833460 : adfY1[iX] = iY + 1 + poWK->nSrcYOff;
8807 4833460 : adfZ1[iX] = 0;
8808 : }
8809 :
8810 2666 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
8811 : adfX1.data(), adfY1.data(), adfZ1.data(),
8812 : abSuccess1.data());
8813 :
8814 4836120 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8815 : {
8816 4833460 : if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
8817 0 : abSuccess1[iX] = FALSE;
8818 : else
8819 : {
8820 4833460 : adfX1[iX] -= poWK->nDstXOff;
8821 4833460 : adfY1[iX] -= poWK->nDstYOff;
8822 : }
8823 : }
8824 :
8825 4833460 : for (int iX = 0; iX < nSrcXSize; ++iX)
8826 : {
8827 9661580 : if (abSuccess0[iX] && abSuccess0[iX + 1] && abSuccess1[iX] &&
8828 4830790 : abSuccess1[iX + 1])
8829 : {
8830 : /* --------------------------------------------------------------------
8831 : */
8832 : /* Do not try to apply transparent source pixels to the
8833 : * destination.*/
8834 : /* --------------------------------------------------------------------
8835 : */
8836 4830790 : const auto iSrcOffset =
8837 4830790 : iX + static_cast<GPtrDiff_t>(iY) * nSrcXSize;
8838 9560570 : if (poWK->panUnifiedSrcValid != nullptr &&
8839 4729780 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
8840 : {
8841 4738340 : continue;
8842 : }
8843 :
8844 103415 : if (poWK->pafUnifiedSrcDensity != nullptr)
8845 : {
8846 0 : if (poWK->pafUnifiedSrcDensity[iSrcOffset] <
8847 : SRC_DENSITY_THRESHOLD_FLOAT)
8848 0 : continue;
8849 : }
8850 :
8851 : SourcePixel sp;
8852 103415 : sp.dfArea = 0;
8853 103415 : sp.dfDstX0 = adfX0[iX];
8854 103415 : sp.dfDstY0 = adfY0[iX];
8855 103415 : sp.dfDstX1 = adfX0[iX + 1];
8856 103415 : sp.dfDstY1 = adfY0[iX + 1];
8857 103415 : sp.dfDstX2 = adfX1[iX + 1];
8858 103415 : sp.dfDstY2 = adfY1[iX + 1];
8859 103415 : sp.dfDstX3 = adfX1[iX];
8860 103415 : sp.dfDstY3 = adfY1[iX];
8861 :
8862 : // Detect pixel that likely cross the anti-meridian and
8863 : // introduce a discontinuity when reprojected.
8864 :
8865 103415 : if (std::fabs(adfX0[iX] - adfX0[iX + 1]) > 2 * poWK->dfXScale &&
8866 80 : std::fabs(adfX1[iX] - adfX1[iX + 1]) > 2 * poWK->dfXScale &&
8867 40 : getInsideXSign(adfX0[iX]) !=
8868 80 : getInsideXSign(adfX0[iX + 1]) &&
8869 80 : getInsideXSign(adfX0[iX]) == getInsideXSign(adfX1[iX]) &&
8870 40 : getInsideXSign(adfX0[iX + 1]) ==
8871 103495 : getInsideXSign(adfX1[iX + 1]) &&
8872 40 : (adfY1[iX] - adfY0[iX]) * (adfY1[iX + 1] - adfY0[iX + 1]) >
8873 : 0)
8874 : {
8875 : #ifdef DEBUG_VERBOSE
8876 : CPLDebug(
8877 : "WARP",
8878 : "Discontinuity for iSrcX=%d, iSrcY=%d, dest corners:"
8879 : "X0[iX]=%f X0[iX+1]=%f X1[iX]=%f X1[iX+1]=%f,"
8880 : "Y0[iX]=%f Y0[iX+1]=%f Y1[iX]=%f Y1[iX+1]=%f",
8881 : iX + poWK->nSrcXOff, iY + poWK->nSrcYOff, adfX0[iX],
8882 : adfX0[iX + 1], adfX1[iX], adfX1[iX + 1], adfY0[iX],
8883 : adfY0[iX + 1], adfY1[iX], adfY1[iX + 1]);
8884 : #endif
8885 40 : double dfXMidReprojectedLeftTop = 0;
8886 40 : double dfXMidReprojectedRightTop = 0;
8887 40 : double dfYMidReprojectedTop = 0;
8888 40 : FindDiscontinuity(
8889 40 : iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
8890 80 : iY + poWK->nSrcYOff, getInsideXSign(adfX0[iX]),
8891 : dfXMidReprojectedLeftTop, dfXMidReprojectedRightTop,
8892 : dfYMidReprojectedTop);
8893 40 : double dfXMidReprojectedLeftBottom = 0;
8894 40 : double dfXMidReprojectedRightBottom = 0;
8895 40 : double dfYMidReprojectedBottom = 0;
8896 40 : FindDiscontinuity(
8897 40 : iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
8898 80 : iY + poWK->nSrcYOff + 1, getInsideXSign(adfX1[iX]),
8899 : dfXMidReprojectedLeftBottom,
8900 : dfXMidReprojectedRightBottom, dfYMidReprojectedBottom);
8901 :
8902 40 : discontinuityLeft[0] = XYPair(adfX0[iX], adfY0[iX]);
8903 40 : discontinuityLeft[1] =
8904 80 : XYPair(dfXMidReprojectedLeftTop, dfYMidReprojectedTop);
8905 40 : discontinuityLeft[2] = XYPair(dfXMidReprojectedLeftBottom,
8906 40 : dfYMidReprojectedBottom);
8907 40 : discontinuityLeft[3] = XYPair(adfX1[iX], adfY1[iX]);
8908 40 : discontinuityLeft[4] = XYPair(adfX0[iX], adfY0[iX]);
8909 :
8910 40 : discontinuityRight[0] =
8911 80 : XYPair(adfX0[iX + 1], adfY0[iX + 1]);
8912 40 : discontinuityRight[1] =
8913 80 : XYPair(dfXMidReprojectedRightTop, dfYMidReprojectedTop);
8914 40 : discontinuityRight[2] = XYPair(dfXMidReprojectedRightBottom,
8915 40 : dfYMidReprojectedBottom);
8916 40 : discontinuityRight[3] =
8917 80 : XYPair(adfX1[iX + 1], adfY1[iX + 1]);
8918 40 : discontinuityRight[4] =
8919 80 : XYPair(adfX0[iX + 1], adfY0[iX + 1]);
8920 :
8921 40 : sp.dfArea = getArea(discontinuityLeft) +
8922 40 : getArea(discontinuityRight);
8923 40 : if (getInsideXSign(adfX0[iX]) >= 1)
8924 : {
8925 20 : sp.dfDstX1 = dfXMidReprojectedLeftTop;
8926 20 : sp.dfDstY1 = dfYMidReprojectedTop;
8927 20 : sp.dfDstX2 = dfXMidReprojectedLeftBottom;
8928 20 : sp.dfDstY2 = dfYMidReprojectedBottom;
8929 : }
8930 : else
8931 : {
8932 20 : sp.dfDstX0 = dfXMidReprojectedRightTop;
8933 20 : sp.dfDstY0 = dfYMidReprojectedTop;
8934 20 : sp.dfDstX3 = dfXMidReprojectedRightBottom;
8935 20 : sp.dfDstY3 = dfYMidReprojectedBottom;
8936 : }
8937 : }
8938 :
8939 : // Bounding box of source pixel (expressed in target pixel
8940 : // coordinates)
8941 : CPLRectObj sRect;
8942 103415 : sRect.minx = std::min(std::min(sp.dfDstX0, sp.dfDstX1),
8943 103415 : std::min(sp.dfDstX2, sp.dfDstX3));
8944 103415 : sRect.miny = std::min(std::min(sp.dfDstY0, sp.dfDstY1),
8945 103415 : std::min(sp.dfDstY2, sp.dfDstY3));
8946 103415 : sRect.maxx = std::max(std::max(sp.dfDstX0, sp.dfDstX1),
8947 103415 : std::max(sp.dfDstX2, sp.dfDstX3));
8948 103415 : sRect.maxy = std::max(std::max(sp.dfDstY0, sp.dfDstY1),
8949 103415 : std::max(sp.dfDstY2, sp.dfDstY3));
8950 103415 : if (!(sRect.minx < nDstXSize && sRect.maxx > 0 &&
8951 101355 : sRect.miny < iYMax && sRect.maxy > iYMin))
8952 : {
8953 10852 : continue;
8954 : }
8955 :
8956 92563 : sp.iSrcX = iX;
8957 92563 : sp.iSrcY = iY;
8958 :
8959 92563 : if (!bIsAffineNoRotation)
8960 : {
8961 : // Check polygon validity (no self-crossing)
8962 89745 : XYPair xy;
8963 89745 : if (getIntersection(XYPair(sp.dfDstX0, sp.dfDstY0),
8964 89745 : XYPair(sp.dfDstX1, sp.dfDstY1),
8965 89745 : XYPair(sp.dfDstX2, sp.dfDstY2),
8966 269235 : XYPair(sp.dfDstX3, sp.dfDstY3), xy) ||
8967 89745 : getIntersection(XYPair(sp.dfDstX1, sp.dfDstY1),
8968 89745 : XYPair(sp.dfDstX2, sp.dfDstY2),
8969 89745 : XYPair(sp.dfDstX0, sp.dfDstY0),
8970 179490 : XYPair(sp.dfDstX3, sp.dfDstY3), xy))
8971 : {
8972 113 : continue;
8973 : }
8974 : }
8975 :
8976 92450 : CPLQuadTreeInsertWithBounds(
8977 : hQuadTree,
8978 : reinterpret_cast<void *>(
8979 92450 : static_cast<uintptr_t>(sourcePixels.size())),
8980 : &sRect);
8981 :
8982 92450 : sourcePixels.push_back(sp);
8983 : }
8984 : }
8985 : }
8986 :
8987 38 : std::vector<double> adfRealValue(poWK->nBands);
8988 38 : std::vector<double> adfImagValue(poWK->nBands);
8989 38 : std::vector<double> adfBandDensity(poWK->nBands);
8990 38 : std::vector<double> adfWeight(poWK->nBands);
8991 :
8992 : #ifdef CHECK_SUM_WITH_GEOS
8993 : auto hGEOSContext = OGRGeometry::createGEOSContext();
8994 : auto seq1 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
8995 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 0, 0.0, 0.0);
8996 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 1, 1.0, 0.0);
8997 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 2, 1.0, 1.0);
8998 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 3, 0.0, 1.0);
8999 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 4, 0.0, 0.0);
9000 : auto hLR1 = GEOSGeom_createLinearRing_r(hGEOSContext, seq1);
9001 : auto hP1 = GEOSGeom_createPolygon_r(hGEOSContext, hLR1, nullptr, 0);
9002 :
9003 : auto seq2 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
9004 : auto hLR2 = GEOSGeom_createLinearRing_r(hGEOSContext, seq2);
9005 : auto hP2 = GEOSGeom_createPolygon_r(hGEOSContext, hLR2, nullptr, 0);
9006 : #endif
9007 :
9008 : const XYPoly xy1{
9009 38 : {0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}};
9010 38 : XYPoly xy2(5);
9011 38 : XYPoly xy2_triangle(4);
9012 38 : XYPoly intersection;
9013 :
9014 : /* ==================================================================== */
9015 : /* Loop over output lines. */
9016 : /* ==================================================================== */
9017 1951 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
9018 : {
9019 : CPLRectObj sRect;
9020 1932 : sRect.miny = iDstY;
9021 1932 : sRect.maxy = iDstY + 1;
9022 :
9023 : /* ====================================================================
9024 : */
9025 : /* Loop over pixels in output scanline. */
9026 : /* ====================================================================
9027 : */
9028 1403940 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
9029 : {
9030 1402010 : sRect.minx = iDstX;
9031 1402010 : sRect.maxx = iDstX + 1;
9032 1402010 : int nSourcePixels = 0;
9033 : void **pahSourcePixel =
9034 1402010 : CPLQuadTreeSearch(hQuadTree, &sRect, &nSourcePixels);
9035 1402010 : if (nSourcePixels == 0)
9036 : {
9037 1183090 : CPLFree(pahSourcePixel);
9038 1183100 : continue;
9039 : }
9040 :
9041 218919 : std::fill(adfRealValue.begin(), adfRealValue.end(), 0);
9042 218919 : std::fill(adfImagValue.begin(), adfImagValue.end(), 0);
9043 218919 : std::fill(adfBandDensity.begin(), adfBandDensity.end(), 0);
9044 218919 : std::fill(adfWeight.begin(), adfWeight.end(), 0);
9045 218919 : double dfDensity = 0;
9046 : // Just above zero to please Coveriy Scan
9047 218919 : double dfTotalWeight = std::numeric_limits<double>::min();
9048 :
9049 : /* ====================================================================
9050 : */
9051 : /* Iterate over each contributing source pixel to add its
9052 : */
9053 : /* value weighed by the ratio of the area of its
9054 : * intersection */
9055 : /* with the target pixel divided by the area of the source
9056 : */
9057 : /* pixel. */
9058 : /* ====================================================================
9059 : */
9060 1020550 : for (int i = 0; i < nSourcePixels; ++i)
9061 : {
9062 801628 : const int iSourcePixel = static_cast<int>(
9063 801628 : reinterpret_cast<uintptr_t>(pahSourcePixel[i]));
9064 801628 : auto &sp = sourcePixels[iSourcePixel];
9065 :
9066 801628 : double dfWeight = 0.0;
9067 801628 : if (bIsAffineNoRotation)
9068 : {
9069 : // Optimization since the source pixel is a rectangle in
9070 : // target pixel coordinates
9071 16326 : double dfSrcMinX = std::min(sp.dfDstX0, sp.dfDstX2);
9072 16326 : double dfSrcMaxX = std::max(sp.dfDstX0, sp.dfDstX2);
9073 16326 : double dfSrcMinY = std::min(sp.dfDstY0, sp.dfDstY2);
9074 16326 : double dfSrcMaxY = std::max(sp.dfDstY0, sp.dfDstY2);
9075 16326 : double dfIntersMinX = std::max<double>(dfSrcMinX, iDstX);
9076 16326 : double dfIntersMaxX = std::min(dfSrcMaxX, iDstX + 1.0);
9077 16326 : double dfIntersMinY = std::max<double>(dfSrcMinY, iDstY);
9078 16326 : double dfIntersMaxY = std::min(dfSrcMaxY, iDstY + 1.0);
9079 16326 : dfWeight =
9080 16326 : ((dfIntersMaxX - dfIntersMinX) *
9081 16326 : (dfIntersMaxY - dfIntersMinY)) /
9082 16326 : ((dfSrcMaxX - dfSrcMinX) * (dfSrcMaxY - dfSrcMinY));
9083 : }
9084 : else
9085 : {
9086 : // Compute the polygon of the source pixel in target pixel
9087 : // coordinates, and shifted to the target pixel (unit square
9088 : // coordinates)
9089 :
9090 785302 : xy2[0] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
9091 785302 : xy2[1] = {sp.dfDstX1 - iDstX, sp.dfDstY1 - iDstY};
9092 785302 : xy2[2] = {sp.dfDstX2 - iDstX, sp.dfDstY2 - iDstY};
9093 785302 : xy2[3] = {sp.dfDstX3 - iDstX, sp.dfDstY3 - iDstY};
9094 785302 : xy2[4] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
9095 :
9096 785302 : if (isConvex(xy2))
9097 : {
9098 785302 : getConvexPolyIntersection(xy1, xy2, intersection);
9099 785302 : if (intersection.size() >= 3)
9100 : {
9101 468849 : dfWeight = getArea(intersection);
9102 : }
9103 : }
9104 : else
9105 : {
9106 : // Split xy2 into 2 triangles.
9107 0 : xy2_triangle[0] = xy2[0];
9108 0 : xy2_triangle[1] = xy2[1];
9109 0 : xy2_triangle[2] = xy2[2];
9110 0 : xy2_triangle[3] = xy2[0];
9111 0 : getConvexPolyIntersection(xy1, xy2_triangle,
9112 : intersection);
9113 0 : if (intersection.size() >= 3)
9114 : {
9115 0 : dfWeight = getArea(intersection);
9116 : }
9117 :
9118 0 : xy2_triangle[1] = xy2[2];
9119 0 : xy2_triangle[2] = xy2[3];
9120 0 : getConvexPolyIntersection(xy1, xy2_triangle,
9121 : intersection);
9122 0 : if (intersection.size() >= 3)
9123 : {
9124 0 : dfWeight += getArea(intersection);
9125 : }
9126 : }
9127 785302 : if (dfWeight > 0.0)
9128 : {
9129 468828 : if (sp.dfArea == 0)
9130 89592 : sp.dfArea = getArea(xy2);
9131 468828 : dfWeight /= sp.dfArea;
9132 : }
9133 :
9134 : #ifdef CHECK_SUM_WITH_GEOS
9135 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 0,
9136 : sp.dfDstX0 - iDstX,
9137 : sp.dfDstY0 - iDstY);
9138 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 1,
9139 : sp.dfDstX1 - iDstX,
9140 : sp.dfDstY1 - iDstY);
9141 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 2,
9142 : sp.dfDstX2 - iDstX,
9143 : sp.dfDstY2 - iDstY);
9144 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 3,
9145 : sp.dfDstX3 - iDstX,
9146 : sp.dfDstY3 - iDstY);
9147 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 4,
9148 : sp.dfDstX0 - iDstX,
9149 : sp.dfDstY0 - iDstY);
9150 :
9151 : double dfWeightGEOS = 0.0;
9152 : auto hIntersection =
9153 : GEOSIntersection_r(hGEOSContext, hP1, hP2);
9154 : if (hIntersection)
9155 : {
9156 : double dfIntersArea = 0.0;
9157 : if (GEOSArea_r(hGEOSContext, hIntersection,
9158 : &dfIntersArea) &&
9159 : dfIntersArea > 0)
9160 : {
9161 : double dfSourceArea = 0.0;
9162 : if (GEOSArea_r(hGEOSContext, hP2, &dfSourceArea))
9163 : {
9164 : dfWeightGEOS = dfIntersArea / dfSourceArea;
9165 : }
9166 : }
9167 : GEOSGeom_destroy_r(hGEOSContext, hIntersection);
9168 : }
9169 : if (fabs(dfWeight - dfWeightGEOS) > 1e-5 * dfWeightGEOS)
9170 : {
9171 : /* ok */ printf("dfWeight=%f dfWeightGEOS=%f\n",
9172 : dfWeight, dfWeightGEOS);
9173 : printf("xy2: "); // ok
9174 : for (const auto &xy : xy2)
9175 : printf("[%f, %f], ", xy.first, xy.second); // ok
9176 : printf("\n"); // ok
9177 : printf("intersection: "); // ok
9178 : for (const auto &xy : intersection)
9179 : printf("[%f, %f], ", xy.first, xy.second); // ok
9180 : printf("\n"); // ok
9181 : }
9182 : #endif
9183 : }
9184 801628 : if (dfWeight > 0.0)
9185 : {
9186 : #ifdef DEBUG_VERBOSE
9187 : #if defined(DST_X) && defined(DST_Y)
9188 : if (iDstX + poWK->nDstXOff == DST_X &&
9189 : iDstY + poWK->nDstYOff == DST_Y)
9190 : {
9191 : CPLDebug("WARP",
9192 : "iSrcX = %d, iSrcY = %d, weight =%.17g",
9193 : sp.iSrcX + poWK->nSrcXOff,
9194 : sp.iSrcY + poWK->nSrcYOff, dfWeight);
9195 : }
9196 : #endif
9197 : #endif
9198 :
9199 474104 : const GPtrDiff_t iSrcOffset =
9200 474104 : sp.iSrcX +
9201 474104 : static_cast<GPtrDiff_t>(sp.iSrcY) * nSrcXSize;
9202 474104 : dfTotalWeight += dfWeight;
9203 :
9204 474104 : if (poWK->pafUnifiedSrcDensity != nullptr)
9205 : {
9206 0 : dfDensity +=
9207 0 : dfWeight *
9208 0 : double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
9209 : }
9210 : else
9211 : {
9212 474104 : dfDensity += dfWeight;
9213 : }
9214 :
9215 1818730 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
9216 : {
9217 : // Returns pixel value if it is not no data.
9218 : double dfBandDensity;
9219 : double dfRealValue;
9220 : double dfImagValue;
9221 2689250 : if (!(GWKGetPixelValue(poWK, iBand, iSrcOffset,
9222 : &dfBandDensity, &dfRealValue,
9223 : &dfImagValue) &&
9224 1344620 : dfBandDensity > BAND_DENSITY_THRESHOLD))
9225 : {
9226 0 : continue;
9227 : }
9228 : #ifdef DEBUG_VERBOSE
9229 : #if defined(DST_X) && defined(DST_Y)
9230 : if (iDstX + poWK->nDstXOff == DST_X &&
9231 : iDstY + poWK->nDstYOff == DST_Y)
9232 : {
9233 : CPLDebug("WARP", "value * weight = %.17g",
9234 : dfRealValue * dfWeight);
9235 : }
9236 : #endif
9237 : #endif
9238 :
9239 1344620 : adfRealValue[iBand] += dfRealValue * dfWeight;
9240 1344620 : adfImagValue[iBand] += dfImagValue * dfWeight;
9241 1344620 : adfBandDensity[iBand] += dfBandDensity * dfWeight;
9242 1344620 : adfWeight[iBand] += dfWeight;
9243 : }
9244 : }
9245 : }
9246 :
9247 218919 : CPLFree(pahSourcePixel);
9248 :
9249 : /* --------------------------------------------------------------------
9250 : */
9251 : /* Update destination pixel value. */
9252 : /* --------------------------------------------------------------------
9253 : */
9254 218919 : bool bHasFoundDensity = false;
9255 218919 : const GPtrDiff_t iDstOffset =
9256 218919 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
9257 827838 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
9258 : {
9259 608919 : if (adfWeight[iBand] > 0)
9260 : {
9261 : const double dfBandDensity =
9262 608909 : adfBandDensity[iBand] / adfWeight[iBand];
9263 608909 : if (dfBandDensity > BAND_DENSITY_THRESHOLD)
9264 : {
9265 608909 : bHasFoundDensity = true;
9266 608909 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
9267 608909 : adfRealValue[iBand],
9268 608909 : adfImagValue[iBand],
9269 : bAvoidNoDataSingleBand);
9270 : }
9271 : }
9272 : }
9273 :
9274 218919 : if (!bHasFoundDensity)
9275 10 : continue;
9276 :
9277 218909 : if (!bAvoidNoDataSingleBand)
9278 : {
9279 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
9280 : }
9281 :
9282 : /* --------------------------------------------------------------------
9283 : */
9284 : /* Update destination density/validity masks. */
9285 : /* --------------------------------------------------------------------
9286 : */
9287 218909 : GWKOverlayDensity(poWK, iDstOffset, dfDensity / dfTotalWeight);
9288 :
9289 218909 : if (poWK->panDstValid != nullptr)
9290 : {
9291 11752 : CPLMaskSet(poWK->panDstValid, iDstOffset);
9292 : }
9293 : }
9294 :
9295 : /* --------------------------------------------------------------------
9296 : */
9297 : /* Report progress to the user, and optionally cancel out. */
9298 : /* --------------------------------------------------------------------
9299 : */
9300 1932 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
9301 0 : break;
9302 : }
9303 :
9304 : #ifdef CHECK_SUM_WITH_GEOS
9305 : GEOSGeom_destroy_r(hGEOSContext, hP1);
9306 : GEOSGeom_destroy_r(hGEOSContext, hP2);
9307 : OGRGeometry::freeGEOSContext(hGEOSContext);
9308 : #endif
9309 19 : CPLQuadTreeDestroy(hQuadTree);
9310 19 : }
|