Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: High Performance Image Reprojector
4 : * Purpose: Implementation of the GDALWarpKernel class. Implements the actual
5 : * image warping for a "chunk" of input and output imagery already
6 : * loaded into memory.
7 : * Author: Frank Warmerdam, warmerdam@pobox.com
8 : *
9 : ******************************************************************************
10 : * Copyright (c) 2003, Frank Warmerdam <warmerdam@pobox.com>
11 : * Copyright (c) 2008-2013, Even Rouault <even dot rouault at spatialys.com>
12 : *
13 : * SPDX-License-Identifier: MIT
14 : ****************************************************************************/
15 :
16 : #include "cpl_port.h"
17 : #include "gdalwarper.h"
18 :
19 : #include <cfloat>
20 : #include <cmath>
21 : #include <cstddef>
22 : #include <cstdlib>
23 : #include <cstring>
24 :
25 : #include <algorithm>
26 : #include <limits>
27 : #include <mutex>
28 : #include <new>
29 : #include <utility>
30 : #include <vector>
31 :
32 : #include "cpl_atomic_ops.h"
33 : #include "cpl_conv.h"
34 : #include "cpl_error.h"
35 : #include "cpl_float.h"
36 : #include "cpl_mask.h"
37 : #include "cpl_multiproc.h"
38 : #include "cpl_progress.h"
39 : #include "cpl_string.h"
40 : #include "cpl_vsi.h"
41 : #include "cpl_worker_thread_pool.h"
42 : #include "cpl_quad_tree.h"
43 : #include "gdal.h"
44 : #include "gdal_alg.h"
45 : #include "gdal_alg_priv.h"
46 : #include "gdal_thread_pool.h"
47 : #include "gdalresamplingkernels.h"
48 :
49 : // #define CHECK_SUM_WITH_GEOS
50 : #ifdef CHECK_SUM_WITH_GEOS
51 : #include "ogr_geometry.h"
52 : #include "ogr_geos.h"
53 : #endif
54 :
55 : #ifdef USE_NEON_OPTIMIZATIONS
56 : #include "include_sse2neon.h"
57 : #define USE_SSE2
58 :
59 : #include "gdalsse_priv.h"
60 :
61 : // We restrict to 64bit processors because they are guaranteed to have SSE2.
62 : // Could possibly be used too on 32bit, but we would need to check at runtime.
63 : #elif defined(__x86_64) || defined(_M_X64)
64 : #define USE_SSE2
65 :
66 : #include "gdalsse_priv.h"
67 :
68 : #if __SSE4_1__
69 : #include <smmintrin.h>
70 : #endif
71 :
72 : #if __SSE3__
73 : #include <pmmintrin.h>
74 : #endif
75 :
76 : #endif
77 :
78 : constexpr double BAND_DENSITY_THRESHOLD = 0.0000000001;
79 : constexpr float SRC_DENSITY_THRESHOLD_FLOAT = 0.000000001f;
80 : constexpr double SRC_DENSITY_THRESHOLD_DOUBLE = 0.000000001;
81 :
82 : // #define INSTANTIATE_FLOAT64_SSE2_IMPL
83 :
84 : static const int anGWKFilterRadius[] = { // Indexed by GDALResampleAlg value; GWKGetFilterRadius() returns entries unchanged.
85 : 0, // Nearest neighbour
86 : 1, // Bilinear
87 : 2, // Cubic Convolution (Catmull-Rom)
88 : 2, // Cubic B-Spline
89 : 3, // Lanczos windowed sinc
90 : 0, // Average
91 : 0, // Mode
92 : 0, // Reserved GRA_Gauss=7
93 : 0, // Max
94 : 0, // Min
95 : 0, // Med
96 : 0, // Q1
97 : 0, // Q3
98 : 0, // Sum
99 : 0, // RMS
100 : }; // Entry count must equal GRA_LAST_VALUE + 1 (static_assert in GWKGetFilterRadius()).
101 :
102 : static double GWKBilinear(double dfX);
103 : static double GWKCubic(double dfX);
104 : static double GWKBSpline(double dfX);
105 : static double GWKLanczosSinc(double dfX);
106 :
107 : static const FilterFuncType apfGWKFilter[] = { // Indexed by GDALResampleAlg value; nullptr where no filter function is registered.
108 : nullptr, // Nearest neighbour
109 : GWKBilinear, // Bilinear
110 : GWKCubic, // Cubic Convolution (Catmull-Rom)
111 : GWKBSpline, // Cubic B-Spline
112 : GWKLanczosSinc, // Lanczos windowed sinc
113 : nullptr, // Average
114 : nullptr, // Mode
115 : nullptr, // Reserved GRA_Gauss=7
116 : nullptr, // Max
117 : nullptr, // Min
118 : nullptr, // Med
119 : nullptr, // Q1
120 : nullptr, // Q3
121 : nullptr, // Sum
122 : nullptr, // RMS
123 : }; // Entry count must equal GRA_LAST_VALUE + 1 (static_assert in GWKGetFilterFunc()).
124 :
125 : // TODO(schwehr): Can we make these functions have a const * const arg?
126 : static double GWKBilinear4Values(double *padfVals);
127 : static double GWKCubic4Values(double *padfVals);
128 : static double GWKBSpline4Values(double *padfVals);
129 : static double GWKLanczosSinc4Values(double *padfVals);
130 :
131 : static const FilterFunc4ValuesType apfGWKFilter4Values[] = { // Indexed by GDALResampleAlg value; nullptr where no 4-values variant is registered.
132 : nullptr, // Nearest neighbour
133 : GWKBilinear4Values, // Bilinear
134 : GWKCubic4Values, // Cubic Convolution (Catmull-Rom)
135 : GWKBSpline4Values, // Cubic B-Spline
136 : GWKLanczosSinc4Values, // Lanczos windowed sinc
137 : nullptr, // Average
138 : nullptr, // Mode
139 : nullptr, // Reserved GRA_Gauss=7
140 : nullptr, // Max
141 : nullptr, // Min
142 : nullptr, // Med
143 : nullptr, // Q1
144 : nullptr, // Q3
145 : nullptr, // Sum
146 : nullptr, // RMS
147 : }; // Entry count must equal GRA_LAST_VALUE + 1 (static_assert in GWKGetFilterFunc4Values()).
148 :
149 13663 : int GWKGetFilterRadius(GDALResampleAlg eResampleAlg) // Returns the anGWKFilterRadius entry for eResampleAlg.
150 : {
151 : static_assert(CPL_ARRAYSIZE(anGWKFilterRadius) == GRA_LAST_VALUE + 1, // Compile-time guard: one table entry per enum value.
152 : "Bad size of anGWKFilterRadius");
153 13663 : return anGWKFilterRadius[eResampleAlg]; // No runtime range check: eResampleAlg is used directly as the index.
154 : }
155 :
156 5093 : FilterFuncType GWKGetFilterFunc(GDALResampleAlg eResampleAlg) // Returns the apfGWKFilter entry for eResampleAlg; may be nullptr.
157 : {
158 : static_assert(CPL_ARRAYSIZE(apfGWKFilter) == GRA_LAST_VALUE + 1, // Compile-time guard: one table entry per enum value.
159 : "Bad size of apfGWKFilter");
160 5093 : return apfGWKFilter[eResampleAlg]; // No runtime range check: eResampleAlg is used directly as the index.
161 : }
162 :
163 5093 : FilterFunc4ValuesType GWKGetFilterFunc4Values(GDALResampleAlg eResampleAlg) // Returns the apfGWKFilter4Values entry for eResampleAlg; may be nullptr.
164 : {
165 : static_assert(CPL_ARRAYSIZE(apfGWKFilter4Values) == GRA_LAST_VALUE + 1, // Compile-time guard: one table entry per enum value.
166 : "Bad size of apfGWKFilter4Values");
167 5093 : return apfGWKFilter4Values[eResampleAlg]; // No runtime range check: eResampleAlg is used directly as the index.
168 : }
169 :
170 : static CPLErr GWKGeneralCase(GDALWarpKernel *);
171 : static CPLErr GWKRealCase(GDALWarpKernel *poWK);
172 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
173 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
174 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
175 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
176 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
177 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
178 : #endif
179 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
180 : static CPLErr GWKNearestByte(GDALWarpKernel *poWK);
181 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
182 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
183 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
184 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
185 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
186 : #endif
187 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
188 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
189 : static CPLErr GWKNearestInt8(GDALWarpKernel *poWK);
190 : static CPLErr GWKNearestShort(GDALWarpKernel *poWK);
191 : static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK);
192 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
193 : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK);
194 : static CPLErr GWKAverageOrMode(GDALWarpKernel *);
195 : static CPLErr GWKSumPreserving(GDALWarpKernel *);
196 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
197 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
198 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
199 :
200 : /************************************************************************/
201 : /* GWKJobStruct */
202 : /************************************************************************/
203 :
204 : struct GWKJobStruct // Description of one unit of warp work handed to a worker function as its void* argument.
205 : {
206 : std::mutex &mutex; // Reference to the mutex shared by all jobs of a GWKThreadData.
207 : std::condition_variable &cv; // Notified by GWKProgressThread() after each counter increment.
208 : int counterSingleThreaded = 0; // Progress counter used only by GWKProgressMonoThread().
209 : int &counter; // Shared progress counter, incremented under mutex by GWKProgressThread().
210 : bool &stopFlag; // Shared interruption request flag.
211 : GDALWarpKernel *poWK = nullptr; // Kernel being processed; not owned by the job.
212 : int iYMin = 0; // Start of the destination row range assigned to this job.
213 : int iYMax = 0; // End of the destination row range assigned to this job (also the divisor in GWKProgressMonoThread()).
214 : int (*pfnProgress)(GWKJobStruct *psJob) = nullptr; // Progress hook; a non-zero return means the computation must be interrupted.
215 : void *pTransformerArg = nullptr; // Transformer argument the job must use (per-thread in the multi-threaded path).
216 : // used by GWKRun() to assign the proper pTransformerArg
217 : void (*pfnFunc)(void *) = nullptr; // Actual worker, invoked by ThreadFuncAdapter() with the job itself as argument.
218 :
219 3178 : GWKJobStruct(std::mutex &mutex_, std::condition_variable &cv_, // Binds the reference members; all other members keep their defaults.
220 : int &counter_, bool &stopFlag_)
221 3178 : : mutex(mutex_), cv(cv_), counter(counter_), stopFlag(stopFlag_)
222 : {
223 3178 : }
224 : };
225 :
226 : struct GWKThreadData // State shared by the warp jobs; allocated by GWKThreadsCreate() and deleted by GWKThreadsEnd().
227 : {
228 : std::unique_ptr<CPLJobQueue> poJobQueue{}; // Only set when a thread pool was obtained; null selects the single-threaded path in GWKRun().
229 : std::unique_ptr<std::vector<GWKJobStruct>> threadJobs{}; // Job slots, created with nThreads entries in GWKThreadsCreate().
230 : int nMaxThreads{0}; // Upper bound on the number of jobs GWKRun() may submit.
231 : int counter{0}; // Incremented under mutex by GWKProgressThread(); compared with nDstYSize in GWKRun().
232 : bool stopFlag{false}; // Set on user interruption or when a transformer clone fails.
233 : std::mutex mutex{}; // Held by the multi-threaded code when touching counter, stopFlag, the run counts and the transformer map.
234 : std::condition_variable cv{}; // Notified by GWKProgressThread(); waited on by GWKRun().
235 : bool bTransformerArgInputAssignedToThread{false}; // True while a job is borrowing pTransformerArgInput.
236 : void *pTransformerArgInput{
237 : nullptr}; // owned by calling layer. Not to be destroyed
238 : std::map<GIntBig, void *> mapThreadToTransformerArg{}; // Keyed by the CPLGetPID() value of the running job; values are transformer arguments.
239 : int nTotalThreadCountForThisRun = 0; // Number of jobs submitted by the current GWKRun() call.
240 : int nCurThreadCountForThisRun = 0; // Number of those jobs that have entered ThreadFuncAdapter() so far.
241 : };
242 :
243 : /************************************************************************/
244 : /* GWKProgressThread() */
245 : /************************************************************************/
246 :
247 : // Multi-threaded progress hook: bumps the shared counter and wakes the waiter. Return TRUE if the computation must be interrupted.
248 36 : static int GWKProgressThread(GWKJobStruct *psJob)
249 : {
250 36 : bool stop = false;
251 : {
252 36 : std::lock_guard<std::mutex> lock(psJob->mutex); // counter and stopFlag are shared between jobs.
253 36 : psJob->counter++;
254 36 : stop = psJob->stopFlag;
255 : }
256 36 : psJob->cv.notify_one(); // Notify after releasing the lock; GWKRun() waits on this condition variable.
257 :
258 36 : return stop;
259 : }
260 :
261 : /************************************************************************/
262 : /* GWKProgressMonoThread() */
263 : /************************************************************************/
264 :
265 : // Single-threaded progress hook: calls the user progress callback directly. Return TRUE if the computation must be interrupted.
266 435765 : static int GWKProgressMonoThread(GWKJobStruct *psJob)
267 : {
268 435765 : GDALWarpKernel *poWK = psJob->poWK;
269 435765 : if (!poWK->pfnProgress(poWK->dfProgressBase + // Reported value: base + scale * (calls so far / iYMax).
270 435765 : poWK->dfProgressScale *
271 435765 : (++psJob->counterSingleThreaded /
272 435765 : static_cast<double>(psJob->iYMax)),
273 : "", poWK->pProgress))
274 : {
275 1 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated"); // A false return from the callback is treated as a user cancel.
276 1 : psJob->stopFlag = true; // No lock taken here: this hook is only installed by GWKGenericMonoThread().
277 1 : return TRUE;
278 : }
279 435764 : return FALSE;
280 : }
281 :
282 : /************************************************************************/
283 : /* GWKGenericMonoThread() */
284 : /************************************************************************/
285 :
286 3154 : static CPLErr GWKGenericMonoThread(GDALWarpKernel *poWK, // Runs pfnFunc synchronously on one job covering rows 0 to poWK->nDstYSize.
287 : void (*pfnFunc)(void *pUserData)) // pfnFunc receives a pointer to the local GWKJobStruct.
288 : {
289 3154 : GWKThreadData td; // Local, only used to provide the objects the job's references bind to.
290 :
291 : // NOTE: the mutex is not used.
292 3154 : GWKJobStruct job(td.mutex, td.cv, td.counter, td.stopFlag);
293 3154 : job.poWK = poWK;
294 3154 : job.iYMin = 0;
295 3154 : job.iYMax = poWK->nDstYSize;
296 3154 : job.pfnProgress = GWKProgressMonoThread;
297 3154 : job.pTransformerArg = poWK->pTransformerArg; // The kernel's own transformer argument is used directly, no clone.
298 3154 : job.counterSingleThreaded = td.counter;
299 3154 : pfnFunc(&job);
300 3154 : td.counter = job.counterSingleThreaded;
301 :
302 6308 : return td.stopFlag ? CE_Failure : CE_None; // stopFlag is set by GWKProgressMonoThread() on user interruption.
303 : }
304 :
305 : /************************************************************************/
306 : /* GWKThreadsCreate() */
307 : /************************************************************************/
308 :
309 1772 : void *GWKThreadsCreate(char **papszWarpOptions, // Allocates a GWKThreadData and returns it as an opaque pointer; release with GWKThreadsEnd().
310 : GDALTransformerFunc /* pfnTransformer */,
311 : void *pTransformerArg) // Remembered (not copied) as pTransformerArgInput when threading is enabled.
312 : {
313 1772 : const int nThreads = GDALGetNumThreads(papszWarpOptions, "NUM_THREADS", // Thread count is taken from the NUM_THREADS warp option.
314 : GDAL_DEFAULT_MAX_THREAD_COUNT,
315 : /* bDefaultAllCPUs = */ false);
316 1772 : GWKThreadData *psThreadData = new GWKThreadData();
317 : auto poThreadPool =
318 1772 : nThreads > 1 ? GDALGetGlobalThreadPool(nThreads) : nullptr; // No pool is requested for a single thread.
319 1772 : if (poThreadPool)
320 : {
321 24 : psThreadData->nMaxThreads = nThreads;
322 24 : psThreadData->threadJobs.reset(new std::vector<GWKJobStruct>( // nThreads copies of a job bound to the shared mutex/cv/counter/stopFlag.
323 : nThreads,
324 24 : GWKJobStruct(psThreadData->mutex, psThreadData->cv,
325 48 : psThreadData->counter, psThreadData->stopFlag)));
326 :
327 24 : psThreadData->poJobQueue = poThreadPool->CreateJobQueue();
328 24 : psThreadData->pTransformerArgInput = pTransformerArg;
329 : }
330 :
331 1772 : return psThreadData; // With no pool, poJobQueue stays null and GWKRun() falls back to GWKGenericMonoThread().
332 : }
333 :
334 : /************************************************************************/
335 : /* GWKThreadsEnd() */
336 : /************************************************************************/
337 :
338 1772 : void GWKThreadsEnd(void *psThreadDataIn) // Releases the GWKThreadData returned by GWKThreadsCreate(); nullptr is accepted.
339 : {
340 1772 : if (psThreadDataIn == nullptr)
341 0 : return;
342 :
343 1772 : GWKThreadData *psThreadData = static_cast<GWKThreadData *>(psThreadDataIn);
344 1772 : if (psThreadData->poJobQueue)
345 : {
346 : // cppcheck-suppress constVariableReference
347 34 : for (auto &pair : psThreadData->mapThreadToTransformerArg) // Entries remaining here are transformers cloned in ThreadFuncAdapter().
348 : {
349 10 : CPLAssert(pair.second != psThreadData->pTransformerArgInput); // The caller-owned transformer must never be destroyed here.
350 10 : GDALDestroyTransformer(pair.second);
351 : }
352 24 : psThreadData->poJobQueue.reset();
353 : }
354 1772 : delete psThreadData;
355 : }
356 :
357 : /************************************************************************/
358 : /* ThreadFuncAdapter() */
359 : /************************************************************************/
360 :
361 33 : static void ThreadFuncAdapter(void *pData) // Job entry point: selects a transformer argument for the running thread, then calls psJob->pfnFunc.
362 : {
363 33 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
364 33 : GWKThreadData *psThreadData =
365 33 : static_cast<GWKThreadData *>(psJob->poWK->psThreadData);
366 :
367 : // Look if we have already a per-thread transformer
368 33 : void *pTransformerArg = nullptr;
369 33 : const GIntBig nThreadId = CPLGetPID(); // Used as the key of mapThreadToTransformerArg.
370 :
371 : {
372 66 : std::lock_guard<std::mutex> lock(psThreadData->mutex);
373 33 : ++psThreadData->nCurThreadCountForThisRun;
374 :
375 33 : auto oIter = psThreadData->mapThreadToTransformerArg.find(nThreadId);
376 33 : if (oIter != psThreadData->mapThreadToTransformerArg.end())
377 : {
378 0 : pTransformerArg = oIter->second; // Reuse the transformer already registered for this thread id.
379 : }
380 33 : else if (!psThreadData->bTransformerArgInputAssignedToThread &&
381 33 : psThreadData->nCurThreadCountForThisRun ==
382 33 : psThreadData->nTotalThreadCountForThisRun)
383 : {
384 : // If we are the last thread to be started, temporarily borrow the
385 : // original transformer
386 23 : psThreadData->bTransformerArgInputAssignedToThread = true;
387 23 : pTransformerArg = psThreadData->pTransformerArgInput;
388 23 : psThreadData->mapThreadToTransformerArg[nThreadId] =
389 : pTransformerArg;
390 : }
391 :
392 33 : if (pTransformerArg == nullptr)
393 : {
394 10 : CPLAssert(psThreadData->pTransformerArgInput != nullptr);
395 10 : CPLAssert(!psThreadData->bTransformerArgInputAssignedToThread);
396 : }
397 : }
398 :
399 : // If no transformer assigned to current thread, instantiate one
400 33 : if (pTransformerArg == nullptr)
401 : {
402 : // This somehow assumes that GDALCloneTransformer() is thread-safe
403 : // which should normally be the case.
404 : pTransformerArg =
405 10 : GDALCloneTransformer(psThreadData->pTransformerArgInput); // Called without holding the mutex.
406 :
407 : // Lock for the stop flag and the transformer map.
408 10 : std::lock_guard<std::mutex> lock(psThreadData->mutex);
409 10 : if (!pTransformerArg)
410 : {
411 0 : psJob->stopFlag = true; // NOTE(review): cv is not notified and pfnFunc is not run on this path; check the GWKRun() wait loop cannot block on it.
412 0 : return;
413 : }
414 10 : psThreadData->mapThreadToTransformerArg[nThreadId] = pTransformerArg; // Clones stay in the map until GWKThreadsEnd() destroys them.
415 : }
416 :
417 33 : psJob->pTransformerArg = pTransformerArg;
418 33 : psJob->pfnFunc(pData);
419 :
420 : // Give back original transformer, if borrowed.
421 : {
422 66 : std::lock_guard<std::mutex> lock(psThreadData->mutex);
423 33 : if (psThreadData->bTransformerArgInputAssignedToThread &&
424 25 : pTransformerArg == psThreadData->pTransformerArgInput)
425 : {
426 : psThreadData->mapThreadToTransformerArg.erase( // Removing the entry keeps GWKThreadsEnd() from destroying the caller-owned transformer.
427 23 : psThreadData->mapThreadToTransformerArg.find(nThreadId));
428 23 : psThreadData->bTransformerArgInputAssignedToThread = false;
429 : }
430 : }
431 : }
432 :
433 : /************************************************************************/
434 : /* GWKRun() */
435 : /************************************************************************/
436 :
437 3177 : static CPLErr GWKRun(GDALWarpKernel *poWK, const char *pszFuncName, // Runs pfnFunc over the destination rows of poWK, single-threaded or split across the job queue.
438 : void (*pfnFunc)(void *pUserData)) // pfnFunc receives a GWKJobStruct*; pszFuncName is only used in the debug message.
439 :
440 : {
441 3177 : const int nDstYSize = poWK->nDstYSize;
442 :
443 3177 : CPLDebug("GDAL",
444 : "GDALWarpKernel()::%s() "
445 : "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d",
446 : pszFuncName, poWK->nSrcXOff, poWK->nSrcYOff, poWK->nSrcXSize,
447 : poWK->nSrcYSize, poWK->nDstXOff, poWK->nDstYOff, poWK->nDstXSize,
448 : poWK->nDstYSize);
449 :
450 3177 : if (!poWK->pfnProgress(poWK->dfProgressBase, "", poWK->pProgress)) // Initial progress report; a false return cancels before any work starts.
451 : {
452 0 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
453 0 : return CE_Failure;
454 : }
455 :
456 3177 : GWKThreadData *psThreadData =
457 : static_cast<GWKThreadData *>(poWK->psThreadData);
458 3177 : if (psThreadData == nullptr || psThreadData->poJobQueue == nullptr)
459 : {
460 3154 : return GWKGenericMonoThread(poWK, pfnFunc); // No job queue available: run everything in the calling thread.
461 : }
462 :
463 23 : int nThreads = std::min(psThreadData->nMaxThreads, nDstYSize / 2); // At most one thread per two destination rows.
464 : // Config option mostly useful for tests to be able to test multithreading
465 : // with small rasters
466 : const int nWarpChunkSize =
467 23 : atoi(CPLGetConfigOption("WARP_THREAD_CHUNK_SIZE", "65536"));
468 23 : if (nWarpChunkSize > 0)
469 : {
470 21 : GIntBig nChunks =
471 21 : static_cast<GIntBig>(nDstYSize) * poWK->nDstXSize / nWarpChunkSize; // Destination pixel count divided by the chunk size, computed in 64 bits.
472 21 : if (nThreads > nChunks)
473 16 : nThreads = static_cast<int>(nChunks);
474 : }
475 23 : if (nThreads <= 0)
476 19 : nThreads = 1; // Still goes through the job queue, with a single job.
477 :
478 23 : CPLDebug("WARP", "Using %d threads", nThreads);
479 :
480 23 : auto &jobs = *psThreadData->threadJobs;
481 23 : CPLAssert(static_cast<int>(jobs.size()) >= nThreads);
482 : // Fill-in job structures.
483 56 : for (int i = 0; i < nThreads; ++i)
484 : {
485 33 : auto &job = jobs[i];
486 33 : job.poWK = poWK;
487 33 : job.iYMin =
488 33 : static_cast<int>(static_cast<int64_t>(i) * nDstYSize / nThreads); // Job i gets rows from i*nDstYSize/nThreads up to (i+1)*nDstYSize/nThreads.
489 33 : job.iYMax = static_cast<int>(static_cast<int64_t>(i + 1) * nDstYSize /
490 33 : nThreads);
491 33 : if (poWK->pfnProgress != GDALDummyProgress)
492 2 : job.pfnProgress = GWKProgressThread; // Otherwise pfnProgress keeps whatever value the job slot already holds.
493 33 : job.pfnFunc = pfnFunc;
494 : }
495 :
496 : bool bStopFlag;
497 : {
498 23 : std::unique_lock<std::mutex> lock(psThreadData->mutex); // Held while submitting, so jobs cannot pass their first locked section before the counts are set.
499 :
500 23 : psThreadData->nTotalThreadCountForThisRun = nThreads;
501 : // coverity[missing_lock]
502 23 : psThreadData->nCurThreadCountForThisRun = 0; // NOTE(review): counter and stopFlag are not reset in this function; confirm how reuse of psThreadData across calls is handled.
503 :
504 : // Start jobs.
505 56 : for (int i = 0; i < nThreads; ++i)
506 : {
507 33 : auto &job = jobs[i];
508 33 : psThreadData->poJobQueue->SubmitJob(ThreadFuncAdapter,
509 : static_cast<void *>(&job));
510 : }
511 :
512 : /* --------------------------------------------------------------------
513 : */
514 : /* Report progress. */
515 : /* --------------------------------------------------------------------
516 : */
517 23 : if (poWK->pfnProgress != GDALDummyProgress)
518 : {
519 4 : while (psThreadData->counter < nDstYSize) // NOTE(review): exits only once counter reaches nDstYSize or the callback cancels; each iteration needs a notify from GWKProgressThread().
520 : {
521 3 : psThreadData->cv.wait(lock); // Releases the mutex while waiting so jobs can update counter.
522 3 : if (!poWK->pfnProgress(poWK->dfProgressBase +
523 3 : poWK->dfProgressScale *
524 3 : (psThreadData->counter /
525 3 : static_cast<double>(nDstYSize)),
526 : "", poWK->pProgress))
527 : {
528 1 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
529 1 : psThreadData->stopFlag = true; // Jobs read this flag in GWKProgressThread().
530 1 : break;
531 : }
532 : }
533 : }
534 :
535 23 : bStopFlag = psThreadData->stopFlag; // NOTE(review): sampled before WaitCompletion(); a stopFlag set later by a job (e.g. clone failure in ThreadFuncAdapter()) does not affect the return value - confirm intended.
536 : }
537 :
538 : /* -------------------------------------------------------------------- */
539 : /* Wait for all jobs to complete. */
540 : /* -------------------------------------------------------------------- */
541 23 : psThreadData->poJobQueue->WaitCompletion();
542 :
543 23 : return bStopFlag ? CE_Failure : CE_None;
544 : }
545 :
546 : /************************************************************************/
547 : /* ==================================================================== */
548 : /* GDALWarpKernel */
549 : /* ==================================================================== */
550 : /************************************************************************/
551 :
552 : /**
553 : * \class GDALWarpKernel "gdalwarper.h"
554 : *
555 : * Low level image warping class.
556 : *
557 : * This class is responsible for low level image warping for one
558 : * "chunk" of imagery. The class is essentially a structure with all
559 : * data members public - primarily so that new special-case functions
560 : * can be added without changing the class declaration.
561 : *
562 : * Applications are normally intended to interact with warping facilities
563 : * through the GDALWarpOperation class, though the GDALWarpKernel can in
564 : * theory be used directly if great care is taken in setting up the
565 : * control data.
566 : *
567 : * <h3>Design Issues</h3>
568 : *
569 : * The intention is that PerformWarp() would analyze the setup in terms
570 : * of the datatype, resampling type, and validity/density mask usage and
571 : * pick one of many specific implementations of the warping algorithm over
572 : * a continuum of optimization vs. generality. At one end there will be a
573 : * reference general purpose implementation of the algorithm that supports
574 : * any data type (working internally in double precision complex), all three
575 : * resampling types, and any or all of the validity/density masks. At the
576 : * other end would be highly optimized algorithms for common cases like
577 : * nearest neighbour resampling on GDT_UInt8 data with no masks.
578 : *
579 : * The full set of optimized versions have not been decided but we should
580 : * expect to have at least:
581 : * - One for each resampling algorithm for 8bit data with no masks.
582 : * - One for each resampling algorithm for float data with no masks.
583 : * - One for each resampling algorithm for float data with any/all masks
584 : * (essentially the generic case for just float data).
585 : * - One for each resampling algorithm for 8bit data with support for
586 : * input validity masks (per band or per pixel). This handles the common
587 : * case of nodata masking.
588 : * - One for each resampling algorithm for float data with support for
589 : * input validity masks (per band or per pixel). This handles the common
590 : * case of nodata masking.
591 : *
592 : * Some of the specializations would operate on all bands in one pass
593 : * (especially the ones without masking would do this), while others might
594 : * process each band individually to reduce code complexity.
595 : *
596 : * <h3>Masking Semantics</h3>
597 : *
598 : * A detailed explanation of the semantics of the validity and density masks,
599 : * and their effects on resampling kernels is needed here.
600 : */
601 :
602 : /************************************************************************/
603 : /* GDALWarpKernel Data Members */
604 : /************************************************************************/
605 :
606 : /**
607 : * \var GDALResampleAlg GDALWarpKernel::eResample;
608 : *
609 : * Resampling algorithm.
610 : *
611 : * The resampling algorithm to use. One of GRA_NearestNeighbour, GRA_Bilinear,
612 : * GRA_Cubic, GRA_CubicSpline, GRA_Lanczos, GRA_Average, GRA_RMS,
613 : * GRA_Mode or GRA_Sum.
614 : *
615 : * This field is required. GRA_NearestNeighbour may be used as a default
616 : * value.
617 : */
618 :
619 : /**
620 : * \var GDALDataType GDALWarpKernel::eWorkingDataType;
621 : *
622 : * Working pixel data type.
623 : *
624 : * The datatype of pixels in the source image (papabySrcImage) and
625 : * destination image (papabyDstImage) buffers. Note that operations on
626 : * some data types (such as GDT_UInt8) may be much better optimized than other
627 : * less common cases.
628 : *
629 : * This field is required. It may not be GDT_Unknown.
630 : */
631 :
632 : /**
633 : * \var int GDALWarpKernel::nBands;
634 : *
635 : * Number of bands.
636 : *
637 : * The number of bands (layers) of imagery being warped. Determines the
638 : * number of entries in the papabySrcImage, papanBandSrcValid,
639 : * and papabyDstImage arrays.
640 : *
641 : * This field is required.
642 : */
643 :
644 : /**
645 : * \var int GDALWarpKernel::nSrcXSize;
646 : *
647 : * Source image width in pixels.
648 : *
649 : * This field is required.
650 : */
651 :
652 : /**
653 : * \var int GDALWarpKernel::nSrcYSize;
654 : *
655 : * Source image height in pixels.
656 : *
657 : * This field is required.
658 : */
659 :
660 : /**
661 : * \var double GDALWarpKernel::dfSrcXExtraSize;
662 : *
663 : * Number of pixels included in nSrcXSize that are present on the edges of
664 : * the area of interest to take into account the width of the kernel.
665 : *
666 : * This field is required.
667 : */
668 :
669 : /**
670 : * \var double GDALWarpKernel::dfSrcYExtraSize;
671 : *
672 : * Number of pixels included in nSrcYSize that are present on the edges of
673 : * the area of interest to take into account the height of the kernel.
674 : *
675 : * This field is required.
676 : */
677 :
678 : /**
679 : * \var GByte **GDALWarpKernel::papabySrcImage;
680 : *
681 : * Array of source image band data.
682 : *
683 : * This is an array (of size GDALWarpKernel::nBands) of pointers
684 : * to image data. Each individual band of image data is organized as a single
685 : * block of image data in left to right, then bottom to top order. The actual
686 : * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
687 : *
688 : * To access the pixel value for the (x=3, y=4) pixel (zero based) of
689 : * the second band with eWorkingDataType set to GDT_Float32 use code like
690 : * this:
691 : *
692 : * \code
693 : * float dfPixelValue;
694 : * int nBand = 2-1; // Band indexes are zero based.
695 : * int nPixel = 3; // Zero based.
696 : * int nLine = 4; // Zero based.
697 : *
698 : * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
699 : * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
700 : * assert( nBand >= 0 && nBand < poKern->nBands );
701 : * dfPixelValue = ((float *) poKern->papabySrcImage[nBand])
702 : * [nPixel + nLine * poKern->nSrcXSize];
703 : * \endcode
704 : *
705 : * This field is required.
706 : */
707 :
708 : /**
709 : * \var GUInt32 **GDALWarpKernel::papanBandSrcValid;
710 : *
711 : * Per band validity mask for source pixels.
712 : *
713 : * Array of pixel validity mask layers for each source band. Each of
714 : * the mask layers is the same size (in pixels) as the source image with
715 : * one bit per pixel. Note that it is legal (and common) for this to be
716 : * NULL indicating that none of the pixels are invalidated, or for some
717 : * band validity masks to be NULL in which case all pixels of the band are
718 : * valid. The following code can be used to test the validity of a particular
719 : * pixel.
720 : *
721 : * \code
722 : * int bIsValid = TRUE;
723 : * int nBand = 2-1; // Band indexes are zero based.
724 : * int nPixel = 3; // Zero based.
725 : * int nLine = 4; // Zero based.
726 : *
727 : * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
728 : * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
729 : * assert( nBand >= 0 && nBand < poKern->nBands );
730 : *
731 : * if( poKern->papanBandSrcValid != NULL
732 : * && poKern->papanBandSrcValid[nBand] != NULL )
733 : * {
734 : * GUInt32 *panBandMask = poKern->papanBandSrcValid[nBand];
735 : * int iPixelOffset = nPixel + nLine * poKern->nSrcXSize;
736 : *
737 : * bIsValid = CPLMaskGet(panBandMask, iPixelOffset);
738 : * }
739 : * \endcode
740 : */
741 :
742 : /**
743 : * \var GUInt32 *GDALWarpKernel::panUnifiedSrcValid;
744 : *
745 : * Per pixel validity mask for source pixels.
746 : *
747 : * A single validity mask layer that applies to the pixels of all source
748 : * bands. It is accessed similarly to papanBandSrcValid, but without the
749 : * extra level of band indirection.
750 : *
751 : * This pointer may be NULL indicating that all pixels are valid.
752 : *
753 : * Note that if both panUnifiedSrcValid, and papanBandSrcValid are available,
754 : * the pixel isn't considered to be valid unless both arrays indicate it is
755 : * valid.
756 : */
757 :
758 : /**
759 : * \var float *GDALWarpKernel::pafUnifiedSrcDensity;
760 : *
761 : * Per pixel density mask for source pixels.
762 : *
763 : * A single density mask layer that applies to the pixels of all source
764 : * bands. It contains values between 0.0 and 1.0 indicating the degree to
765 : * which this pixel should be allowed to contribute to the output result.
766 : *
767 : * This pointer may be NULL indicating that all pixels have a density of 1.0.
768 : *
769 : * The density for a pixel may be accessed like this:
770 : *
771 : * \code
772 : * float fDensity = 1.0;
773 : * int nPixel = 3; // Zero based.
774 : * int nLine = 4; // Zero based.
775 : *
776 : * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
777 : * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
778 : * if( poKern->pafUnifiedSrcDensity != NULL )
779 : * fDensity = poKern->pafUnifiedSrcDensity
780 : * [nPixel + nLine * poKern->nSrcXSize];
781 : * \endcode
782 : */
783 :
784 : /**
785 : * \var int GDALWarpKernel::nDstXSize;
786 : *
787 : * Width of destination image in pixels.
788 : *
789 : * This field is required.
790 : */
791 :
792 : /**
793 : * \var int GDALWarpKernel::nDstYSize;
794 : *
795 : * Height of destination image in pixels.
796 : *
797 : * This field is required.
798 : */
799 :
800 : /**
801 : * \var GByte **GDALWarpKernel::papabyDstImage;
802 : *
803 : * Array of destination image band data.
804 : *
805 : * This is an array (of size GDALWarpKernel::nBands) of pointers
806 : * to image data. Each individual band of image data is organized as a single
807 : * block of image data in left to right, then bottom to top order. The actual
808 : * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
809 : *
810 : * To access the pixel value for the (x=3, y=4) pixel (zero based) of
811 : * the second band with eWorkingDataType set to GDT_Float32 use code like
812 : * this:
813 : *
814 : * \code
815 : * float dfPixelValue;
816 : * int nBand = 2-1; // Band indexes are zero based.
817 : * int nPixel = 3; // Zero based.
818 : * int nLine = 4; // Zero based.
819 : *
820 : * assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
821 : * assert( nLine >= 0 && nLine < poKern->nDstYSize );
822 : * assert( nBand >= 0 && nBand < poKern->nBands );
823 : * dfPixelValue = ((float *) poKern->papabyDstImage[nBand])
824 : * [nPixel + nLine * poKern->nDstXSize];
825 : * \endcode
826 : *
827 : * This field is required.
828 : */
829 :
830 : /**
831 : * \var GUInt32 *GDALWarpKernel::panDstValid;
832 : *
833 : * Per pixel validity mask for destination pixels.
834 : *
835 : * A single validity mask layer that applies to the pixels of all destination
836 : * bands. It is accessed similarly to panUnifiedSrcValid, but based
837 : * on the size of the destination image.
838 : *
839 : * This pointer may be NULL indicating that all pixels are valid.
840 : */
841 :
842 : /**
843 : * \var float *GDALWarpKernel::pafDstDensity;
844 : *
845 : * Per pixel density mask for destination pixels.
846 : *
847 : * A single density mask layer that applies to the pixels of all destination
848 : * bands. It contains values between 0.0 and 1.0.
849 : *
850 : * This pointer may be NULL indicating that all pixels have a density of 1.0.
851 : *
852 : * The density for a pixel may be accessed like this:
853 : *
854 : * \code
855 : * float fDensity = 1.0;
856 : * int nPixel = 3; // Zero based.
857 : * int nLine = 4; // Zero based.
858 : *
859 : * assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
860 : * assert( nLine >= 0 && nLine < poKern->nDstYSize );
861 : * if( poKern->pafDstDensity != NULL )
862 : * fDensity = poKern->pafDstDensity[nPixel + nLine * poKern->nDstXSize];
863 : * \endcode
864 : */
865 :
866 : /**
867 : * \var int GDALWarpKernel::nSrcXOff;
868 : *
869 : * X offset to source pixel coordinates for transformation.
870 : *
871 : * See pfnTransformer.
872 : *
873 : * This field is required.
874 : */
875 :
876 : /**
877 : * \var int GDALWarpKernel::nSrcYOff;
878 : *
879 : * Y offset to source pixel coordinates for transformation.
880 : *
881 : * See pfnTransformer.
882 : *
883 : * This field is required.
884 : */
885 :
886 : /**
887 : * \var int GDALWarpKernel::nDstXOff;
888 : *
889 : * X offset to destination pixel coordinates for transformation.
890 : *
891 : * See pfnTransformer.
892 : *
893 : * This field is required.
894 : */
895 :
896 : /**
897 : * \var int GDALWarpKernel::nDstYOff;
898 : *
899 : * Y offset to destination pixel coordinates for transformation.
900 : *
901 : * See pfnTransformer.
902 : *
903 : * This field is required.
904 : */
905 :
906 : /**
907 : * \var GDALTransformerFunc GDALWarpKernel::pfnTransformer;
908 : *
909 : * Source/destination location transformer.
910 : *
911 : * The function to call to transform coordinates between source image
912 : * pixel/line coordinates and destination image pixel/line coordinates.
913 : * See GDALTransformerFunc() for details of the semantics of this function.
914 : *
915 : * The GDALWarpKernel algorithm will only ever use this transformer in
916 : * "destination to source" mode (bDstToSrc=TRUE), and will always pass
917 : * partial or complete scanlines of points in the destination image as
918 : * input. This means, among other things, that it is safe to use the
919 : * approximating transform GDALApproxTransform() as the transformation
920 : * function.
921 : *
922 : * Source and destination images may be subsets of a larger overall image.
923 : * The transformation algorithms will expect and return pixel/line coordinates
924 : * in terms of this larger image, so coordinates need to be offset by
925 : * the offsets specified in nSrcXOff, nSrcYOff, nDstXOff, and nDstYOff before
926 : * passing to pfnTransformer, and after return from it.
927 : *
928 : * The GDALWarpKernel::pTransformerArg value will be passed as the callback
929 : * data to this function when it is called.
930 : *
931 : * This field is required.
932 : */
933 :
934 : /**
935 : * \var void *GDALWarpKernel::pTransformerArg;
936 : *
937 : * Callback data for pfnTransformer.
938 : *
939 : * This field may be NULL if not required for the pfnTransformer being used.
940 : */
941 :
942 : /**
943 : * \var GDALProgressFunc GDALWarpKernel::pfnProgress;
944 : *
945 : * The function to call to report progress of the algorithm, and to check
946 : * for a requested termination of the operation. It operates according to
947 : * GDALProgressFunc() semantics.
948 : *
949 : * Generally speaking the progress function will be invoked for each
950 : * scanline of the destination buffer that has been processed.
951 : *
952 : * This field may be NULL (internally set to GDALDummyProgress()).
953 : */
954 :
955 : /**
956 : * \var void *GDALWarpKernel::pProgress;
957 : *
958 : * Callback data for pfnProgress.
959 : *
960 : * This field may be NULL if not required for the pfnProgress being used.
961 : */
962 :
963 : /************************************************************************/
964 : /* GDALWarpKernel() */
965 : /************************************************************************/
966 :
967 3795 : GDALWarpKernel::GDALWarpKernel()
968 : : papszWarpOptions(nullptr), eResample(GRA_NearestNeighbour),
969 : eWorkingDataType(GDT_Unknown), nBands(0), nSrcXSize(0), nSrcYSize(0),
970 : dfSrcXExtraSize(0.0), dfSrcYExtraSize(0.0), papabySrcImage(nullptr),
971 : papanBandSrcValid(nullptr), panUnifiedSrcValid(nullptr),
972 : pafUnifiedSrcDensity(nullptr), nDstXSize(0), nDstYSize(0),
973 : papabyDstImage(nullptr), panDstValid(nullptr), pafDstDensity(nullptr),
974 : dfXScale(1.0), dfYScale(1.0), dfXFilter(0.0), dfYFilter(0.0), nXRadius(0),
975 : nYRadius(0), nFiltInitX(0), nFiltInitY(0), nSrcXOff(0), nSrcYOff(0),
976 : nDstXOff(0), nDstYOff(0), pfnTransformer(nullptr),
977 : pTransformerArg(nullptr), pfnProgress(GDALDummyProgress),
978 : pProgress(nullptr), dfProgressBase(0.0), dfProgressScale(1.0),
979 : padfDstNoDataReal(nullptr), psThreadData(nullptr),
980 3795 : eTieStrategy(GWKTS_First)
981 : {
982 3795 : }
983 :
984 : /************************************************************************/
985 : /* ~GDALWarpKernel() */
986 : /************************************************************************/
987 :
988 3795 : GDALWarpKernel::~GDALWarpKernel()
989 : {
990 3795 : }
991 :
992 : /************************************************************************/
993 : /* getArea() */
994 : /************************************************************************/
995 :
using XYPair = std::pair<double, double>;

using XYPoly = std::vector<XYPair>;

/**
 * Return the unsigned area enclosed by a polygon (shoelace formula).
 *
 * poly may or may not be closed.
 *
 * @param poly Polygon vertices as (x, y) pairs, in ring order.
 * @return the absolute area, or 0.0 when poly has fewer than 3 vertices
 *         (such a ring encloses no area, and indexing its neighbours would
 *         read out of bounds).
 */
static double getArea(const XYPoly &poly)
{
    const size_t nPointCount = poly.size();
    if (nPointCount < 3)
        return 0.0;

    // Each vertex contributes x[i] * (y[i+1] - y[i-1]), with indices
    // wrapping around the ring. Terms are accumulated in vertex order.
    double dfAreaSum = 0.0;
    for (size_t i = 0; i < nPointCount; i++)
    {
        const size_t iPrev = (i == 0) ? nPointCount - 1 : i - 1;
        const size_t iNext = (i == nPointCount - 1) ? 0 : i + 1;
        dfAreaSum += poly[i].first * (poly[iNext].second - poly[iPrev].second);
    }

    return 0.5 * std::fabs(dfAreaSum);
}
1018 :
1019 : /************************************************************************/
1020 : /* CanUse4SamplesFormula() */
1021 : /************************************************************************/
1022 :
1023 4663 : static bool CanUse4SamplesFormula(const GDALWarpKernel *poWK)
1024 : {
1025 4663 : if (poWK->eResample == GRA_Bilinear || poWK->eResample == GRA_Cubic)
1026 : {
1027 : // Use 4-sample formula if we are not downsampling by more than a
1028 : // factor of 1:2
1029 2637 : if (poWK->dfXScale > 0.5 && poWK->dfYScale > 0.5)
1030 2201 : return true;
1031 436 : CPLDebugOnce("WARP",
1032 : "Not using 4-sample bilinear/bicubic formula because "
1033 : "XSCALE(=%f) and/or YSCALE(=%f) <= 0.5",
1034 : poWK->dfXScale, poWK->dfYScale);
1035 : }
1036 2462 : return false;
1037 : }
1038 :
1039 : /************************************************************************/
1040 : /* PerformWarp() */
1041 : /************************************************************************/
1042 :
1043 : /**
1044 : * \fn CPLErr GDALWarpKernel::PerformWarp();
1045 : *
1046 : * This method performs the warp described in the GDALWarpKernel.
1047 : *
1048 : * @return CE_None on success or CE_Failure if an error occurs.
1049 : */
1050 :
1051 3791 : CPLErr GDALWarpKernel::PerformWarp()
1052 :
1053 : {
1054 3791 : const CPLErr eErr = Validate();
1055 :
1056 3791 : if (eErr != CE_None)
1057 1 : return eErr;
1058 :
1059 : // See #2445 and #3079.
1060 3790 : if (nSrcXSize <= 0 || nSrcYSize <= 0)
1061 : {
1062 613 : if (!pfnProgress(dfProgressBase + dfProgressScale, "", pProgress))
1063 : {
1064 0 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
1065 0 : return CE_Failure;
1066 : }
1067 613 : return CE_None;
1068 : }
1069 :
1070 : /* -------------------------------------------------------------------- */
1071 : /* Pre-calculate resampling scales and window sizes for filtering. */
1072 : /* -------------------------------------------------------------------- */
1073 :
1074 3177 : dfXScale = 0.0;
1075 3177 : dfYScale = 0.0;
1076 :
1077 : // XSCALE and YSCALE per warping chunk is not necessarily ideal, in case of
1078 : // heterogeneous change in shapes.
1079 : // Best would probably be a per-pixel scale computation.
1080 3177 : const char *pszXScale = CSLFetchNameValue(papszWarpOptions, "XSCALE");
1081 3177 : const char *pszYScale = CSLFetchNameValue(papszWarpOptions, "YSCALE");
1082 3177 : if (!pszXScale || !pszYScale)
1083 : {
1084 : // Sample points along a grid in the destination space
1085 3176 : constexpr int MAX_POINTS_PER_DIM = 10;
1086 3176 : const int nPointsX = std::min(MAX_POINTS_PER_DIM, nDstXSize);
1087 3176 : const int nPointsY = std::min(MAX_POINTS_PER_DIM, nDstYSize);
1088 3176 : constexpr int CORNER_COUNT_PER_SQUARE = 4;
1089 3176 : const int nPoints = CORNER_COUNT_PER_SQUARE * nPointsX * nPointsY;
1090 6352 : std::vector<double> adfX;
1091 6352 : std::vector<double> adfY;
1092 3176 : adfX.reserve(CORNER_COUNT_PER_SQUARE * nPoints);
1093 3176 : adfY.reserve(CORNER_COUNT_PER_SQUARE * nPoints);
1094 6352 : std::vector<double> adfZ(CORNER_COUNT_PER_SQUARE * nPoints);
1095 6352 : std::vector<int> abSuccess(CORNER_COUNT_PER_SQUARE * nPoints);
1096 30621 : for (int iY = 0; iY < nPointsY; iY++)
1097 : {
1098 27445 : const double dfYShift = (iY > 0 && iY == nPointsY - 1) ? -1.0 : 0.0;
1099 27445 : const double dfY =
1100 27445 : dfYShift + (nPointsY == 1 ? 0.0
1101 27229 : : static_cast<double>(iY) *
1102 27229 : nDstYSize / (nPointsY - 1));
1103 :
1104 291550 : for (int iX = 0; iX < nPointsX; iX++)
1105 : {
1106 264105 : const double dfXShift =
1107 264105 : (iX > 0 && iX == nPointsX - 1) ? -1.0 : 0.0;
1108 :
1109 264105 : const double dfX =
1110 264105 : dfXShift + (nPointsX == 1 ? 0.0
1111 263903 : : static_cast<double>(iX) *
1112 263903 : nDstXSize / (nPointsX - 1));
1113 :
1114 : // Reproject a unit square at each sample point
1115 264105 : adfX.push_back(dfX);
1116 264105 : adfY.push_back(dfY);
1117 :
1118 264105 : adfX.push_back(dfX + 1);
1119 264105 : adfY.push_back(dfY);
1120 :
1121 264105 : adfX.push_back(dfX);
1122 264105 : adfY.push_back(dfY + 1);
1123 :
1124 264105 : adfX.push_back(dfX + 1);
1125 264105 : adfY.push_back(dfY + 1);
1126 : }
1127 : }
1128 3176 : pfnTransformer(pTransformerArg, TRUE, static_cast<int>(adfX.size()),
1129 : adfX.data(), adfY.data(), adfZ.data(), abSuccess.data());
1130 :
1131 6352 : std::vector<XYPair> adfXYScales;
1132 3176 : adfXYScales.reserve(nPoints);
1133 267281 : for (int i = 0; i < nPoints; i += CORNER_COUNT_PER_SQUARE)
1134 : {
1135 527094 : if (abSuccess[i + 0] && abSuccess[i + 1] && abSuccess[i + 2] &&
1136 262989 : abSuccess[i + 3])
1137 : {
1138 2103900 : const auto square = [](double x) { return x * x; };
1139 :
1140 262987 : const double vx01 = adfX[i + 1] - adfX[i + 0];
1141 262987 : const double vy01 = adfY[i + 1] - adfY[i + 0];
1142 262987 : const double len01_sq = square(vx01) + square(vy01);
1143 :
1144 262987 : const double vx23 = adfX[i + 3] - adfX[i + 2];
1145 262987 : const double vy23 = adfY[i + 3] - adfY[i + 2];
1146 262987 : const double len23_sq = square(vx23) + square(vy23);
1147 :
1148 262987 : const double vx02 = adfX[i + 2] - adfX[i + 0];
1149 262987 : const double vy02 = adfY[i + 2] - adfY[i + 0];
1150 262987 : const double len02_sq = square(vx02) + square(vy02);
1151 :
1152 262987 : const double vx13 = adfX[i + 3] - adfX[i + 1];
1153 262987 : const double vy13 = adfY[i + 3] - adfY[i + 1];
1154 262987 : const double len13_sq = square(vx13) + square(vy13);
1155 :
1156 : // ~ 20 degree, heuristic
1157 262987 : constexpr double TAN_MODEST_ANGLE = 0.35;
1158 :
1159 : // 10%, heuristic
1160 262987 : constexpr double LENGTH_RELATIVE_TOLERANCE = 0.1;
1161 :
1162 : // Security margin to avoid division by zero (would only
1163 : // happen in case of degenerated coordinate transformation,
1164 : // or insane upsampling)
1165 262987 : constexpr double EPSILON = 1e-10;
1166 :
1167 : // Does the transformed square looks like an almost non-rotated
1168 : // quasi-rectangle ?
1169 262987 : if (std::fabs(vy01) < TAN_MODEST_ANGLE * vx01 &&
1170 255870 : std::fabs(vy23) < TAN_MODEST_ANGLE * vx23 &&
1171 255843 : std::fabs(len01_sq - len23_sq) <
1172 255843 : LENGTH_RELATIVE_TOLERANCE * std::fabs(len01_sq) &&
1173 255730 : std::fabs(len02_sq - len13_sq) <
1174 255730 : LENGTH_RELATIVE_TOLERANCE * std::fabs(len02_sq))
1175 : {
1176 : // Using a geometric average here of lenAB_sq and lenCD_sq,
1177 : // hence a sqrt(), and as this is still a squared value,
1178 : // we need another sqrt() to get a distance.
1179 : const double dfXLength =
1180 255715 : std::sqrt(std::sqrt(len01_sq * len23_sq));
1181 : const double dfYLength =
1182 255715 : std::sqrt(std::sqrt(len02_sq * len13_sq));
1183 255715 : if (dfXLength > EPSILON && dfYLength > EPSILON)
1184 : {
1185 255715 : const double dfThisXScale = 1.0 / dfXLength;
1186 255715 : const double dfThisYScale = 1.0 / dfYLength;
1187 255715 : adfXYScales.push_back({dfThisXScale, dfThisYScale});
1188 255715 : }
1189 : }
1190 : else
1191 : {
1192 : // If not, then consider the area of the transformed unit
1193 : // square to determine the X/Y scales.
1194 7272 : const XYPoly poly{{adfX[i + 0], adfY[i + 0]},
1195 7272 : {adfX[i + 1], adfY[i + 1]},
1196 7272 : {adfX[i + 3], adfY[i + 3]},
1197 29088 : {adfX[i + 2], adfY[i + 2]}};
1198 7272 : const double dfSrcArea = getArea(poly);
1199 7272 : const double dfFactor = std::sqrt(dfSrcArea);
1200 7272 : if (dfFactor > EPSILON)
1201 : {
1202 7272 : const double dfThisXScale = 1.0 / dfFactor;
1203 7272 : const double dfThisYScale = dfThisXScale;
1204 7272 : adfXYScales.push_back({dfThisXScale, dfThisYScale});
1205 : }
1206 : }
1207 : }
1208 : }
1209 :
1210 3176 : if (!adfXYScales.empty())
1211 : {
1212 : // Sort by increasing xscale * yscale
1213 3176 : std::sort(adfXYScales.begin(), adfXYScales.end(),
1214 1429620 : [](const XYPair &a, const XYPair &b)
1215 1429620 : { return a.first * a.second < b.first * b.second; });
1216 :
1217 : // Compute the per-axis maximum of scale
1218 3176 : double dfXMax = 0;
1219 3176 : double dfYMax = 0;
1220 266163 : for (const auto &[dfX, dfY] : adfXYScales)
1221 : {
1222 262987 : dfXMax = std::max(dfXMax, dfX);
1223 262987 : dfYMax = std::max(dfYMax, dfY);
1224 : }
1225 :
1226 : // Now eliminate outliers, defined as ones whose value is < 10% of
1227 : // the maximum value, typically found at a polar discontinuity, and
1228 : // compute the average of non-outlier values.
1229 3176 : dfXScale = 0;
1230 3176 : dfYScale = 0;
1231 3176 : int i = 0;
1232 3176 : constexpr double THRESHOLD = 0.1; // 10%, rather arbitrary
1233 266163 : for (const auto &[dfX, dfY] : adfXYScales)
1234 : {
1235 262987 : if (dfX > THRESHOLD * dfXMax && dfY > THRESHOLD * dfYMax)
1236 : {
1237 260139 : ++i;
1238 260139 : const double dfXDelta = dfX - dfXScale;
1239 260139 : const double dfYDelta = dfY - dfYScale;
1240 260139 : const double dfInvI = 1.0 / i;
1241 260139 : dfXScale += dfXDelta * dfInvI;
1242 260139 : dfYScale += dfYDelta * dfInvI;
1243 : }
1244 : }
1245 : }
1246 : }
1247 :
1248 : // Round to closest integer reciprocal scale if we are very close to it
1249 : const auto RoundToClosestIntegerReciprocalScaleIfCloseEnough =
1250 6354 : [](double dfScale)
1251 : {
1252 6354 : if (dfScale < 1.0)
1253 : {
1254 2552 : double dfReciprocalScale = 1.0 / dfScale;
1255 2552 : const int nReciprocalScale =
1256 2552 : static_cast<int>(dfReciprocalScale + 0.5);
1257 2552 : if (fabs(dfReciprocalScale - nReciprocalScale) < 0.05)
1258 2110 : dfScale = 1.0 / nReciprocalScale;
1259 : }
1260 6354 : return dfScale;
1261 : };
1262 :
1263 3177 : if (dfXScale <= 0)
1264 1 : dfXScale = 1.0;
1265 3177 : if (dfYScale <= 0)
1266 1 : dfYScale = 1.0;
1267 :
1268 3177 : dfXScale = RoundToClosestIntegerReciprocalScaleIfCloseEnough(dfXScale);
1269 3177 : dfYScale = RoundToClosestIntegerReciprocalScaleIfCloseEnough(dfYScale);
1270 :
1271 3177 : if (pszXScale != nullptr)
1272 1 : dfXScale = CPLAtof(pszXScale);
1273 3177 : if (pszYScale != nullptr)
1274 1 : dfYScale = CPLAtof(pszYScale);
1275 :
1276 3177 : if (!pszXScale || !pszYScale)
1277 3176 : CPLDebug("WARP", "dfXScale = %f, dfYScale = %f", dfXScale, dfYScale);
1278 :
1279 3177 : const int bUse4SamplesFormula = CanUse4SamplesFormula(this);
1280 :
1281 : // Safety check for callers that would use GDALWarpKernel without using
1282 : // GDALWarpOperation.
1283 3114 : if ((eResample == GRA_CubicSpline || eResample == GRA_Lanczos ||
1284 3049 : ((eResample == GRA_Cubic || eResample == GRA_Bilinear) &&
1285 6354 : !bUse4SamplesFormula)) &&
1286 346 : atoi(CSLFetchNameValueDef(papszWarpOptions, "EXTRA_ELTS", "0")) !=
1287 : WARP_EXTRA_ELTS)
1288 : {
1289 0 : CPLError(CE_Failure, CPLE_AppDefined,
1290 : "Source arrays must have WARP_EXTRA_ELTS extra elements at "
1291 : "their end. "
1292 : "See GDALWarpKernel class definition. If this condition is "
1293 : "fulfilled, define a EXTRA_ELTS=%d warp options",
1294 : WARP_EXTRA_ELTS);
1295 0 : return CE_Failure;
1296 : }
1297 :
1298 3177 : dfXFilter = anGWKFilterRadius[eResample];
1299 3177 : dfYFilter = anGWKFilterRadius[eResample];
1300 :
1301 3177 : nXRadius = dfXScale < 1.0 ? static_cast<int>(ceil(dfXFilter / dfXScale))
1302 2591 : : static_cast<int>(dfXFilter);
1303 3177 : nYRadius = dfYScale < 1.0 ? static_cast<int>(ceil(dfYFilter / dfYScale))
1304 2608 : : static_cast<int>(dfYFilter);
1305 :
1306 : // Filter window offset depends on the parity of the kernel radius.
1307 3177 : nFiltInitX = ((anGWKFilterRadius[eResample] + 1) % 2) - nXRadius;
1308 3177 : nFiltInitY = ((anGWKFilterRadius[eResample] + 1) % 2) - nYRadius;
1309 :
1310 3177 : bApplyVerticalShift =
1311 3177 : CPLFetchBool(papszWarpOptions, "APPLY_VERTICAL_SHIFT", false);
1312 3177 : dfMultFactorVerticalShift = CPLAtof(CSLFetchNameValueDef(
1313 3177 : papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT", "1.0"));
1314 :
1315 : /* -------------------------------------------------------------------- */
1316 : /* Set up resampling functions. */
1317 : /* -------------------------------------------------------------------- */
1318 3177 : if (CPLFetchBool(papszWarpOptions, "USE_GENERAL_CASE", false))
1319 12 : return GWKGeneralCase(this);
1320 :
1321 3165 : const bool bNoMasksOrDstDensityOnly =
1322 3155 : papanBandSrcValid == nullptr && panUnifiedSrcValid == nullptr &&
1323 6320 : pafUnifiedSrcDensity == nullptr && panDstValid == nullptr;
1324 :
1325 3165 : if (eWorkingDataType == GDT_UInt8 && eResample == GRA_NearestNeighbour &&
1326 : bNoMasksOrDstDensityOnly)
1327 909 : return GWKNearestNoMasksOrDstDensityOnlyByte(this);
1328 :
1329 2256 : if (eWorkingDataType == GDT_UInt8 && eResample == GRA_Bilinear &&
1330 : bNoMasksOrDstDensityOnly)
1331 128 : return GWKBilinearNoMasksOrDstDensityOnlyByte(this);
1332 :
1333 2128 : if (eWorkingDataType == GDT_UInt8 && eResample == GRA_Cubic &&
1334 : bNoMasksOrDstDensityOnly)
1335 850 : return GWKCubicNoMasksOrDstDensityOnlyByte(this);
1336 :
1337 1278 : if (eWorkingDataType == GDT_UInt8 && eResample == GRA_CubicSpline &&
1338 : bNoMasksOrDstDensityOnly)
1339 12 : return GWKCubicSplineNoMasksOrDstDensityOnlyByte(this);
1340 :
1341 1266 : if (eWorkingDataType == GDT_UInt8 && eResample == GRA_NearestNeighbour)
1342 363 : return GWKNearestByte(this);
1343 :
1344 903 : if ((eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16) &&
1345 154 : eResample == GRA_NearestNeighbour && bNoMasksOrDstDensityOnly)
1346 14 : return GWKNearestNoMasksOrDstDensityOnlyShort(this);
1347 :
1348 889 : if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Cubic &&
1349 : bNoMasksOrDstDensityOnly)
1350 5 : return GWKCubicNoMasksOrDstDensityOnlyShort(this);
1351 :
1352 884 : if ((eWorkingDataType == GDT_Int16) && eResample == GRA_CubicSpline &&
1353 : bNoMasksOrDstDensityOnly)
1354 6 : return GWKCubicSplineNoMasksOrDstDensityOnlyShort(this);
1355 :
1356 878 : if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Bilinear &&
1357 : bNoMasksOrDstDensityOnly)
1358 5 : return GWKBilinearNoMasksOrDstDensityOnlyShort(this);
1359 :
1360 873 : if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Cubic &&
1361 : bNoMasksOrDstDensityOnly)
1362 14 : return GWKCubicNoMasksOrDstDensityOnlyUShort(this);
1363 :
1364 859 : if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_CubicSpline &&
1365 : bNoMasksOrDstDensityOnly)
1366 5 : return GWKCubicSplineNoMasksOrDstDensityOnlyUShort(this);
1367 :
1368 854 : if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Bilinear &&
1369 : bNoMasksOrDstDensityOnly)
1370 6 : return GWKBilinearNoMasksOrDstDensityOnlyUShort(this);
1371 :
1372 848 : if (eWorkingDataType == GDT_Int8 && eResample == GRA_NearestNeighbour)
1373 9 : return GWKNearestInt8(this);
1374 :
1375 839 : if (eWorkingDataType == GDT_Int16 && eResample == GRA_NearestNeighbour)
1376 40 : return GWKNearestShort(this);
1377 :
1378 799 : if (eWorkingDataType == GDT_UInt16 && eResample == GRA_NearestNeighbour)
1379 10 : return GWKNearestUnsignedShort(this);
1380 :
1381 789 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour &&
1382 : bNoMasksOrDstDensityOnly)
1383 11 : return GWKNearestNoMasksOrDstDensityOnlyFloat(this);
1384 :
1385 778 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour)
1386 50 : return GWKNearestFloat(this);
1387 :
1388 728 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_Bilinear &&
1389 : bNoMasksOrDstDensityOnly)
1390 4 : return GWKBilinearNoMasksOrDstDensityOnlyFloat(this);
1391 :
1392 724 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_Cubic &&
1393 : bNoMasksOrDstDensityOnly)
1394 9 : return GWKCubicNoMasksOrDstDensityOnlyFloat(this);
1395 :
1396 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
1397 : if (eWorkingDataType == GDT_Float64 && eResample == GRA_Bilinear &&
1398 : bNoMasksOrDstDensityOnly)
1399 : return GWKBilinearNoMasksOrDstDensityOnlyDouble(this);
1400 :
1401 : if (eWorkingDataType == GDT_Float64 && eResample == GRA_Cubic &&
1402 : bNoMasksOrDstDensityOnly)
1403 : return GWKCubicNoMasksOrDstDensityOnlyDouble(this);
1404 : #endif
1405 :
1406 715 : if (eResample == GRA_Average)
1407 160 : return GWKAverageOrMode(this);
1408 :
1409 555 : if (eResample == GRA_RMS)
1410 9 : return GWKAverageOrMode(this);
1411 :
1412 546 : if (eResample == GRA_Mode)
1413 45 : return GWKAverageOrMode(this);
1414 :
1415 501 : if (eResample == GRA_Max)
1416 6 : return GWKAverageOrMode(this);
1417 :
1418 495 : if (eResample == GRA_Min)
1419 5 : return GWKAverageOrMode(this);
1420 :
1421 490 : if (eResample == GRA_Med)
1422 6 : return GWKAverageOrMode(this);
1423 :
1424 484 : if (eResample == GRA_Q1)
1425 10 : return GWKAverageOrMode(this);
1426 :
1427 474 : if (eResample == GRA_Q3)
1428 5 : return GWKAverageOrMode(this);
1429 :
1430 469 : if (eResample == GRA_Sum)
1431 19 : return GWKSumPreserving(this);
1432 :
1433 450 : if (!GDALDataTypeIsComplex(eWorkingDataType))
1434 : {
1435 223 : return GWKRealCase(this);
1436 : }
1437 :
1438 227 : return GWKGeneralCase(this);
1439 : }
1440 :
1441 : /************************************************************************/
1442 : /* Validate() */
1443 : /************************************************************************/
1444 :
1445 : /**
1446 : * \fn CPLErr GDALWarpKernel::Validate()
1447 : *
1448 : * Check the settings in the GDALWarpKernel, and issue a CPLError()
1449 : * (and return CE_Failure) if the configuration is considered to be
1450 : * invalid for some reason.
1451 : *
1452 : * This method will also do some standard defaulting such as setting
1453 : * pfnProgress to GDALDummyProgress() if it is NULL.
1454 : *
1455 : * @return CE_None on success or CE_Failure if an error is detected.
1456 : */
1457 :
1458 3791 : CPLErr GDALWarpKernel::Validate()
1459 :
1460 : {
1461 3791 : if (static_cast<size_t>(eResample) >=
1462 : (sizeof(anGWKFilterRadius) / sizeof(anGWKFilterRadius[0])))
1463 : {
1464 0 : CPLError(CE_Failure, CPLE_AppDefined,
1465 : "Unsupported resampling method %d.",
1466 0 : static_cast<int>(eResample));
1467 0 : return CE_Failure;
1468 : }
1469 :
1470 : // Tuples of values (e.g. "<R>,<G>,<B>" or "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>)") that must
1471 : // be ignored as contributing source pixels during resampling. Only taken into account by
1472 : // Average currently
1473 : const char *pszExcludedValues =
1474 3791 : CSLFetchNameValue(papszWarpOptions, "EXCLUDED_VALUES");
1475 3791 : if (pszExcludedValues)
1476 : {
1477 : const CPLStringList aosTokens(
1478 18 : CSLTokenizeString2(pszExcludedValues, "(,)", 0));
1479 18 : if ((aosTokens.size() % nBands) != 0)
1480 : {
1481 1 : CPLError(CE_Failure, CPLE_AppDefined,
1482 : "EXCLUDED_VALUES should contain one or several tuples of "
1483 : "%d values formatted like <R>,<G>,<B> or "
1484 : "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>) if there are multiple "
1485 : "tuples",
1486 : nBands);
1487 1 : return CE_Failure;
1488 : }
1489 34 : std::vector<double> adfTuple;
1490 68 : for (int i = 0; i < aosTokens.size(); ++i)
1491 : {
1492 51 : adfTuple.push_back(CPLAtof(aosTokens[i]));
1493 51 : if (((i + 1) % nBands) == 0)
1494 : {
1495 17 : m_aadfExcludedValues.push_back(adfTuple);
1496 17 : adfTuple.clear();
1497 : }
1498 : }
1499 : }
1500 :
1501 3790 : return CE_None;
1502 : }
1503 :
1504 : /************************************************************************/
1505 : /* GWKOverlayDensity() */
1506 : /* */
1507 : /* Compute the final density for the destination pixel. This */
1508 : /* is a function of the overlay density (passed in) and the */
1509 : /* original density. */
1510 : /************************************************************************/
1511 :
1512 17762100 : static void GWKOverlayDensity(const GDALWarpKernel *poWK, GPtrDiff_t iDstOffset,
1513 : double dfDensity)
1514 : {
1515 17762100 : if (dfDensity < 0.0001 || poWK->pafDstDensity == nullptr)
1516 13309900 : return;
1517 :
1518 4452160 : poWK->pafDstDensity[iDstOffset] =
1519 4452160 : 1.0f -
1520 4452160 : (1.0f - float(dfDensity)) * (1.0f - poWK->pafDstDensity[iDstOffset]);
1521 : }
1522 :
1523 : /************************************************************************/
1524 : /* GWKRoundValueT() */
1525 : /************************************************************************/
1526 :
1527 : template <class T, class U, bool is_signed> struct sGWKRoundValueT
1528 : {
1529 : static T eval(U);
1530 : };
1531 :
1532 : template <class T, class U> struct sGWKRoundValueT<T, U, true> /* signed */
1533 : {
1534 791525 : static T eval(U value)
1535 : {
1536 791525 : return static_cast<T>(floor(value + U(0.5)));
1537 : }
1538 : };
1539 :
1540 : template <class T, class U> struct sGWKRoundValueT<T, U, false> /* unsigned */
1541 : {
1542 152026497 : static T eval(U value)
1543 : {
1544 152026497 : return static_cast<T>(value + U(0.5));
1545 : }
1546 : };
1547 :
1548 152818022 : template <class T, class U> static T GWKRoundValueT(U value)
1549 : {
1550 152818022 : return sGWKRoundValueT<T, U, cpl::NumericLimits<T>::is_signed>::eval(value);
1551 : }
1552 :
1553 268974 : template <> float GWKRoundValueT<float, double>(double value)
1554 : {
1555 268974 : return static_cast<float>(value);
1556 : }
1557 :
1558 : #ifdef notused
1559 : template <> double GWKRoundValueT<double, double>(double value)
1560 : {
1561 : return value;
1562 : }
1563 : #endif
1564 :
1565 : /************************************************************************/
1566 : /* GWKClampValueT() */
1567 : /************************************************************************/
1568 :
1569 145451362 : template <class T, class U> static CPL_INLINE T GWKClampValueT(U value)
1570 : {
1571 145451362 : if (value < static_cast<U>(cpl::NumericLimits<T>::min()))
1572 569367 : return cpl::NumericLimits<T>::min();
1573 144881964 : else if (value > static_cast<U>(cpl::NumericLimits<T>::max()))
1574 773825 : return cpl::NumericLimits<T>::max();
1575 : else
1576 144107844 : return GWKRoundValueT<T, U>(value);
1577 : }
1578 :
1579 718914 : template <> float GWKClampValueT<float, double>(double dfValue)
1580 : {
1581 718914 : return static_cast<float>(dfValue);
1582 : }
1583 :
1584 : #ifdef notused
1585 : template <> double GWKClampValueT<double, double>(double dfValue)
1586 : {
1587 : return dfValue;
1588 : }
1589 : #endif
1590 :
1591 : /************************************************************************/
1592 : /* AvoidNoData() */
1593 : /************************************************************************/
1594 :
1595 1283 : template <class T> inline void AvoidNoData(T *pDst, GPtrDiff_t iDstOffset)
1596 : {
1597 : if constexpr (cpl::NumericLimits<T>::is_integer)
1598 : {
1599 1027 : if (pDst[iDstOffset] == static_cast<T>(cpl::NumericLimits<T>::lowest()))
1600 : {
1601 515 : pDst[iDstOffset] =
1602 515 : static_cast<T>(cpl::NumericLimits<T>::lowest() + 1);
1603 : }
1604 : else
1605 512 : pDst[iDstOffset]--;
1606 : }
1607 : else
1608 : {
1609 256 : if (pDst[iDstOffset] == cpl::NumericLimits<T>::max())
1610 : {
1611 : using std::nextafter;
1612 0 : pDst[iDstOffset] = nextafter(pDst[iDstOffset], static_cast<T>(0));
1613 : }
1614 : else
1615 : {
1616 : using std::nextafter;
1617 256 : pDst[iDstOffset] =
1618 256 : nextafter(pDst[iDstOffset], cpl::NumericLimits<T>::max());
1619 : }
1620 : }
1621 1283 : }
1622 :
1623 : /************************************************************************/
1624 : /* AvoidNoData() */
1625 : /************************************************************************/
1626 :
1627 : template <class T>
1628 25539330 : inline void AvoidNoData(const GDALWarpKernel *poWK, int iBand,
1629 : GPtrDiff_t iDstOffset)
1630 : {
1631 25539330 : GByte *pabyDst = poWK->papabyDstImage[iBand];
1632 25539330 : T *pDst = reinterpret_cast<T *>(pabyDst);
1633 :
1634 25539330 : if (poWK->padfDstNoDataReal != nullptr &&
1635 11380638 : poWK->padfDstNoDataReal[iBand] == static_cast<double>(pDst[iDstOffset]))
1636 : {
1637 640 : AvoidNoData(pDst, iDstOffset);
1638 :
1639 640 : if (!poWK->bWarnedAboutDstNoDataReplacement)
1640 : {
1641 40 : const_cast<GDALWarpKernel *>(poWK)
1642 : ->bWarnedAboutDstNoDataReplacement = true;
1643 40 : CPLError(CE_Warning, CPLE_AppDefined,
1644 : "Value %g in the source dataset has been changed to %g "
1645 : "in the destination dataset to avoid being treated as "
1646 : "NoData. To avoid this, select a different NoData value "
1647 : "for the destination dataset.",
1648 40 : poWK->padfDstNoDataReal[iBand],
1649 40 : static_cast<double>(pDst[iDstOffset]));
1650 : }
1651 : }
1652 25539330 : }
1653 :
1654 : /************************************************************************/
1655 : /* GWKAvoidNoDataMultiBand() */
1656 : /************************************************************************/
1657 :
1658 : template <class T>
1659 524573 : static void GWKAvoidNoDataMultiBand(const GDALWarpKernel *poWK,
1660 : GPtrDiff_t iDstOffset)
1661 : {
1662 524573 : T **ppDst = reinterpret_cast<T **>(poWK->papabyDstImage);
1663 524573 : if (poWK->padfDstNoDataReal != nullptr)
1664 : {
1665 208615 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
1666 : {
1667 208294 : if (poWK->padfDstNoDataReal[iBand] !=
1668 208294 : static_cast<double>(ppDst[iBand][iDstOffset]))
1669 205830 : return;
1670 : }
1671 964 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
1672 : {
1673 643 : AvoidNoData(ppDst[iBand], iDstOffset);
1674 : }
1675 :
1676 321 : if (!poWK->bWarnedAboutDstNoDataReplacement)
1677 : {
1678 21 : const_cast<GDALWarpKernel *>(poWK)
1679 : ->bWarnedAboutDstNoDataReplacement = true;
1680 42 : std::string valueSrc, valueDst;
1681 64 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
1682 : {
1683 43 : if (!valueSrc.empty())
1684 : {
1685 22 : valueSrc += ',';
1686 22 : valueDst += ',';
1687 : }
1688 43 : valueSrc += CPLSPrintf("%g", poWK->padfDstNoDataReal[iBand]);
1689 43 : valueDst += CPLSPrintf(
1690 43 : "%g", static_cast<double>(ppDst[iBand][iDstOffset]));
1691 : }
1692 21 : CPLError(CE_Warning, CPLE_AppDefined,
1693 : "Value %s in the source dataset has been changed to %s "
1694 : "in the destination dataset to avoid being treated as "
1695 : "NoData. To avoid this, select a different NoData value "
1696 : "for the destination dataset.",
1697 : valueSrc.c_str(), valueDst.c_str());
1698 : }
1699 : }
1700 : }
1701 :
1702 : /************************************************************************/
1703 : /* GWKAvoidNoDataMultiBand() */
1704 : /************************************************************************/
1705 :
1706 524573 : static void GWKAvoidNoDataMultiBand(const GDALWarpKernel *poWK,
1707 : GPtrDiff_t iDstOffset)
1708 : {
1709 524573 : switch (poWK->eWorkingDataType)
1710 : {
1711 523997 : case GDT_UInt8:
1712 523997 : GWKAvoidNoDataMultiBand<std::uint8_t>(poWK, iDstOffset);
1713 523997 : break;
1714 :
1715 64 : case GDT_Int8:
1716 64 : GWKAvoidNoDataMultiBand<std::int8_t>(poWK, iDstOffset);
1717 64 : break;
1718 :
1719 64 : case GDT_Int16:
1720 64 : GWKAvoidNoDataMultiBand<std::int16_t>(poWK, iDstOffset);
1721 64 : break;
1722 :
1723 64 : case GDT_UInt16:
1724 64 : GWKAvoidNoDataMultiBand<std::uint16_t>(poWK, iDstOffset);
1725 64 : break;
1726 :
1727 64 : case GDT_Int32:
1728 64 : GWKAvoidNoDataMultiBand<std::int32_t>(poWK, iDstOffset);
1729 64 : break;
1730 :
1731 64 : case GDT_UInt32:
1732 64 : GWKAvoidNoDataMultiBand<std::uint32_t>(poWK, iDstOffset);
1733 64 : break;
1734 :
1735 64 : case GDT_Int64:
1736 64 : GWKAvoidNoDataMultiBand<std::int64_t>(poWK, iDstOffset);
1737 64 : break;
1738 :
1739 64 : case GDT_UInt64:
1740 64 : GWKAvoidNoDataMultiBand<std::uint64_t>(poWK, iDstOffset);
1741 64 : break;
1742 :
1743 0 : case GDT_Float16:
1744 0 : GWKAvoidNoDataMultiBand<GFloat16>(poWK, iDstOffset);
1745 0 : break;
1746 :
1747 64 : case GDT_Float32:
1748 64 : GWKAvoidNoDataMultiBand<float>(poWK, iDstOffset);
1749 64 : break;
1750 :
1751 64 : case GDT_Float64:
1752 64 : GWKAvoidNoDataMultiBand<double>(poWK, iDstOffset);
1753 64 : break;
1754 :
1755 0 : case GDT_CInt16:
1756 : case GDT_CInt32:
1757 : case GDT_CFloat16:
1758 : case GDT_CFloat32:
1759 : case GDT_CFloat64:
1760 : case GDT_Unknown:
1761 : case GDT_TypeCount:
1762 0 : break;
1763 : }
1764 524573 : }
1765 :
1766 : /************************************************************************/
1767 : /* GWKSetPixelValueRealT() */
1768 : /************************************************************************/
1769 :
1770 : template <class T>
1771 14954277 : static bool GWKSetPixelValueRealT(const GDALWarpKernel *poWK, int iBand,
1772 : GPtrDiff_t iDstOffset, double dfDensity,
1773 : T value, bool bAvoidNoDataSingleBand)
1774 : {
1775 14954277 : T *pDst = reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
1776 :
1777 : /* -------------------------------------------------------------------- */
1778 : /* If the source density is less than 100% we need to fetch the */
1779 : /* existing destination value, and mix it with the source to */
1780 : /* get the new "to apply" value. Also compute composite */
1781 : /* density. */
1782 : /* */
1783 : /* We avoid mixing if density is very near one or risk mixing */
1784 : /* in very extreme nodata values and causing odd results (#1610) */
1785 : /* -------------------------------------------------------------------- */
1786 14954277 : if (dfDensity < 0.9999)
1787 : {
1788 945508 : if (dfDensity < 0.0001)
1789 0 : return true;
1790 :
1791 945508 : double dfDstDensity = 1.0;
1792 :
1793 945508 : if (poWK->pafDstDensity != nullptr)
1794 944036 : dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
1795 1472 : else if (poWK->panDstValid != nullptr &&
1796 0 : !CPLMaskGet(poWK->panDstValid, iDstOffset))
1797 0 : dfDstDensity = 0.0;
1798 :
1799 : // It seems like we also ought to be testing panDstValid[] here!
1800 :
1801 945508 : const double dfDstReal = static_cast<double>(pDst[iDstOffset]);
1802 :
1803 : // The destination density is really only relative to the portion
1804 : // not occluded by the overlay.
1805 945508 : const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
1806 :
1807 945508 : const double dfReal =
1808 945508 : (double(value) * dfDensity + dfDstReal * dfDstInfluence) /
1809 945508 : (dfDensity + dfDstInfluence);
1810 :
1811 : /* --------------------------------------------------------------------
1812 : */
1813 : /* Actually apply the destination value. */
1814 : /* */
1815 : /* Avoid using the destination nodata value for integer datatypes
1816 : */
1817 : /* if by chance it is equal to the computed pixel value. */
1818 : /* --------------------------------------------------------------------
1819 : */
1820 945508 : pDst[iDstOffset] = GWKClampValueT<T>(dfReal);
1821 : }
1822 : else
1823 : {
1824 14008836 : pDst[iDstOffset] = value;
1825 : }
1826 :
1827 14954277 : if (bAvoidNoDataSingleBand)
1828 13681621 : AvoidNoData<T>(poWK, iBand, iDstOffset);
1829 :
1830 14954277 : return true;
1831 : }
1832 :
1833 : /************************************************************************/
1834 : /* ClampRoundAndAvoidNoData() */
1835 : /************************************************************************/
1836 :
1837 : template <class T>
1838 12158105 : inline void ClampRoundAndAvoidNoData(const GDALWarpKernel *poWK, int iBand,
1839 : GPtrDiff_t iDstOffset, double dfReal,
1840 : bool bAvoidNoDataSingleBand)
1841 : {
1842 12158105 : GByte *pabyDst = poWK->papabyDstImage[iBand];
1843 12158105 : T *pDst = reinterpret_cast<T *>(pabyDst);
1844 :
1845 : if constexpr (cpl::NumericLimits<T>::is_integer)
1846 : {
1847 : using std::floor;
1848 11660975 : if (dfReal < static_cast<double>(cpl::NumericLimits<T>::lowest()))
1849 6430 : pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::lowest());
1850 11654575 : else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
1851 23967 : pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::max());
1852 : else if constexpr (cpl::NumericLimits<T>::is_signed)
1853 10410 : pDst[iDstOffset] = static_cast<T>(floor(dfReal + 0.5));
1854 : else
1855 11620165 : pDst[iDstOffset] = static_cast<T>(dfReal + 0.5);
1856 : }
1857 : else
1858 : {
1859 497130 : pDst[iDstOffset] = static_cast<T>(dfReal);
1860 : }
1861 :
1862 12158105 : if (bAvoidNoDataSingleBand)
1863 11857709 : AvoidNoData<T>(poWK, iBand, iDstOffset);
1864 12158105 : }
1865 :
1866 : /************************************************************************/
1867 : /* GWKSetPixelValue() */
1868 : /************************************************************************/
1869 :
1870 11045400 : static bool GWKSetPixelValue(const GDALWarpKernel *poWK, int iBand,
1871 : GPtrDiff_t iDstOffset, double dfDensity,
1872 : double dfReal, double dfImag,
1873 : bool bAvoidNoDataSingleBand)
1874 :
1875 : {
1876 11045400 : GByte *pabyDst = poWK->papabyDstImage[iBand];
1877 :
1878 : /* -------------------------------------------------------------------- */
1879 : /* If the source density is less than 100% we need to fetch the */
1880 : /* existing destination value, and mix it with the source to */
1881 : /* get the new "to apply" value. Also compute composite */
1882 : /* density. */
1883 : /* */
1884 : /* We avoid mixing if density is very near one or risk mixing */
1885 : /* in very extreme nodata values and causing odd results (#1610) */
1886 : /* -------------------------------------------------------------------- */
1887 11045400 : if (dfDensity < 0.9999)
1888 : {
1889 800 : if (dfDensity < 0.0001)
1890 0 : return true;
1891 :
1892 800 : double dfDstDensity = 1.0;
1893 800 : if (poWK->pafDstDensity != nullptr)
1894 800 : dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
1895 0 : else if (poWK->panDstValid != nullptr &&
1896 0 : !CPLMaskGet(poWK->panDstValid, iDstOffset))
1897 0 : dfDstDensity = 0.0;
1898 :
1899 800 : double dfDstReal = 0.0;
1900 800 : double dfDstImag = 0.0;
1901 : // It seems like we also ought to be testing panDstValid[] here!
1902 :
1903 : // TODO(schwehr): Factor out this repreated type of set.
1904 800 : switch (poWK->eWorkingDataType)
1905 : {
1906 0 : case GDT_UInt8:
1907 0 : dfDstReal = pabyDst[iDstOffset];
1908 0 : dfDstImag = 0.0;
1909 0 : break;
1910 :
1911 0 : case GDT_Int8:
1912 0 : dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
1913 0 : dfDstImag = 0.0;
1914 0 : break;
1915 :
1916 400 : case GDT_Int16:
1917 400 : dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
1918 400 : dfDstImag = 0.0;
1919 400 : break;
1920 :
1921 400 : case GDT_UInt16:
1922 400 : dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
1923 400 : dfDstImag = 0.0;
1924 400 : break;
1925 :
1926 0 : case GDT_Int32:
1927 0 : dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
1928 0 : dfDstImag = 0.0;
1929 0 : break;
1930 :
1931 0 : case GDT_UInt32:
1932 0 : dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
1933 0 : dfDstImag = 0.0;
1934 0 : break;
1935 :
1936 0 : case GDT_Int64:
1937 0 : dfDstReal = static_cast<double>(
1938 0 : reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
1939 0 : dfDstImag = 0.0;
1940 0 : break;
1941 :
1942 0 : case GDT_UInt64:
1943 0 : dfDstReal = static_cast<double>(
1944 0 : reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
1945 0 : dfDstImag = 0.0;
1946 0 : break;
1947 :
1948 0 : case GDT_Float16:
1949 0 : dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
1950 0 : dfDstImag = 0.0;
1951 0 : break;
1952 :
1953 0 : case GDT_Float32:
1954 0 : dfDstReal =
1955 0 : double(reinterpret_cast<float *>(pabyDst)[iDstOffset]);
1956 0 : dfDstImag = 0.0;
1957 0 : break;
1958 :
1959 0 : case GDT_Float64:
1960 0 : dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
1961 0 : dfDstImag = 0.0;
1962 0 : break;
1963 :
1964 0 : case GDT_CInt16:
1965 0 : dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2];
1966 0 : dfDstImag =
1967 0 : reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2 + 1];
1968 0 : break;
1969 :
1970 0 : case GDT_CInt32:
1971 0 : dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2];
1972 0 : dfDstImag =
1973 0 : reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2 + 1];
1974 0 : break;
1975 :
1976 0 : case GDT_CFloat16:
1977 : dfDstReal =
1978 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2];
1979 : dfDstImag =
1980 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1];
1981 0 : break;
1982 :
1983 0 : case GDT_CFloat32:
1984 0 : dfDstReal =
1985 0 : double(reinterpret_cast<float *>(pabyDst)[iDstOffset * 2]);
1986 0 : dfDstImag = double(
1987 0 : reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1]);
1988 0 : break;
1989 :
1990 0 : case GDT_CFloat64:
1991 0 : dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset * 2];
1992 0 : dfDstImag =
1993 0 : reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1];
1994 0 : break;
1995 :
1996 0 : case GDT_Unknown:
1997 : case GDT_TypeCount:
1998 0 : CPLAssert(false);
1999 : return false;
2000 : }
2001 :
2002 : // The destination density is really only relative to the portion
2003 : // not occluded by the overlay.
2004 800 : const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
2005 :
2006 800 : dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
2007 800 : (dfDensity + dfDstInfluence);
2008 :
2009 800 : dfImag = (dfImag * dfDensity + dfDstImag * dfDstInfluence) /
2010 800 : (dfDensity + dfDstInfluence);
2011 : }
2012 :
2013 : /* -------------------------------------------------------------------- */
2014 : /* Actually apply the destination value. */
2015 : /* */
2016 : /* Avoid using the destination nodata value for integer datatypes */
2017 : /* if by chance it is equal to the computed pixel value. */
2018 : /* -------------------------------------------------------------------- */
2019 :
2020 11045400 : switch (poWK->eWorkingDataType)
2021 : {
2022 10323000 : case GDT_UInt8:
2023 10323000 : ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal,
2024 : bAvoidNoDataSingleBand);
2025 10323000 : break;
2026 :
2027 1 : case GDT_Int8:
2028 1 : ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal,
2029 : bAvoidNoDataSingleBand);
2030 1 : break;
2031 :
2032 7471 : case GDT_Int16:
2033 7471 : ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal,
2034 : bAvoidNoDataSingleBand);
2035 7471 : break;
2036 :
2037 464 : case GDT_UInt16:
2038 464 : ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal,
2039 : bAvoidNoDataSingleBand);
2040 464 : break;
2041 :
2042 63 : case GDT_UInt32:
2043 63 : ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal,
2044 : bAvoidNoDataSingleBand);
2045 63 : break;
2046 :
2047 63 : case GDT_Int32:
2048 63 : ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal,
2049 : bAvoidNoDataSingleBand);
2050 63 : break;
2051 :
2052 0 : case GDT_UInt64:
2053 0 : ClampRoundAndAvoidNoData<std::uint64_t>(
2054 : poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
2055 0 : break;
2056 :
2057 0 : case GDT_Int64:
2058 0 : ClampRoundAndAvoidNoData<std::int64_t>(
2059 : poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
2060 0 : break;
2061 :
2062 0 : case GDT_Float16:
2063 0 : ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal,
2064 : bAvoidNoDataSingleBand);
2065 0 : break;
2066 :
2067 478957 : case GDT_Float32:
2068 478957 : ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal,
2069 : bAvoidNoDataSingleBand);
2070 478957 : break;
2071 :
2072 149 : case GDT_Float64:
2073 149 : ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal,
2074 : bAvoidNoDataSingleBand);
2075 149 : break;
2076 :
2077 234079 : case GDT_CInt16:
2078 : {
2079 : typedef GInt16 T;
2080 234079 : if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
2081 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
2082 0 : cpl::NumericLimits<T>::min();
2083 234079 : else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
2084 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
2085 0 : cpl::NumericLimits<T>::max();
2086 : else
2087 234079 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
2088 234079 : static_cast<T>(floor(dfReal + 0.5));
2089 234079 : if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
2090 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
2091 0 : cpl::NumericLimits<T>::min();
2092 234079 : else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
2093 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
2094 0 : cpl::NumericLimits<T>::max();
2095 : else
2096 234079 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
2097 234079 : static_cast<T>(floor(dfImag + 0.5));
2098 234079 : break;
2099 : }
2100 :
2101 379 : case GDT_CInt32:
2102 : {
2103 : typedef GInt32 T;
2104 379 : if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
2105 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
2106 0 : cpl::NumericLimits<T>::min();
2107 379 : else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
2108 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
2109 0 : cpl::NumericLimits<T>::max();
2110 : else
2111 379 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
2112 379 : static_cast<T>(floor(dfReal + 0.5));
2113 379 : if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
2114 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
2115 0 : cpl::NumericLimits<T>::min();
2116 379 : else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
2117 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
2118 0 : cpl::NumericLimits<T>::max();
2119 : else
2120 379 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
2121 379 : static_cast<T>(floor(dfImag + 0.5));
2122 379 : break;
2123 : }
2124 :
2125 0 : case GDT_CFloat16:
2126 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2] =
2127 0 : static_cast<GFloat16>(dfReal);
2128 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1] =
2129 0 : static_cast<GFloat16>(dfImag);
2130 0 : break;
2131 :
2132 394 : case GDT_CFloat32:
2133 394 : reinterpret_cast<float *>(pabyDst)[iDstOffset * 2] =
2134 394 : static_cast<float>(dfReal);
2135 394 : reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1] =
2136 394 : static_cast<float>(dfImag);
2137 394 : break;
2138 :
2139 380 : case GDT_CFloat64:
2140 380 : reinterpret_cast<double *>(pabyDst)[iDstOffset * 2] = dfReal;
2141 380 : reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1] = dfImag;
2142 380 : break;
2143 :
2144 0 : case GDT_Unknown:
2145 : case GDT_TypeCount:
2146 0 : return false;
2147 : }
2148 :
2149 11045400 : return true;
2150 : }
2151 :
2152 : /************************************************************************/
2153 : /* GWKSetPixelValueReal() */
2154 : /************************************************************************/
2155 :
2156 1347980 : static bool GWKSetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
2157 : GPtrDiff_t iDstOffset, double dfDensity,
2158 : double dfReal, bool bAvoidNoDataSingleBand)
2159 :
2160 : {
2161 1347980 : GByte *pabyDst = poWK->papabyDstImage[iBand];
2162 :
2163 : /* -------------------------------------------------------------------- */
2164 : /* If the source density is less than 100% we need to fetch the */
2165 : /* existing destination value, and mix it with the source to */
2166 : /* get the new "to apply" value. Also compute composite */
2167 : /* density. */
2168 : /* */
2169 : /* We avoid mixing if density is very near one or risk mixing */
2170 : /* in very extreme nodata values and causing odd results (#1610) */
2171 : /* -------------------------------------------------------------------- */
2172 1347980 : if (dfDensity < 0.9999)
2173 : {
2174 600 : if (dfDensity < 0.0001)
2175 0 : return true;
2176 :
2177 600 : double dfDstReal = 0.0;
2178 600 : double dfDstDensity = 1.0;
2179 :
2180 600 : if (poWK->pafDstDensity != nullptr)
2181 600 : dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
2182 0 : else if (poWK->panDstValid != nullptr &&
2183 0 : !CPLMaskGet(poWK->panDstValid, iDstOffset))
2184 0 : dfDstDensity = 0.0;
2185 :
2186 : // It seems like we also ought to be testing panDstValid[] here!
2187 :
2188 600 : switch (poWK->eWorkingDataType)
2189 : {
2190 0 : case GDT_UInt8:
2191 0 : dfDstReal = pabyDst[iDstOffset];
2192 0 : break;
2193 :
2194 0 : case GDT_Int8:
2195 0 : dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
2196 0 : break;
2197 :
2198 300 : case GDT_Int16:
2199 300 : dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
2200 300 : break;
2201 :
2202 300 : case GDT_UInt16:
2203 300 : dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
2204 300 : break;
2205 :
2206 0 : case GDT_Int32:
2207 0 : dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
2208 0 : break;
2209 :
2210 0 : case GDT_UInt32:
2211 0 : dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
2212 0 : break;
2213 :
2214 0 : case GDT_Int64:
2215 0 : dfDstReal = static_cast<double>(
2216 0 : reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
2217 0 : break;
2218 :
2219 0 : case GDT_UInt64:
2220 0 : dfDstReal = static_cast<double>(
2221 0 : reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
2222 0 : break;
2223 :
2224 0 : case GDT_Float16:
2225 0 : dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
2226 0 : break;
2227 :
2228 0 : case GDT_Float32:
2229 0 : dfDstReal =
2230 0 : double(reinterpret_cast<float *>(pabyDst)[iDstOffset]);
2231 0 : break;
2232 :
2233 0 : case GDT_Float64:
2234 0 : dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
2235 0 : break;
2236 :
2237 0 : case GDT_CInt16:
2238 : case GDT_CInt32:
2239 : case GDT_CFloat16:
2240 : case GDT_CFloat32:
2241 : case GDT_CFloat64:
2242 : case GDT_Unknown:
2243 : case GDT_TypeCount:
2244 0 : CPLAssert(false);
2245 : return false;
2246 : }
2247 :
2248 : // The destination density is really only relative to the portion
2249 : // not occluded by the overlay.
2250 600 : const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
2251 :
2252 600 : dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
2253 600 : (dfDensity + dfDstInfluence);
2254 : }
2255 :
2256 : /* -------------------------------------------------------------------- */
2257 : /* Actually apply the destination value. */
2258 : /* */
2259 : /* Avoid using the destination nodata value for integer datatypes */
2260 : /* if by chance it is equal to the computed pixel value. */
2261 : /* -------------------------------------------------------------------- */
2262 :
2263 1347980 : switch (poWK->eWorkingDataType)
2264 : {
2265 1325840 : case GDT_UInt8:
2266 1325840 : ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal,
2267 : bAvoidNoDataSingleBand);
2268 1325840 : break;
2269 :
2270 112 : case GDT_Int8:
2271 112 : ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal,
2272 : bAvoidNoDataSingleBand);
2273 112 : break;
2274 :
2275 1197 : case GDT_Int16:
2276 1197 : ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal,
2277 : bAvoidNoDataSingleBand);
2278 1197 : break;
2279 :
2280 475 : case GDT_UInt16:
2281 475 : ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal,
2282 : bAvoidNoDataSingleBand);
2283 475 : break;
2284 :
2285 539 : case GDT_UInt32:
2286 539 : ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal,
2287 : bAvoidNoDataSingleBand);
2288 539 : break;
2289 :
2290 1342 : case GDT_Int32:
2291 1342 : ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal,
2292 : bAvoidNoDataSingleBand);
2293 1342 : break;
2294 :
2295 224 : case GDT_UInt64:
2296 224 : ClampRoundAndAvoidNoData<std::uint64_t>(
2297 : poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
2298 224 : break;
2299 :
2300 224 : case GDT_Int64:
2301 224 : ClampRoundAndAvoidNoData<std::int64_t>(
2302 : poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
2303 224 : break;
2304 :
2305 0 : case GDT_Float16:
2306 0 : ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal,
2307 : bAvoidNoDataSingleBand);
2308 0 : break;
2309 :
2310 3538 : case GDT_Float32:
2311 3538 : ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal,
2312 : bAvoidNoDataSingleBand);
2313 3538 : break;
2314 :
2315 14486 : case GDT_Float64:
2316 14486 : ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal,
2317 : bAvoidNoDataSingleBand);
2318 14486 : break;
2319 :
2320 0 : case GDT_CInt16:
2321 : case GDT_CInt32:
2322 : case GDT_CFloat16:
2323 : case GDT_CFloat32:
2324 : case GDT_CFloat64:
2325 0 : return false;
2326 :
2327 0 : case GDT_Unknown:
2328 : case GDT_TypeCount:
2329 0 : CPLAssert(false);
2330 : return false;
2331 : }
2332 :
2333 1347980 : return true;
2334 : }
2335 :
2336 : /************************************************************************/
2337 : /* GWKGetPixelValue() */
2338 : /************************************************************************/
2339 :
2340 : /* It is assumed that panUnifiedSrcValid has been checked before */
2341 :
2342 40173600 : static bool GWKGetPixelValue(const GDALWarpKernel *poWK, int iBand,
2343 : GPtrDiff_t iSrcOffset, double *pdfDensity,
2344 : double *pdfReal, double *pdfImag)
2345 :
2346 : {
2347 40173600 : GByte *pabySrc = poWK->papabySrcImage[iBand];
2348 :
2349 80347200 : if (poWK->papanBandSrcValid != nullptr &&
2350 40173600 : poWK->papanBandSrcValid[iBand] != nullptr &&
2351 0 : !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
2352 : {
2353 0 : *pdfDensity = 0.0;
2354 0 : return false;
2355 : }
2356 :
2357 40173600 : *pdfReal = 0.0;
2358 40173600 : *pdfImag = 0.0;
2359 :
2360 : // TODO(schwehr): Fix casting.
2361 40173600 : switch (poWK->eWorkingDataType)
2362 : {
2363 39096600 : case GDT_UInt8:
2364 39096600 : *pdfReal = pabySrc[iSrcOffset];
2365 39096600 : *pdfImag = 0.0;
2366 39096600 : break;
2367 :
2368 3 : case GDT_Int8:
2369 3 : *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
2370 3 : *pdfImag = 0.0;
2371 3 : break;
2372 :
2373 28229 : case GDT_Int16:
2374 28229 : *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
2375 28229 : *pdfImag = 0.0;
2376 28229 : break;
2377 :
2378 166 : case GDT_UInt16:
2379 166 : *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
2380 166 : *pdfImag = 0.0;
2381 166 : break;
2382 :
2383 63 : case GDT_Int32:
2384 63 : *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
2385 63 : *pdfImag = 0.0;
2386 63 : break;
2387 :
2388 63 : case GDT_UInt32:
2389 63 : *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
2390 63 : *pdfImag = 0.0;
2391 63 : break;
2392 :
2393 0 : case GDT_Int64:
2394 0 : *pdfReal = static_cast<double>(
2395 0 : reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
2396 0 : *pdfImag = 0.0;
2397 0 : break;
2398 :
2399 0 : case GDT_UInt64:
2400 0 : *pdfReal = static_cast<double>(
2401 0 : reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
2402 0 : *pdfImag = 0.0;
2403 0 : break;
2404 :
2405 0 : case GDT_Float16:
2406 0 : *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
2407 0 : *pdfImag = 0.0;
2408 0 : break;
2409 :
2410 1047220 : case GDT_Float32:
2411 1047220 : *pdfReal = double(reinterpret_cast<float *>(pabySrc)[iSrcOffset]);
2412 1047220 : *pdfImag = 0.0;
2413 1047220 : break;
2414 :
2415 587 : case GDT_Float64:
2416 587 : *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
2417 587 : *pdfImag = 0.0;
2418 587 : break;
2419 :
2420 133 : case GDT_CInt16:
2421 133 : *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2];
2422 133 : *pdfImag = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2 + 1];
2423 133 : break;
2424 :
2425 133 : case GDT_CInt32:
2426 133 : *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2];
2427 133 : *pdfImag = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2 + 1];
2428 133 : break;
2429 :
2430 0 : case GDT_CFloat16:
2431 0 : *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2];
2432 0 : *pdfImag =
2433 0 : reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2 + 1];
2434 0 : break;
2435 :
2436 194 : case GDT_CFloat32:
2437 194 : *pdfReal =
2438 194 : double(reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2]);
2439 194 : *pdfImag =
2440 194 : double(reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2 + 1]);
2441 194 : break;
2442 :
2443 138 : case GDT_CFloat64:
2444 138 : *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2];
2445 138 : *pdfImag = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2 + 1];
2446 138 : break;
2447 :
2448 0 : case GDT_Unknown:
2449 : case GDT_TypeCount:
2450 0 : CPLAssert(false);
2451 : *pdfDensity = 0.0;
2452 : return false;
2453 : }
2454 :
2455 40173600 : if (poWK->pafUnifiedSrcDensity != nullptr)
2456 12745700 : *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
2457 : else
2458 27427800 : *pdfDensity = 1.0;
2459 :
2460 40173600 : return *pdfDensity != 0.0;
2461 : }
2462 :
2463 : /************************************************************************/
2464 : /* GWKGetPixelValueReal() */
2465 : /************************************************************************/
2466 :
2467 15516 : static bool GWKGetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
2468 : GPtrDiff_t iSrcOffset, double *pdfDensity,
2469 : double *pdfReal)
2470 :
2471 : {
2472 15516 : GByte *pabySrc = poWK->papabySrcImage[iBand];
2473 :
2474 31034 : if (poWK->papanBandSrcValid != nullptr &&
2475 15518 : poWK->papanBandSrcValid[iBand] != nullptr &&
2476 2 : !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
2477 : {
2478 0 : *pdfDensity = 0.0;
2479 0 : return false;
2480 : }
2481 :
2482 15516 : switch (poWK->eWorkingDataType)
2483 : {
2484 1 : case GDT_UInt8:
2485 1 : *pdfReal = pabySrc[iSrcOffset];
2486 1 : break;
2487 :
2488 0 : case GDT_Int8:
2489 0 : *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
2490 0 : break;
2491 :
2492 1 : case GDT_Int16:
2493 1 : *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
2494 1 : break;
2495 :
2496 1 : case GDT_UInt16:
2497 1 : *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
2498 1 : break;
2499 :
2500 982 : case GDT_Int32:
2501 982 : *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
2502 982 : break;
2503 :
2504 179 : case GDT_UInt32:
2505 179 : *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
2506 179 : break;
2507 :
2508 112 : case GDT_Int64:
2509 112 : *pdfReal = static_cast<double>(
2510 112 : reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
2511 112 : break;
2512 :
2513 112 : case GDT_UInt64:
2514 112 : *pdfReal = static_cast<double>(
2515 112 : reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
2516 112 : break;
2517 :
2518 0 : case GDT_Float16:
2519 0 : *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
2520 0 : break;
2521 :
2522 2 : case GDT_Float32:
2523 2 : *pdfReal = double(reinterpret_cast<float *>(pabySrc)[iSrcOffset]);
2524 2 : break;
2525 :
2526 14126 : case GDT_Float64:
2527 14126 : *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
2528 14126 : break;
2529 :
2530 0 : case GDT_CInt16:
2531 : case GDT_CInt32:
2532 : case GDT_CFloat16:
2533 : case GDT_CFloat32:
2534 : case GDT_CFloat64:
2535 : case GDT_Unknown:
2536 : case GDT_TypeCount:
2537 0 : CPLAssert(false);
2538 : return false;
2539 : }
2540 :
2541 15516 : if (poWK->pafUnifiedSrcDensity != nullptr)
2542 0 : *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
2543 : else
2544 15516 : *pdfDensity = 1.0;
2545 :
2546 15516 : return *pdfDensity != 0.0;
2547 : }
2548 :
2549 : /************************************************************************/
2550 : /* GWKGetPixelRow() */
2551 : /************************************************************************/
2552 :
2553 : /* It is assumed that adfImag[] is set to 0 by caller code for non-complex */
2554 : /* data-types. */
2555 :
2556 2369710 : static bool GWKGetPixelRow(const GDALWarpKernel *poWK, int iBand,
2557 : GPtrDiff_t iSrcOffset, int nHalfSrcLen,
2558 : double *padfDensity, double adfReal[],
2559 : double *padfImag)
2560 : {
2561 : // We know that nSrcLen is even, so we can *always* unroll loops 2x.
2562 2369710 : const int nSrcLen = nHalfSrcLen * 2;
2563 2369710 : bool bHasValid = false;
2564 :
2565 2369710 : if (padfDensity != nullptr)
2566 : {
2567 : // Init the density.
2568 3384030 : for (int i = 0; i < nSrcLen; i += 2)
2569 : {
2570 2211910 : padfDensity[i] = 1.0;
2571 2211910 : padfDensity[i + 1] = 1.0;
2572 : }
2573 :
2574 1172120 : if (poWK->panUnifiedSrcValid != nullptr)
2575 : {
2576 3281460 : for (int i = 0; i < nSrcLen; i += 2)
2577 : {
2578 2142070 : if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i))
2579 2067740 : bHasValid = true;
2580 : else
2581 74323 : padfDensity[i] = 0.0;
2582 :
2583 2142070 : if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i + 1))
2584 2068400 : bHasValid = true;
2585 : else
2586 73668 : padfDensity[i + 1] = 0.0;
2587 : }
2588 :
2589 : // Reset or fail as needed.
2590 1139400 : if (bHasValid)
2591 1116590 : bHasValid = false;
2592 : else
2593 22806 : return false;
2594 : }
2595 :
2596 1149320 : if (poWK->papanBandSrcValid != nullptr &&
2597 0 : poWK->papanBandSrcValid[iBand] != nullptr)
2598 : {
2599 0 : for (int i = 0; i < nSrcLen; i += 2)
2600 : {
2601 0 : if (CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset + i))
2602 0 : bHasValid = true;
2603 : else
2604 0 : padfDensity[i] = 0.0;
2605 :
2606 0 : if (CPLMaskGet(poWK->papanBandSrcValid[iBand],
2607 0 : iSrcOffset + i + 1))
2608 0 : bHasValid = true;
2609 : else
2610 0 : padfDensity[i + 1] = 0.0;
2611 : }
2612 :
2613 : // Reset or fail as needed.
2614 0 : if (bHasValid)
2615 0 : bHasValid = false;
2616 : else
2617 0 : return false;
2618 : }
2619 : }
2620 :
2621 : // TODO(schwehr): Fix casting.
2622 : // Fetch data.
2623 2346910 : switch (poWK->eWorkingDataType)
2624 : {
2625 1136680 : case GDT_UInt8:
2626 : {
2627 1136680 : GByte *pSrc =
2628 1136680 : reinterpret_cast<GByte *>(poWK->papabySrcImage[iBand]);
2629 1136680 : pSrc += iSrcOffset;
2630 3281570 : for (int i = 0; i < nSrcLen; i += 2)
2631 : {
2632 2144890 : adfReal[i] = pSrc[i];
2633 2144890 : adfReal[i + 1] = pSrc[i + 1];
2634 : }
2635 1136680 : break;
2636 : }
2637 :
2638 196 : case GDT_Int8:
2639 : {
2640 196 : GInt8 *pSrc =
2641 196 : reinterpret_cast<GInt8 *>(poWK->papabySrcImage[iBand]);
2642 196 : pSrc += iSrcOffset;
2643 392 : for (int i = 0; i < nSrcLen; i += 2)
2644 : {
2645 196 : adfReal[i] = pSrc[i];
2646 196 : adfReal[i + 1] = pSrc[i + 1];
2647 : }
2648 196 : break;
2649 : }
2650 :
2651 5754 : case GDT_Int16:
2652 : {
2653 5754 : GInt16 *pSrc =
2654 5754 : reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
2655 5754 : pSrc += iSrcOffset;
2656 21772 : for (int i = 0; i < nSrcLen; i += 2)
2657 : {
2658 16018 : adfReal[i] = pSrc[i];
2659 16018 : adfReal[i + 1] = pSrc[i + 1];
2660 : }
2661 5754 : break;
2662 : }
2663 :
2664 4310 : case GDT_UInt16:
2665 : {
2666 4310 : GUInt16 *pSrc =
2667 4310 : reinterpret_cast<GUInt16 *>(poWK->papabySrcImage[iBand]);
2668 4310 : pSrc += iSrcOffset;
2669 18884 : for (int i = 0; i < nSrcLen; i += 2)
2670 : {
2671 14574 : adfReal[i] = pSrc[i];
2672 14574 : adfReal[i + 1] = pSrc[i + 1];
2673 : }
2674 4310 : break;
2675 : }
2676 :
2677 946 : case GDT_Int32:
2678 : {
2679 946 : GInt32 *pSrc =
2680 946 : reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
2681 946 : pSrc += iSrcOffset;
2682 2624 : for (int i = 0; i < nSrcLen; i += 2)
2683 : {
2684 1678 : adfReal[i] = pSrc[i];
2685 1678 : adfReal[i + 1] = pSrc[i + 1];
2686 : }
2687 946 : break;
2688 : }
2689 :
2690 946 : case GDT_UInt32:
2691 : {
2692 946 : GUInt32 *pSrc =
2693 946 : reinterpret_cast<GUInt32 *>(poWK->papabySrcImage[iBand]);
2694 946 : pSrc += iSrcOffset;
2695 2624 : for (int i = 0; i < nSrcLen; i += 2)
2696 : {
2697 1678 : adfReal[i] = pSrc[i];
2698 1678 : adfReal[i + 1] = pSrc[i + 1];
2699 : }
2700 946 : break;
2701 : }
2702 :
2703 196 : case GDT_Int64:
2704 : {
2705 196 : auto pSrc =
2706 196 : reinterpret_cast<std::int64_t *>(poWK->papabySrcImage[iBand]);
2707 196 : pSrc += iSrcOffset;
2708 392 : for (int i = 0; i < nSrcLen; i += 2)
2709 : {
2710 196 : adfReal[i] = static_cast<double>(pSrc[i]);
2711 196 : adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
2712 : }
2713 196 : break;
2714 : }
2715 :
2716 196 : case GDT_UInt64:
2717 : {
2718 196 : auto pSrc =
2719 196 : reinterpret_cast<std::uint64_t *>(poWK->papabySrcImage[iBand]);
2720 196 : pSrc += iSrcOffset;
2721 392 : for (int i = 0; i < nSrcLen; i += 2)
2722 : {
2723 196 : adfReal[i] = static_cast<double>(pSrc[i]);
2724 196 : adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
2725 : }
2726 196 : break;
2727 : }
2728 :
2729 0 : case GDT_Float16:
2730 : {
2731 0 : GFloat16 *pSrc =
2732 0 : reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
2733 0 : pSrc += iSrcOffset;
2734 0 : for (int i = 0; i < nSrcLen; i += 2)
2735 : {
2736 0 : adfReal[i] = pSrc[i];
2737 0 : adfReal[i + 1] = pSrc[i + 1];
2738 : }
2739 0 : break;
2740 : }
2741 :
2742 25270 : case GDT_Float32:
2743 : {
2744 25270 : float *pSrc =
2745 25270 : reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
2746 25270 : pSrc += iSrcOffset;
2747 121739 : for (int i = 0; i < nSrcLen; i += 2)
2748 : {
2749 96469 : adfReal[i] = double(pSrc[i]);
2750 96469 : adfReal[i + 1] = double(pSrc[i + 1]);
2751 : }
2752 25270 : break;
2753 : }
2754 :
2755 946 : case GDT_Float64:
2756 : {
2757 946 : double *pSrc =
2758 946 : reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
2759 946 : pSrc += iSrcOffset;
2760 2624 : for (int i = 0; i < nSrcLen; i += 2)
2761 : {
2762 1678 : adfReal[i] = pSrc[i];
2763 1678 : adfReal[i + 1] = pSrc[i + 1];
2764 : }
2765 946 : break;
2766 : }
2767 :
2768 1169220 : case GDT_CInt16:
2769 : {
2770 1169220 : GInt16 *pSrc =
2771 1169220 : reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
2772 1169220 : pSrc += 2 * iSrcOffset;
2773 4676020 : for (int i = 0; i < nSrcLen; i += 2)
2774 : {
2775 3506800 : adfReal[i] = pSrc[2 * i];
2776 3506800 : padfImag[i] = pSrc[2 * i + 1];
2777 :
2778 3506800 : adfReal[i + 1] = pSrc[2 * i + 2];
2779 3506800 : padfImag[i + 1] = pSrc[2 * i + 3];
2780 : }
2781 1169220 : break;
2782 : }
2783 :
2784 750 : case GDT_CInt32:
2785 : {
2786 750 : GInt32 *pSrc =
2787 750 : reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
2788 750 : pSrc += 2 * iSrcOffset;
2789 2232 : for (int i = 0; i < nSrcLen; i += 2)
2790 : {
2791 1482 : adfReal[i] = pSrc[2 * i];
2792 1482 : padfImag[i] = pSrc[2 * i + 1];
2793 :
2794 1482 : adfReal[i + 1] = pSrc[2 * i + 2];
2795 1482 : padfImag[i + 1] = pSrc[2 * i + 3];
2796 : }
2797 750 : break;
2798 : }
2799 :
2800 0 : case GDT_CFloat16:
2801 : {
2802 0 : GFloat16 *pSrc =
2803 0 : reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
2804 0 : pSrc += 2 * iSrcOffset;
2805 0 : for (int i = 0; i < nSrcLen; i += 2)
2806 : {
2807 0 : adfReal[i] = pSrc[2 * i];
2808 0 : padfImag[i] = pSrc[2 * i + 1];
2809 :
2810 0 : adfReal[i + 1] = pSrc[2 * i + 2];
2811 0 : padfImag[i + 1] = pSrc[2 * i + 3];
2812 : }
2813 0 : break;
2814 : }
2815 :
2816 750 : case GDT_CFloat32:
2817 : {
2818 750 : float *pSrc =
2819 750 : reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
2820 750 : pSrc += 2 * iSrcOffset;
2821 2232 : for (int i = 0; i < nSrcLen; i += 2)
2822 : {
2823 1482 : adfReal[i] = double(pSrc[2 * i]);
2824 1482 : padfImag[i] = double(pSrc[2 * i + 1]);
2825 :
2826 1482 : adfReal[i + 1] = double(pSrc[2 * i + 2]);
2827 1482 : padfImag[i + 1] = double(pSrc[2 * i + 3]);
2828 : }
2829 750 : break;
2830 : }
2831 :
2832 750 : case GDT_CFloat64:
2833 : {
2834 750 : double *pSrc =
2835 750 : reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
2836 750 : pSrc += 2 * iSrcOffset;
2837 2232 : for (int i = 0; i < nSrcLen; i += 2)
2838 : {
2839 1482 : adfReal[i] = pSrc[2 * i];
2840 1482 : padfImag[i] = pSrc[2 * i + 1];
2841 :
2842 1482 : adfReal[i + 1] = pSrc[2 * i + 2];
2843 1482 : padfImag[i + 1] = pSrc[2 * i + 3];
2844 : }
2845 750 : break;
2846 : }
2847 :
2848 0 : case GDT_Unknown:
2849 : case GDT_TypeCount:
2850 0 : CPLAssert(false);
2851 : if (padfDensity)
2852 : memset(padfDensity, 0, nSrcLen * sizeof(double));
2853 : return false;
2854 : }
2855 :
2856 2346910 : if (padfDensity == nullptr)
2857 1197590 : return true;
2858 :
2859 1149320 : if (poWK->pafUnifiedSrcDensity == nullptr)
2860 : {
2861 3256740 : for (int i = 0; i < nSrcLen; i += 2)
2862 : {
2863 : // Take into account earlier calcs.
2864 2127390 : if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
2865 : {
2866 2087480 : padfDensity[i] = 1.0;
2867 2087480 : bHasValid = true;
2868 : }
2869 :
2870 2127390 : if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
2871 : {
2872 2088140 : padfDensity[i + 1] = 1.0;
2873 2088140 : bHasValid = true;
2874 : }
2875 : }
2876 : }
2877 : else
2878 : {
2879 70068 : for (int i = 0; i < nSrcLen; i += 2)
2880 : {
2881 50103 : if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
2882 50103 : padfDensity[i] =
2883 50103 : double(poWK->pafUnifiedSrcDensity[iSrcOffset + i]);
2884 50103 : if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
2885 49252 : bHasValid = true;
2886 :
2887 50103 : if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
2888 50103 : padfDensity[i + 1] =
2889 50103 : double(poWK->pafUnifiedSrcDensity[iSrcOffset + i + 1]);
2890 50103 : if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
2891 49166 : bHasValid = true;
2892 : }
2893 : }
2894 :
2895 1149320 : return bHasValid;
2896 : }
2897 :
2898 : /************************************************************************/
2899 : /* GWKGetPixelT() */
2900 : /************************************************************************/
2901 :
2902 : template <class T>
2903 14964659 : static bool GWKGetPixelT(const GDALWarpKernel *poWK, int iBand,
2904 : GPtrDiff_t iSrcOffset, double *pdfDensity, T *pValue)
2905 :
2906 : {
2907 14964659 : T *pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
2908 :
2909 33172043 : if ((poWK->panUnifiedSrcValid != nullptr &&
2910 29929218 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset)) ||
2911 14964659 : (poWK->papanBandSrcValid != nullptr &&
2912 589863 : poWK->papanBandSrcValid[iBand] != nullptr &&
2913 589863 : !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset)))
2914 : {
2915 9 : *pdfDensity = 0.0;
2916 9 : return false;
2917 : }
2918 :
2919 14964559 : *pValue = pSrc[iSrcOffset];
2920 :
2921 14964559 : if (poWK->pafUnifiedSrcDensity == nullptr)
2922 13842266 : *pdfDensity = 1.0;
2923 : else
2924 1122362 : *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
2925 :
2926 14964559 : return *pdfDensity != 0.0;
2927 : }
2928 :
2929 : /************************************************************************/
2930 : /* GWKBilinearResample() */
2931 : /* Set of bilinear interpolators */
2932 : /************************************************************************/
2933 :
2934 77448 : static bool GWKBilinearResample4Sample(const GDALWarpKernel *poWK, int iBand,
2935 : double dfSrcX, double dfSrcY,
2936 : double *pdfDensity, double *pdfReal,
2937 : double *pdfImag)
2938 :
2939 : {
2940 : // Save as local variables to avoid following pointers.
2941 77448 : const int nSrcXSize = poWK->nSrcXSize;
2942 77448 : const int nSrcYSize = poWK->nSrcYSize;
2943 :
2944 77448 : int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
2945 77448 : int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
2946 77448 : double dfRatioX = 1.5 - (dfSrcX - iSrcX);
2947 77448 : double dfRatioY = 1.5 - (dfSrcY - iSrcY);
2948 77448 : bool bShifted = false;
2949 :
2950 77448 : if (iSrcX == -1)
2951 : {
2952 1534 : iSrcX = 0;
2953 1534 : dfRatioX = 1;
2954 : }
2955 77448 : if (iSrcY == -1)
2956 : {
2957 7734 : iSrcY = 0;
2958 7734 : dfRatioY = 1;
2959 : }
2960 77448 : GPtrDiff_t iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
2961 :
2962 : // Shift so we don't overrun the array.
2963 77448 : if (static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize == iSrcOffset + 1 ||
2964 77330 : static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize ==
2965 77330 : iSrcOffset + nSrcXSize + 1)
2966 : {
2967 230 : bShifted = true;
2968 230 : --iSrcOffset;
2969 : }
2970 :
2971 77448 : double adfDensity[2] = {0.0, 0.0};
2972 77448 : double adfReal[2] = {0.0, 0.0};
2973 77448 : double adfImag[2] = {0.0, 0.0};
2974 77448 : double dfAccumulatorReal = 0.0;
2975 77448 : double dfAccumulatorImag = 0.0;
2976 77448 : double dfAccumulatorDensity = 0.0;
2977 77448 : double dfAccumulatorDivisor = 0.0;
2978 :
2979 77448 : const GPtrDiff_t nSrcPixels =
2980 77448 : static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize;
2981 : // Get pixel row.
2982 77448 : if (iSrcY >= 0 && iSrcY < nSrcYSize && iSrcOffset >= 0 &&
2983 154896 : iSrcOffset < nSrcPixels &&
2984 77448 : GWKGetPixelRow(poWK, iBand, iSrcOffset, 1, adfDensity, adfReal,
2985 : adfImag))
2986 : {
2987 71504 : double dfMult1 = dfRatioX * dfRatioY;
2988 71504 : double dfMult2 = (1.0 - dfRatioX) * dfRatioY;
2989 :
2990 : // Shifting corrected.
2991 71504 : if (bShifted)
2992 : {
2993 230 : adfReal[0] = adfReal[1];
2994 230 : adfImag[0] = adfImag[1];
2995 230 : adfDensity[0] = adfDensity[1];
2996 : }
2997 :
2998 : // Upper Left Pixel.
2999 71504 : if (iSrcX >= 0 && iSrcX < nSrcXSize &&
3000 71504 : adfDensity[0] > SRC_DENSITY_THRESHOLD_DOUBLE)
3001 : {
3002 66050 : dfAccumulatorDivisor += dfMult1;
3003 :
3004 66050 : dfAccumulatorReal += adfReal[0] * dfMult1;
3005 66050 : dfAccumulatorImag += adfImag[0] * dfMult1;
3006 66050 : dfAccumulatorDensity += adfDensity[0] * dfMult1;
3007 : }
3008 :
3009 : // Upper Right Pixel.
3010 71504 : if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
3011 70609 : adfDensity[1] > SRC_DENSITY_THRESHOLD_DOUBLE)
3012 : {
3013 65335 : dfAccumulatorDivisor += dfMult2;
3014 :
3015 65335 : dfAccumulatorReal += adfReal[1] * dfMult2;
3016 65335 : dfAccumulatorImag += adfImag[1] * dfMult2;
3017 65335 : dfAccumulatorDensity += adfDensity[1] * dfMult2;
3018 : }
3019 : }
3020 :
3021 : // Get pixel row.
3022 77448 : if (iSrcY + 1 >= 0 && iSrcY + 1 < nSrcYSize &&
3023 228032 : iSrcOffset + nSrcXSize >= 0 && iSrcOffset + nSrcXSize < nSrcPixels &&
3024 73136 : GWKGetPixelRow(poWK, iBand, iSrcOffset + nSrcXSize, 1, adfDensity,
3025 : adfReal, adfImag))
3026 : {
3027 67577 : double dfMult1 = dfRatioX * (1.0 - dfRatioY);
3028 67577 : double dfMult2 = (1.0 - dfRatioX) * (1.0 - dfRatioY);
3029 :
3030 : // Shifting corrected
3031 67577 : if (bShifted)
3032 : {
3033 112 : adfReal[0] = adfReal[1];
3034 112 : adfImag[0] = adfImag[1];
3035 112 : adfDensity[0] = adfDensity[1];
3036 : }
3037 :
3038 : // Lower Left Pixel
3039 67577 : if (iSrcX >= 0 && iSrcX < nSrcXSize &&
3040 67577 : adfDensity[0] > SRC_DENSITY_THRESHOLD_DOUBLE)
3041 : {
3042 62298 : dfAccumulatorDivisor += dfMult1;
3043 :
3044 62298 : dfAccumulatorReal += adfReal[0] * dfMult1;
3045 62298 : dfAccumulatorImag += adfImag[0] * dfMult1;
3046 62298 : dfAccumulatorDensity += adfDensity[0] * dfMult1;
3047 : }
3048 :
3049 : // Lower Right Pixel.
3050 67577 : if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
3051 66800 : adfDensity[1] > SRC_DENSITY_THRESHOLD_DOUBLE)
3052 : {
3053 61823 : dfAccumulatorDivisor += dfMult2;
3054 :
3055 61823 : dfAccumulatorReal += adfReal[1] * dfMult2;
3056 61823 : dfAccumulatorImag += adfImag[1] * dfMult2;
3057 61823 : dfAccumulatorDensity += adfDensity[1] * dfMult2;
3058 : }
3059 : }
3060 :
3061 : /* -------------------------------------------------------------------- */
3062 : /* Return result. */
3063 : /* -------------------------------------------------------------------- */
3064 77448 : if (dfAccumulatorDivisor == 1.0)
3065 : {
3066 45929 : *pdfReal = dfAccumulatorReal;
3067 45929 : *pdfImag = dfAccumulatorImag;
3068 45929 : *pdfDensity = dfAccumulatorDensity;
3069 45929 : return false;
3070 : }
3071 31519 : else if (dfAccumulatorDivisor < 0.00001)
3072 : {
3073 0 : *pdfReal = 0.0;
3074 0 : *pdfImag = 0.0;
3075 0 : *pdfDensity = 0.0;
3076 0 : return false;
3077 : }
3078 : else
3079 : {
3080 31519 : *pdfReal = dfAccumulatorReal / dfAccumulatorDivisor;
3081 31519 : *pdfImag = dfAccumulatorImag / dfAccumulatorDivisor;
3082 31519 : *pdfDensity = dfAccumulatorDensity / dfAccumulatorDivisor;
3083 31519 : return true;
3084 : }
3085 : }
3086 :
3087 : template <class T>
3088 8979122 : static bool GWKBilinearResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
3089 : int iBand, double dfSrcX,
3090 : double dfSrcY, T *pValue)
3091 :
3092 : {
3093 :
3094 8979122 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
3095 8979122 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
3096 8979122 : GPtrDiff_t iSrcOffset =
3097 8979122 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3098 8979122 : const double dfRatioX = 1.5 - (dfSrcX - iSrcX);
3099 8979122 : const double dfRatioY = 1.5 - (dfSrcY - iSrcY);
3100 :
3101 8979122 : const T *const pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
3102 :
3103 8979122 : if (iSrcX >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
3104 6224079 : iSrcY + 1 < poWK->nSrcYSize)
3105 : {
3106 6032592 : const double dfAccumulator =
3107 6032592 : (double(pSrc[iSrcOffset]) * dfRatioX +
3108 6032592 : double(pSrc[iSrcOffset + 1]) * (1.0 - dfRatioX)) *
3109 : dfRatioY +
3110 6032592 : (double(pSrc[iSrcOffset + poWK->nSrcXSize]) * dfRatioX +
3111 6032592 : double(pSrc[iSrcOffset + 1 + poWK->nSrcXSize]) *
3112 6032592 : (1.0 - dfRatioX)) *
3113 6032592 : (1.0 - dfRatioY);
3114 :
3115 6032592 : *pValue = GWKRoundValueT<T>(dfAccumulator);
3116 :
3117 6032592 : return true;
3118 : }
3119 :
3120 2946530 : double dfAccumulatorDivisor = 0.0;
3121 2946530 : double dfAccumulator = 0.0;
3122 :
3123 : // Upper Left Pixel.
3124 2946530 : if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY >= 0 &&
3125 564876 : iSrcY < poWK->nSrcYSize)
3126 : {
3127 564876 : const double dfMult = dfRatioX * dfRatioY;
3128 :
3129 564876 : dfAccumulatorDivisor += dfMult;
3130 :
3131 564876 : dfAccumulator += double(pSrc[iSrcOffset]) * dfMult;
3132 : }
3133 :
3134 : // Upper Right Pixel.
3135 2946530 : if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
3136 2261926 : iSrcY < poWK->nSrcYSize)
3137 : {
3138 2261926 : const double dfMult = (1.0 - dfRatioX) * dfRatioY;
3139 :
3140 2261926 : dfAccumulatorDivisor += dfMult;
3141 :
3142 2261926 : dfAccumulator += double(pSrc[iSrcOffset + 1]) * dfMult;
3143 : }
3144 :
3145 : // Lower Right Pixel.
3146 2946530 : if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
3147 2512924 : iSrcY + 1 < poWK->nSrcYSize)
3148 : {
3149 2261243 : const double dfMult = (1.0 - dfRatioX) * (1.0 - dfRatioY);
3150 :
3151 2261243 : dfAccumulatorDivisor += dfMult;
3152 :
3153 2261243 : dfAccumulator +=
3154 2261243 : double(pSrc[iSrcOffset + 1 + poWK->nSrcXSize]) * dfMult;
3155 : }
3156 :
3157 : // Lower Left Pixel.
3158 2946530 : if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
3159 815601 : iSrcY + 1 < poWK->nSrcYSize)
3160 : {
3161 563917 : const double dfMult = dfRatioX * (1.0 - dfRatioY);
3162 :
3163 563917 : dfAccumulatorDivisor += dfMult;
3164 :
3165 563917 : dfAccumulator += double(pSrc[iSrcOffset + poWK->nSrcXSize]) * dfMult;
3166 : }
3167 :
3168 : /* -------------------------------------------------------------------- */
3169 : /* Return result. */
3170 : /* -------------------------------------------------------------------- */
3171 2946530 : double dfValue = 0.0;
3172 :
3173 2946530 : if (dfAccumulatorDivisor < 0.00001)
3174 : {
3175 0 : *pValue = 0;
3176 0 : return false;
3177 : }
3178 2946530 : else if (dfAccumulatorDivisor == 1.0)
3179 : {
3180 22176 : dfValue = dfAccumulator;
3181 : }
3182 : else
3183 : {
3184 2924358 : dfValue = dfAccumulator / dfAccumulatorDivisor;
3185 : }
3186 :
3187 2946530 : *pValue = GWKRoundValueT<T>(dfValue);
3188 :
3189 2946530 : return true;
3190 : }
3191 :
3192 : /************************************************************************/
3193 : /* GWKCubicResample() */
3194 : /* Set of bicubic interpolators using cubic convolution. */
3195 : /************************************************************************/
3196 :
3197 : // http://verona.fi-p.unam.mx/boris/practicas/CubConvInterp.pdf Formula 18
3198 : // or http://en.wikipedia.org/wiki/Cubic_Hermite_spline : CINTx(p_1,p0,p1,p2)
3199 : // http://en.wikipedia.org/wiki/Bicubic_interpolation: matrix notation
3200 :
// Evaluate the cubic convolution polynomial through the four samples
// f0..f3. The caller supplies the three coefficients that multiply the
// linear, quadratic and cubic terms as distance1, distance2 and distance3.
template <typename T>
static inline T CubicConvolution(T distance1, T distance2, T distance3, T f0,
                                 T f1, T f2, T f3)
{
    const T linearTerm = distance1 * (f2 - f0);
    const T quadraticTerm = distance2 * (2 * f0 - 5 * f1 + 4 * f2 - f3);
    const T cubicTerm = distance3 * (3 * (f1 - f2) + f3 - f0);

    return f1 + T(0.5) * (linearTerm + quadraticTerm + cubicTerm);
}
3209 :
3210 : /************************************************************************/
3211 : /* GWKCubicComputeWeights() */
3212 : /************************************************************************/
3213 :
3214 : // The four weights sum to 1: adfCoeffs[2] = 1.0 - (adfCoeffs[0] + adfCoeffs[1] + adfCoeffs[3]);
3215 :
// Compute the four cubic convolution weights for the fractional position x
// and store them in coeffs[0..3]. coeffs[1] carries the constant term 1, so
// for x == 0 the weights are {0, 1, 0, 0}.
template <typename T>
static inline void GWKCubicComputeWeights(T x, T coeffs[4])
{
    // Factors shared by several weights.
    const T xHalf = T(0.5) * x;
    const T xTriple = T(3.0) * x;
    const T xSquaredHalf = xHalf * x;

    // Remaining polynomial factor of each weight.
    const T factor0 = -1 + x * (2 - x);
    const T factor1 = -5 + xTriple;
    const T factor2 = 1 + x * (4 - xTriple);
    const T factor3 = -1 + x;

    coeffs[0] = xHalf * factor0;
    coeffs[1] = 1 + xSquaredHalf * factor1;
    coeffs[2] = xHalf * factor2;
    coeffs[3] = xSquaredHalf * factor3;
}
3228 :
// Dot product of the four weights in v1 with the four samples in v2; each
// sample is converted to double before being multiplied. Terms are summed
// from index 0 to index 3.
template <typename T> inline double CONVOL4(const double v1[4], const T v2[4])
{
    double dfSum = v1[0] * double(v2[0]);
    for (int i = 1; i < 4; ++i)
        dfSum += v1[i] * double(v2[i]);
    return dfSum;
}
3234 :
// The two macros below are compiled out by the enclosing "#if 0".
// GWKCubicComputeWeights_Optim2MAX stores the polynomial factors of the cubic
// weights without their common 0.5 * dfX scaling, and writes that scaling to
// dfHalfX. CONVOL4_Optim2MAX applies the scaling once to the whole weighted
// sum and adds (v)[1], which stands for the constant 1 of the second weight.
3235 : #if 0
3236 : // Optimal (in theory...) for max 2 convolutions: 14 multiplications
3237 : // instead of 17.
3238 : // TODO(schwehr): Use an inline function.
3239 : #define GWKCubicComputeWeights_Optim2MAX(dfX_, adfCoeffs, dfHalfX) \
3240 : { \
3241 : const double dfX = dfX_; \
3242 : dfHalfX = 0.5 * dfX; \
3243 : const double dfThreeX = 3.0 * dfX; \
3244 : const double dfXMinus1 = dfX - 1; \
3245 : \
3246 : adfCoeffs[0] = -1 + dfX * (2 - dfX); \
3247 : adfCoeffs[1] = dfX * (-5 + dfThreeX); \
3248 : /*adfCoeffs[2] = 1 + dfX * (4 - dfThreeX);*/ \
3249 : adfCoeffs[2] = -dfXMinus1 - adfCoeffs[1]; \
3250 : /*adfCoeffs[3] = dfX * (-1 + dfX); */ \
3251 : adfCoeffs[3] = dfXMinus1 - adfCoeffs[0]; \
3252 : }
3253 :
// Evaluates (v)[1] + dfHalfX * sum(adfCoeffs[k] * (v)[k]) for k = 0..3.
3254 : // TODO(schwehr): Use an inline function.
3255 : #define CONVOL4_Optim2MAX(adfCoeffs, v, dfHalfX) \
3256 : ((v)[1] + (dfHalfX) * ((adfCoeffs)[0] * (v)[0] + (adfCoeffs)[1] * (v)[1] + \
3257 : (adfCoeffs)[2] * (v)[2] + (adfCoeffs)[3] * (v)[3]))
3258 : #endif
3259 :
3260 302045 : static bool GWKCubicResample4Sample(const GDALWarpKernel *poWK, int iBand,
3261 : double dfSrcX, double dfSrcY,
3262 : double *pdfDensity, double *pdfReal,
3263 : double *pdfImag)
3264 :
3265 : {
3266 302045 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3267 302045 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3268 302045 : GPtrDiff_t iSrcOffset =
3269 302045 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3270 302045 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3271 302045 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3272 302045 : double adfDensity[4] = {};
3273 302045 : double adfReal[4] = {};
3274 302045 : double adfImag[4] = {};
3275 :
3276 : // Get the bilinear interpolation at the image borders.
3277 302045 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3278 286140 : iSrcY + 2 >= poWK->nSrcYSize)
3279 24670 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3280 24670 : pdfDensity, pdfReal, pdfImag);
3281 :
3282 277375 : double adfValueDens[4] = {};
3283 277375 : double adfValueReal[4] = {};
3284 277375 : double adfValueImag[4] = {};
3285 :
3286 277375 : double adfCoeffsX[4] = {};
3287 277375 : GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3288 :
3289 1240570 : for (GPtrDiff_t i = -1; i < 3; i++)
3290 : {
3291 1009640 : if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
3292 998035 : 2, adfDensity, adfReal, adfImag) ||
3293 998035 : adfDensity[0] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3294 980395 : adfDensity[1] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3295 2979770 : adfDensity[2] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3296 972094 : adfDensity[3] < SRC_DENSITY_THRESHOLD_DOUBLE)
3297 : {
3298 46449 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3299 46449 : pdfDensity, pdfReal, pdfImag);
3300 : }
3301 :
3302 963196 : adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
3303 963196 : adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
3304 963196 : adfValueImag[i + 1] = CONVOL4(adfCoeffsX, adfImag);
3305 : }
3306 :
3307 : /* -------------------------------------------------------------------- */
3308 : /* For now, if we have any pixels missing in the kernel area, */
3309 : /* we fallback on using bilinear interpolation. Ideally we */
3310 : /* should do "weight adjustment" of our results similarly to */
3311 : /* what is done for the cubic spline and lanc. interpolators. */
3312 : /* -------------------------------------------------------------------- */
3313 :
3314 230926 : double adfCoeffsY[4] = {};
3315 230926 : GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3316 :
3317 230926 : *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3318 230926 : *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3319 230926 : *pdfImag = CONVOL4(adfCoeffsY, adfValueImag);
3320 :
3321 230926 : return true;
3322 : }
3323 :
3324 : #ifdef USE_SSE2
3325 :
3326 : /************************************************************************/
3327 : /* XMMLoad4Values() */
3328 : /* */
3329 : /* Load 4 packed byte or uint16, cast them to float and put them in a */
3330 : /* m128 register. */
3331 : /************************************************************************/
3332 :
3333 567016000 : static CPL_INLINE __m128 XMMLoad4Values(const GByte *ptr)
3334 : {
3335 : unsigned int i;
3336 567016000 : memcpy(&i, ptr, 4);
3337 1134030000 : __m128i xmm_i = _mm_cvtsi32_si128(i);
3338 : // Zero extend 4 packed unsigned 8-bit integers in a to packed
3339 : // 32-bit integers.
3340 : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
3341 : xmm_i = _mm_cvtepu8_epi32(xmm_i);
3342 : #else
3343 1134030000 : xmm_i = _mm_unpacklo_epi8(xmm_i, _mm_setzero_si128());
3344 1134030000 : xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
3345 : #endif
3346 1134030000 : return _mm_cvtepi32_ps(xmm_i);
3347 : }
3348 :
3349 1108340 : static CPL_INLINE __m128 XMMLoad4Values(const GUInt16 *ptr)
3350 : {
3351 : GUInt64 i;
3352 1108340 : memcpy(&i, ptr, 8);
3353 2216690 : __m128i xmm_i = _mm_cvtsi64_si128(i);
3354 : // Zero extend 4 packed unsigned 16-bit integers in a to packed
3355 : // 32-bit integers.
3356 : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
3357 : xmm_i = _mm_cvtepu16_epi32(xmm_i);
3358 : #else
3359 2216690 : xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
3360 : #endif
3361 2216690 : return _mm_cvtepi32_ps(xmm_i);
3362 : }
3363 :
3364 : /************************************************************************/
3365 : /* XMMHorizontalAdd() */
3366 : /* */
3367 : /* Return the sum of the 4 floating points of the register. */
3368 : /************************************************************************/
3369 :
3370 : #if defined(__SSE3__) || defined(USE_NEON_OPTIMIZATIONS)
3371 : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
3372 : {
3373 : __m128 shuf = _mm_movehdup_ps(v); // (v3 , v3 , v1 , v1)
3374 : __m128 sums = _mm_add_ps(v, shuf); // (v3+v3, v3+v2, v1+v1, v1+v0)
3375 : shuf = _mm_movehl_ps(shuf, sums); // (v3 , v3 , v3+v3, v3+v2)
3376 : sums = _mm_add_ss(sums, shuf); // (v1+v0)+(v3+v2)
3377 : return _mm_cvtss_f32(sums);
3378 : }
3379 : #else
3380 142031000 : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
3381 : {
3382 142031000 : __m128 shuf = _mm_movehl_ps(v, v); // (v3 , v2 , v3 , v2)
3383 142031000 : __m128 sums = _mm_add_ps(v, shuf); // (v3+v3, v2+v2, v3+v1, v2+v0)
3384 142031000 : shuf = _mm_shuffle_ps(sums, sums, 1); // (v2+v0, v2+v0, v2+v0, v3+v1)
3385 142031000 : sums = _mm_add_ss(sums, shuf); // (v2+v0)+(v3+v1)
3386 142031000 : return _mm_cvtss_f32(sums);
3387 : }
3388 : #endif
3389 :
3390 : #endif // define USE_SSE2
3391 :
3392 : /************************************************************************/
3393 : /* GWKCubicResampleSrcMaskIsDensity4SampleRealT() */
3394 : /************************************************************************/
3395 :
3396 : // Note: if USE_SSE_CUBIC_IMPL, only instantiate that for Byte and UInt16,
3397 : // because there are a few assumptions above those types.
3398 : // We do not define USE_SSE_CUBIC_IMPL since in practice, it gives zero
3399 : // perf benefit.
3400 :
3401 : template <class T>
3402 389755 : static CPL_INLINE bool GWKCubicResampleSrcMaskIsDensity4SampleRealT(
3403 : const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
3404 : double *pdfDensity, double *pdfReal)
3405 : {
3406 389755 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3407 389755 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3408 389755 : const GPtrDiff_t iSrcOffset =
3409 389755 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3410 :
3411 : // Get the bilinear interpolation at the image borders.
3412 389755 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3413 387271 : iSrcY + 2 >= poWK->nSrcYSize)
3414 : {
3415 2484 : double adfImagIgnored[4] = {};
3416 2484 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3417 2484 : pdfDensity, pdfReal, adfImagIgnored);
3418 : }
3419 :
3420 : #if defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
3421 : const float fDeltaX = static_cast<float>(dfSrcX) - 0.5f - iSrcX;
3422 : const float fDeltaY = static_cast<float>(dfSrcY) - 0.5f - iSrcY;
3423 :
3424 : // TODO(schwehr): Explain the magic numbers.
3425 : float afTemp[4 + 4 + 4 + 1];
3426 : float *pafAligned =
3427 : reinterpret_cast<float *>(afTemp + ((size_t)afTemp & 0xf));
3428 : float *pafCoeffs = pafAligned;
3429 : float *pafDensity = pafAligned + 4;
3430 : float *pafValue = pafAligned + 8;
3431 :
3432 : const float fHalfDeltaX = 0.5f * fDeltaX;
3433 : const float fThreeDeltaX = 3.0f * fDeltaX;
3434 : const float fHalfDeltaX2 = fHalfDeltaX * fDeltaX;
3435 :
3436 : pafCoeffs[0] = fHalfDeltaX * (-1 + fDeltaX * (2 - fDeltaX));
3437 : pafCoeffs[1] = 1 + fHalfDeltaX2 * (-5 + fThreeDeltaX);
3438 : pafCoeffs[2] = fHalfDeltaX * (1 + fDeltaX * (4 - fThreeDeltaX));
3439 : pafCoeffs[3] = fHalfDeltaX2 * (-1 + fDeltaX);
3440 : __m128 xmmCoeffs = _mm_load_ps(pafCoeffs);
3441 : const __m128 xmmThreshold = _mm_load1_ps(&SRC_DENSITY_THRESHOLD_FLOAT);
3442 :
3443 : __m128 xmmMaskLowDensity = _mm_setzero_ps();
3444 : for (GPtrDiff_t i = -1, iOffset = iSrcOffset - poWK->nSrcXSize - 1; i < 3;
3445 : i++, iOffset += poWK->nSrcXSize)
3446 : {
3447 : const __m128 xmmDensity =
3448 : _mm_loadu_ps(poWK->pafUnifiedSrcDensity + iOffset);
3449 : xmmMaskLowDensity = _mm_or_ps(xmmMaskLowDensity,
3450 : _mm_cmplt_ps(xmmDensity, xmmThreshold));
3451 : pafDensity[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
3452 :
3453 : const __m128 xmmValues =
3454 : XMMLoad4Values(((T *)poWK->papabySrcImage[iBand]) + iOffset);
3455 : pafValue[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValues));
3456 : }
3457 : if (_mm_movemask_ps(xmmMaskLowDensity))
3458 : {
3459 : double adfImagIgnored[4] = {};
3460 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3461 : pdfDensity, pdfReal, adfImagIgnored);
3462 : }
3463 :
3464 : const float fHalfDeltaY = 0.5f * fDeltaY;
3465 : const float fThreeDeltaY = 3.0f * fDeltaY;
3466 : const float fHalfDeltaY2 = fHalfDeltaY * fDeltaY;
3467 :
3468 : pafCoeffs[0] = fHalfDeltaY * (-1 + fDeltaY * (2 - fDeltaY));
3469 : pafCoeffs[1] = 1 + fHalfDeltaY2 * (-5 + fThreeDeltaY);
3470 : pafCoeffs[2] = fHalfDeltaY * (1 + fDeltaY * (4 - fThreeDeltaY));
3471 : pafCoeffs[3] = fHalfDeltaY2 * (-1 + fDeltaY);
3472 :
3473 : xmmCoeffs = _mm_load_ps(pafCoeffs);
3474 :
3475 : const __m128 xmmDensity = _mm_load_ps(pafDensity);
3476 : const __m128 xmmValue = _mm_load_ps(pafValue);
3477 : *pdfDensity = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
3478 : *pdfReal = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValue));
3479 :
3480 : // We did all above computations on float32 whereas the general case is
3481 : // float64. Not sure if one is fundamentally more correct than the other
3482 : // one, but we want our optimization to give the same result as the
3483 : // general case as much as possible, so if the resulting value is
3484 : // close to some_int_value + 0.5, redo the computation with the general
3485 : // case.
3486 : // Note: If other types than Byte or UInt16, will need changes.
3487 : if (fabs(*pdfReal - static_cast<int>(*pdfReal) - 0.5) > .007)
3488 : return true;
3489 :
3490 : #endif // defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
3491 :
3492 387271 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3493 387271 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3494 :
3495 387271 : double adfValueDens[4] = {};
3496 387271 : double adfValueReal[4] = {};
3497 :
3498 387271 : double adfCoeffsX[4] = {};
3499 387271 : GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3500 :
3501 387271 : double adfCoeffsY[4] = {};
3502 387271 : GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3503 :
3504 1930200 : for (GPtrDiff_t i = -1; i < 3; i++)
3505 : {
3506 1544480 : const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
3507 : #if !(defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2))
3508 1544480 : if (poWK->pafUnifiedSrcDensity[iOffset + 0] <
3509 1542940 : SRC_DENSITY_THRESHOLD_FLOAT ||
3510 1542940 : poWK->pafUnifiedSrcDensity[iOffset + 1] <
3511 1542940 : SRC_DENSITY_THRESHOLD_FLOAT ||
3512 1542940 : poWK->pafUnifiedSrcDensity[iOffset + 2] <
3513 1542940 : SRC_DENSITY_THRESHOLD_FLOAT ||
3514 1542940 : poWK->pafUnifiedSrcDensity[iOffset + 3] <
3515 : SRC_DENSITY_THRESHOLD_FLOAT)
3516 : {
3517 1551 : double adfImagIgnored[4] = {};
3518 1551 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3519 : pdfDensity, pdfReal,
3520 1551 : adfImagIgnored);
3521 : }
3522 : #endif
3523 :
3524 3085860 : adfValueDens[i + 1] =
3525 1542930 : CONVOL4(adfCoeffsX, poWK->pafUnifiedSrcDensity + iOffset);
3526 :
3527 1542930 : adfValueReal[i + 1] = CONVOL4(
3528 : adfCoeffsX,
3529 1542930 : reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
3530 : }
3531 :
3532 385720 : *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3533 385720 : *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3534 :
3535 385720 : return true;
3536 : }
3537 :
3538 : /************************************************************************/
3539 : /* GWKCubicResampleSrcMaskIsDensity4SampleReal() */
3540 : /* Bi-cubic when source has and only has pafUnifiedSrcDensity. */
3541 : /************************************************************************/
3542 :
3543 0 : static bool GWKCubicResampleSrcMaskIsDensity4SampleReal(
3544 : const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
3545 : double *pdfDensity, double *pdfReal)
3546 :
3547 : {
3548 0 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3549 0 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3550 0 : const GPtrDiff_t iSrcOffset =
3551 0 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3552 0 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3553 0 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3554 :
3555 : // Get the bilinear interpolation at the image borders.
3556 0 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3557 0 : iSrcY + 2 >= poWK->nSrcYSize)
3558 : {
3559 0 : double adfImagIgnored[4] = {};
3560 0 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3561 0 : pdfDensity, pdfReal, adfImagIgnored);
3562 : }
3563 :
3564 0 : double adfCoeffsX[4] = {};
3565 0 : GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3566 :
3567 0 : double adfCoeffsY[4] = {};
3568 0 : GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3569 :
3570 0 : double adfValueDens[4] = {};
3571 0 : double adfValueReal[4] = {};
3572 0 : double adfDensity[4] = {};
3573 0 : double adfReal[4] = {};
3574 0 : double adfImagIgnored[4] = {};
3575 :
3576 0 : for (GPtrDiff_t i = -1; i < 3; i++)
3577 : {
3578 0 : if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
3579 0 : 2, adfDensity, adfReal, adfImagIgnored) ||
3580 0 : adfDensity[0] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3581 0 : adfDensity[1] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3582 0 : adfDensity[2] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3583 0 : adfDensity[3] < SRC_DENSITY_THRESHOLD_DOUBLE)
3584 : {
3585 0 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3586 : pdfDensity, pdfReal,
3587 0 : adfImagIgnored);
3588 : }
3589 :
3590 0 : adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
3591 0 : adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
3592 : }
3593 :
3594 0 : *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3595 0 : *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3596 :
3597 0 : return true;
3598 : }
3599 :
3600 : template <class T>
3601 2300964 : static bool GWKCubicResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
3602 : int iBand, double dfSrcX,
3603 : double dfSrcY, T *pValue)
3604 :
3605 : {
3606 2300964 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3607 2300964 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3608 2300964 : const GPtrDiff_t iSrcOffset =
3609 2300964 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3610 2300964 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3611 2300964 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3612 2300964 : const double dfDeltaY2 = dfDeltaY * dfDeltaY;
3613 2300964 : const double dfDeltaY3 = dfDeltaY2 * dfDeltaY;
3614 :
3615 : // Get the bilinear interpolation at the image borders.
3616 2300964 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3617 1883033 : iSrcY + 2 >= poWK->nSrcYSize)
3618 490244 : return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
3619 490244 : pValue);
3620 :
3621 1810720 : double adfCoeffs[4] = {};
3622 1810720 : GWKCubicComputeWeights(dfDeltaX, adfCoeffs);
3623 :
3624 1810720 : double adfValue[4] = {};
3625 :
3626 9053590 : for (GPtrDiff_t i = -1; i < 3; i++)
3627 : {
3628 7242876 : const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
3629 :
3630 7242876 : adfValue[i + 1] = CONVOL4(
3631 : adfCoeffs,
3632 7242876 : reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
3633 : }
3634 :
3635 : const double dfValue =
3636 1810720 : CubicConvolution(dfDeltaY, dfDeltaY2, dfDeltaY3, adfValue[0],
3637 : adfValue[1], adfValue[2], adfValue[3]);
3638 :
3639 1810720 : *pValue = GWKClampValueT<T>(dfValue);
3640 :
3641 1810720 : return true;
3642 : }
3643 :
3644 : /************************************************************************/
3645 : /* GWKLanczosSinc() */
3646 : /************************************************************************/
3647 :
3648 : /*
3649 : * Lanczos windowed sinc interpolation kernel with radius r.
3650 : * /
3651 : * | sinc(x) * sinc(x/r), if |x| < r
3652 : * L(x) = | 1, if x = 0 ,
3653 : * | 0, otherwise
3654 : * \
3655 : *
3656 : * where sinc(x) = sin(PI * x) / (PI * x).
3657 : */
3658 :
// Evaluate sinc(x) * sinc(x / 3) with sinc(t) = sin(PI * t) / (PI * t),
// returning 1.0 for x == 0. No test against the kernel radius is made here.
static double GWKLanczosSinc(double dfX)
{
    if (dfX != 0.0)
    {
        const double dfArg = M_PI * dfX;    // PI * x
        const double dfArgOver3 = dfArg / 3;  // PI * x / 3

        // Only one sin() call is needed: with s = sin(PI * x / 3), the
        // identity sin(3a) = 3 sin(a) - 4 sin^3(a) gives
        //   sin(PI * x) * sin(PI * x / 3) = (3 - 4 s^2) * s^2.
        const double dfSinThird = sin(dfArgOver3);
        const double dfSinThirdSq = dfSinThird * dfSinThird;
        const double dfNumerator = (3 - 4 * dfSinThirdSq) * dfSinThirdSq;
        const double dfDenominator = dfArg * dfArgOver3;
        return dfNumerator / dfDenominator;
    }

    return 1.0;
}
3675 :
// In-place variant of the Lanczos (radius 3) weight for four abscissas:
// each padfValues[i] is replaced by sinc(x) * sinc(x / 3) (1.0 when x == 0),
// and the sum of the four resulting weights is returned.
static double GWKLanczosSinc4Values(double *padfValues)
{
    for (int iVal = 0; iVal < 4; ++iVal)
    {
        const double dfX = padfValues[iVal];
        if (dfX == 0.0)
        {
            padfValues[iVal] = 1.0;
            continue;
        }

        const double dfArg = M_PI * dfX;
        const double dfArgOver3 = dfArg / 3;

        // With s = sin(PI * x / 3) and sin(3a) = 3 sin(a) - 4 sin^3(a):
        //   sin(PI * x) * sin(PI * x / 3) = (3 - 4 s^2) * s^2.
        const double dfSinThird = sin(dfArgOver3);
        const double dfSinThirdSq = dfSinThird * dfSinThird;
        const double dfNumerator = (3 - 4 * dfSinThirdSq) * dfSinThirdSq;
        const double dfDenominator = dfArg * dfArgOver3;
        padfValues[iVal] = dfNumerator / dfDenominator;
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3700 :
3701 : /************************************************************************/
3702 : /* GWKBilinear() */
3703 : /************************************************************************/
3704 :
// Triangle (bilinear) kernel: 1 - |x| for |x| <= 1, and 0 elsewhere.
static double GWKBilinear(double dfX)
{
    const double dfDistance = fabs(dfX);
    return (dfDistance <= 1.0) ? 1 - dfDistance : 0.0;
}
3713 :
// In-place triangle (bilinear) kernel for four abscissas: each padfValues[i]
// becomes 1 - |x| when |x| <= 1 and 0 otherwise. Returns the sum of the four
// resulting weights.
static double GWKBilinear4Values(double *padfValues)
{
    for (int iVal = 0; iVal < 4; ++iVal)
    {
        const double dfDistance = fabs(padfValues[iVal]);
        padfValues[iVal] = (dfDistance <= 1.0) ? 1 - dfDistance : 0.0;
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3738 :
3739 : /************************************************************************/
3740 : /* GWKCubic() */
3741 : /************************************************************************/
3742 :
3743 82838 : static double GWKCubic(double dfX)
3744 : {
3745 82838 : return CubicKernel(dfX);
3746 : }
3747 :
// In-place piecewise cubic kernel for four abscissas. For each x:
//   |x| <= 1      : x^2 * (1.5 |x| - 2.5) + 1
//   1 < |x| <= 2  : x^2 * (-0.5 |x| + 2.5) - 4 |x| + 2
//   otherwise     : 0
// Each padfValues[i] is overwritten with its weight and the sum of the four
// weights is returned.
static double GWKCubic4Values(double *padfValues)
{
    for (int iVal = 0; iVal < 4; ++iVal)
    {
        const double dfX = padfValues[iVal];
        const double dfDistance = fabs(dfX);
        const double dfSquare = dfX * dfX;

        double dfWeight = 0.0;
        if (dfDistance <= 1.0)
        {
            dfWeight = dfSquare * (1.5 * dfDistance - 2.5) + 1.0;
        }
        else if (dfDistance <= 2.0)
        {
            dfWeight =
                dfSquare * (-0.5 * dfDistance + 2.5) - 4.0 * dfDistance + 2.0;
        }

        padfValues[iVal] = dfWeight;
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3789 :
3790 : /************************************************************************/
3791 : /* GWKBSpline() */
3792 : /************************************************************************/
3793 :
3794 : // https://www.cs.utexas.edu/~fussell/courses/cs384g-fall2013/lectures/mitchell/Mitchell.pdf
3795 : // Equation 8 with (B,C)=(1,0)
3796 : // 1/6 * ( 3 * |x|^3 - 6 * |x|^2 + 4) |x| < 1
3797 : // 1/6 * ( -|x|^3 + 6 |x|^2 - 12|x| + 8) |x| >= 1 and |x| < 2
3798 :
// Cubic B-spline weight for abscissa x, written as a sum of truncated cubes:
//   (x+2)^3 - 4 (x+1)^3 + 6 x^3 - 4 (x-1)^3
// where each term only contributes once its base is strictly positive, and a
// term is only considered when all the preceding ones contributed.
// The 1/6 normalisation factor of the textbook formula is NOT applied, and
// 0.0 is returned when x + 2 is not strictly positive.
static double GWKBSpline(double x)
{
    const double dfPlus2 = x + 2.0;
    const double dfPlus1 = x + 1.0;
    const double dfMinus1 = x - 1.0;

    // Computed up-front, as in most cases it ends up being needed.
    const double dfPlus2Cubed = dfPlus2 * dfPlus2 * dfPlus2;

    if (!(dfPlus2 > 0.0))
        return 0.0;

    // Contribution of the (x+1), x and (x-1) terms.
    double dfLowerTerms = 0.0;
    if (dfPlus1 > 0.0)
    {
        // Contribution of the x and (x-1) terms.
        double dfInnerTerms = 0.0;
        if (x > 0.0)
        {
            const double dfMinus1Term =
                (dfMinus1 > 0.0) ? -4.0 * dfMinus1 * dfMinus1 * dfMinus1 : 0.0;
            dfInnerTerms = dfMinus1Term + 6.0 * x * x * x;
        }
        dfLowerTerms = dfInnerTerms + -4.0 * dfPlus1 * dfPlus1 * dfPlus1;
    }

    return dfLowerTerms + dfPlus2Cubed;  // * 0.166666666666666666666
}
3822 :
// In-place cubic B-spline weights for four abscissas. Each padfValues[i] is
// replaced by the sum of truncated cubes
//   (x+2)^3 - 4 (x+1)^3 + 6 x^3 - 4 (x-1)^3
// (each term contributing only once its base is strictly positive, and only
// when the preceding terms contributed), without the 1/6 normalisation.
// Returns the sum of the four resulting weights.
static double GWKBSpline4Values(double *padfValues)
{
    for (int iVal = 0; iVal < 4; ++iVal)
    {
        const double x = padfValues[iVal];
        const double dfPlus2 = x + 2.0;
        const double dfPlus1 = x + 1.0;
        const double dfMinus1 = x - 1.0;

        // Computed up-front, as in most cases it ends up being needed.
        const double dfPlus2Cubed = dfPlus2 * dfPlus2 * dfPlus2;

        if (!(dfPlus2 > 0.0))
        {
            padfValues[iVal] = 0.0;
            continue;
        }

        // Contribution of the (x+1), x and (x-1) terms.
        double dfLowerTerms = 0.0;
        if (dfPlus1 > 0.0)
        {
            // Contribution of the x and (x-1) terms.
            double dfInnerTerms = 0.0;
            if (x > 0.0)
            {
                const double dfMinus1Term =
                    (dfMinus1 > 0.0) ? -4.0 * dfMinus1 * dfMinus1 * dfMinus1
                                     : 0.0;
                dfInnerTerms = dfMinus1Term + 6.0 * x * x * x;
            }
            dfLowerTerms = dfInnerTerms + -4.0 * dfPlus1 * dfPlus1 * dfPlus1;
        }

        padfValues[iVal] =
            dfLowerTerms + dfPlus2Cubed;  // * 0.166666666666666666666
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3852 : /************************************************************************/
3853 : /* GWKResampleWrkStruct */
3854 : /************************************************************************/
3855 :
3856 : typedef struct _GWKResampleWrkStruct GWKResampleWrkStruct;
3857 :
3858 : typedef bool (*pfnGWKResampleType)(const GDALWarpKernel *poWK, int iBand,
3859 : double dfSrcX, double dfSrcY,
3860 : double *pdfDensity, double *pdfReal,
3861 : double *pdfImag,
3862 : GWKResampleWrkStruct *psWrkStruct);
3863 :
3864 : struct _GWKResampleWrkStruct
3865 : {
3866 : pfnGWKResampleType pfnGWKResample;
3867 :
3868 : // Space for saved X weights.
3869 : double *padfWeightsX;
3870 : bool *pabCalcX;
3871 :
3872 : double *padfWeightsY; // Only used by GWKResampleOptimizedLanczos.
3873 : int iLastSrcX; // Only used by GWKResampleOptimizedLanczos.
3874 : int iLastSrcY; // Only used by GWKResampleOptimizedLanczos.
3875 : double dfLastDeltaX; // Only used by GWKResampleOptimizedLanczos.
3876 : double dfLastDeltaY; // Only used by GWKResampleOptimizedLanczos.
3877 : double dfCosPiXScale; // Only used by GWKResampleOptimizedLanczos.
3878 : double dfSinPiXScale; // Only used by GWKResampleOptimizedLanczos.
3879 : double dfCosPiXScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3880 : double dfSinPiXScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3881 : double dfCosPiYScale; // Only used by GWKResampleOptimizedLanczos.
3882 : double dfSinPiYScale; // Only used by GWKResampleOptimizedLanczos.
3883 : double dfCosPiYScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3884 : double dfSinPiYScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3885 :
3886 : // Space for saving a row of pixels.
3887 : double *padfRowDensity;
3888 : double *padfRowReal;
3889 : double *padfRowImag;
3890 : };
3891 :
3892 : /************************************************************************/
3893 : /* GWKResampleCreateWrkStruct() */
3894 : /************************************************************************/
3895 :
3896 : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
3897 : double dfSrcY, double *pdfDensity, double *pdfReal,
3898 : double *pdfImag, GWKResampleWrkStruct *psWrkStruct);
3899 :
3900 : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
3901 : double dfSrcX, double dfSrcY,
3902 : double *pdfDensity, double *pdfReal,
3903 : double *pdfImag,
3904 : GWKResampleWrkStruct *psWrkStruct);
3905 :
3906 401 : static GWKResampleWrkStruct *GWKResampleCreateWrkStruct(GDALWarpKernel *poWK)
3907 : {
3908 401 : const int nXDist = (poWK->nXRadius + 1) * 2;
3909 401 : const int nYDist = (poWK->nYRadius + 1) * 2;
3910 :
3911 : GWKResampleWrkStruct *psWrkStruct = static_cast<GWKResampleWrkStruct *>(
3912 401 : CPLCalloc(1, sizeof(GWKResampleWrkStruct)));
3913 :
3914 : // Alloc space for saved X weights.
3915 401 : psWrkStruct->padfWeightsX =
3916 401 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3917 401 : psWrkStruct->pabCalcX =
3918 401 : static_cast<bool *>(CPLMalloc(nXDist * sizeof(bool)));
3919 :
3920 401 : psWrkStruct->padfWeightsY =
3921 401 : static_cast<double *>(CPLCalloc(nYDist, sizeof(double)));
3922 401 : psWrkStruct->iLastSrcX = -10;
3923 401 : psWrkStruct->iLastSrcY = -10;
3924 401 : psWrkStruct->dfLastDeltaX = -10;
3925 401 : psWrkStruct->dfLastDeltaY = -10;
3926 :
3927 : // Alloc space for saving a row of pixels.
3928 401 : if (poWK->pafUnifiedSrcDensity == nullptr &&
3929 365 : poWK->panUnifiedSrcValid == nullptr &&
3930 342 : poWK->papanBandSrcValid == nullptr)
3931 : {
3932 342 : psWrkStruct->padfRowDensity = nullptr;
3933 : }
3934 : else
3935 : {
3936 59 : psWrkStruct->padfRowDensity =
3937 59 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3938 : }
3939 401 : psWrkStruct->padfRowReal =
3940 401 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3941 401 : psWrkStruct->padfRowImag =
3942 401 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3943 :
3944 401 : if (poWK->eResample == GRA_Lanczos)
3945 : {
3946 65 : psWrkStruct->pfnGWKResample = GWKResampleOptimizedLanczos;
3947 :
3948 65 : if (poWK->dfXScale < 1)
3949 : {
3950 4 : psWrkStruct->dfCosPiXScaleOver3 = cos(M_PI / 3 * poWK->dfXScale);
3951 4 : psWrkStruct->dfSinPiXScaleOver3 =
3952 4 : sqrt(1 - psWrkStruct->dfCosPiXScaleOver3 *
3953 4 : psWrkStruct->dfCosPiXScaleOver3);
3954 : // "Naive":
3955 : // const double dfCosPiXScale = cos( M_PI * dfXScale );
3956 : // const double dfSinPiXScale = sin( M_PI * dfXScale );
3957 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
3958 4 : psWrkStruct->dfCosPiXScale = (4 * psWrkStruct->dfCosPiXScaleOver3 *
3959 4 : psWrkStruct->dfCosPiXScaleOver3 -
3960 4 : 3) *
3961 4 : psWrkStruct->dfCosPiXScaleOver3;
3962 4 : psWrkStruct->dfSinPiXScale = sqrt(
3963 4 : 1 - psWrkStruct->dfCosPiXScale * psWrkStruct->dfCosPiXScale);
3964 : }
3965 :
3966 65 : if (poWK->dfYScale < 1)
3967 : {
3968 12 : psWrkStruct->dfCosPiYScaleOver3 = cos(M_PI / 3 * poWK->dfYScale);
3969 12 : psWrkStruct->dfSinPiYScaleOver3 =
3970 12 : sqrt(1 - psWrkStruct->dfCosPiYScaleOver3 *
3971 12 : psWrkStruct->dfCosPiYScaleOver3);
3972 : // "Naive":
3973 : // const double dfCosPiYScale = cos( M_PI * dfYScale );
3974 : // const double dfSinPiYScale = sin( M_PI * dfYScale );
3975 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
3976 12 : psWrkStruct->dfCosPiYScale = (4 * psWrkStruct->dfCosPiYScaleOver3 *
3977 12 : psWrkStruct->dfCosPiYScaleOver3 -
3978 12 : 3) *
3979 12 : psWrkStruct->dfCosPiYScaleOver3;
3980 12 : psWrkStruct->dfSinPiYScale = sqrt(
3981 12 : 1 - psWrkStruct->dfCosPiYScale * psWrkStruct->dfCosPiYScale);
3982 : }
3983 : }
3984 : else
3985 336 : psWrkStruct->pfnGWKResample = GWKResample;
3986 :
3987 401 : return psWrkStruct;
3988 : }
3989 :
3990 : /************************************************************************/
3991 : /* GWKResampleDeleteWrkStruct() */
3992 : /************************************************************************/
3993 :
3994 401 : static void GWKResampleDeleteWrkStruct(GWKResampleWrkStruct *psWrkStruct)
3995 : {
3996 401 : CPLFree(psWrkStruct->padfWeightsX);
3997 401 : CPLFree(psWrkStruct->padfWeightsY);
3998 401 : CPLFree(psWrkStruct->pabCalcX);
3999 401 : CPLFree(psWrkStruct->padfRowDensity);
4000 401 : CPLFree(psWrkStruct->padfRowReal);
4001 401 : CPLFree(psWrkStruct->padfRowImag);
4002 401 : CPLFree(psWrkStruct);
4003 401 : }
4004 :
4005 : /************************************************************************/
4006 : /* GWKResample() */
4007 : /************************************************************************/
4008 :
4009 239383 : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
4010 : double dfSrcY, double *pdfDensity, double *pdfReal,
4011 : double *pdfImag, GWKResampleWrkStruct *psWrkStruct)
4012 :
4013 : {
4014 : // Save as local variables to avoid following pointers in loops.
4015 239383 : const int nSrcXSize = poWK->nSrcXSize;
4016 239383 : const int nSrcYSize = poWK->nSrcYSize;
4017 :
4018 239383 : double dfAccumulatorReal = 0.0;
4019 239383 : double dfAccumulatorImag = 0.0;
4020 239383 : double dfAccumulatorDensity = 0.0;
4021 239383 : double dfAccumulatorWeight = 0.0;
4022 239383 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4023 239383 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4024 239383 : const GPtrDiff_t iSrcOffset =
4025 239383 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4026 239383 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4027 239383 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4028 :
4029 239383 : const double dfXScale = poWK->dfXScale;
4030 239383 : const double dfYScale = poWK->dfYScale;
4031 :
4032 239383 : const int nXDist = (poWK->nXRadius + 1) * 2;
4033 :
4034 : // Space for saved X weights.
4035 239383 : double *padfWeightsX = psWrkStruct->padfWeightsX;
4036 239383 : bool *pabCalcX = psWrkStruct->pabCalcX;
4037 :
4038 : // Space for saving a row of pixels.
4039 239383 : double *padfRowDensity = psWrkStruct->padfRowDensity;
4040 239383 : double *padfRowReal = psWrkStruct->padfRowReal;
4041 239383 : double *padfRowImag = psWrkStruct->padfRowImag;
4042 :
4043 : // Mark as needing calculation (don't calculate the weights yet,
4044 : // because a mask may render it unnecessary).
4045 239383 : memset(pabCalcX, false, nXDist * sizeof(bool));
4046 :
4047 239383 : FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample];
4048 239383 : CPLAssert(pfnGetWeight);
4049 :
4050 : // Skip sampling over edge of image.
4051 239383 : int j = poWK->nFiltInitY;
4052 239383 : int jMax = poWK->nYRadius;
4053 239383 : if (iSrcY + j < 0)
4054 566 : j = -iSrcY;
4055 239383 : if (iSrcY + jMax >= nSrcYSize)
4056 662 : jMax = nSrcYSize - iSrcY - 1;
4057 :
4058 239383 : int iMin = poWK->nFiltInitX;
4059 239383 : int iMax = poWK->nXRadius;
4060 239383 : if (iSrcX + iMin < 0)
4061 566 : iMin = -iSrcX;
4062 239383 : if (iSrcX + iMax >= nSrcXSize)
4063 659 : iMax = nSrcXSize - iSrcX - 1;
4064 :
4065 239383 : const int bXScaleBelow1 = (dfXScale < 1.0);
4066 239383 : const int bYScaleBelow1 = (dfYScale < 1.0);
4067 :
4068 239383 : GPtrDiff_t iRowOffset =
4069 239383 : iSrcOffset + static_cast<GPtrDiff_t>(j - 1) * nSrcXSize + iMin;
4070 :
4071 : // Loop over pixel rows in the kernel.
4072 1445930 : for (; j <= jMax; ++j)
4073 : {
4074 1206540 : iRowOffset += nSrcXSize;
4075 :
4076 : // Get pixel values.
4077 : // We can potentially read extra elements after the "normal" end of the
4078 : // source arrays, but the contract of papabySrcImage[iBand],
4079 : // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
4080 : // is to have WARP_EXTRA_ELTS reserved at their end.
4081 1206540 : if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
4082 : padfRowDensity, padfRowReal, padfRowImag))
4083 72 : continue;
4084 :
4085 : // Calculate the Y weight.
4086 : double dfWeight1 = (bYScaleBelow1)
4087 1206470 : ? pfnGetWeight((j - dfDeltaY) * dfYScale)
4088 1600 : : pfnGetWeight(j - dfDeltaY);
4089 :
4090 : // Iterate over pixels in row.
4091 1206470 : double dfAccumulatorRealLocal = 0.0;
4092 1206470 : double dfAccumulatorImagLocal = 0.0;
4093 1206470 : double dfAccumulatorDensityLocal = 0.0;
4094 1206470 : double dfAccumulatorWeightLocal = 0.0;
4095 :
4096 7317420 : for (int i = iMin; i <= iMax; ++i)
4097 : {
4098 : // Skip sampling if pixel has zero density.
4099 6110940 : if (padfRowDensity != nullptr &&
4100 77277 : padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD_DOUBLE)
4101 546 : continue;
4102 :
4103 6110400 : double dfWeight2 = 0.0;
4104 :
4105 : // Make or use a cached set of weights for this row.
4106 6110400 : if (pabCalcX[i - iMin])
4107 : {
4108 : // Use saved weight value instead of recomputing it.
4109 4903920 : dfWeight2 = padfWeightsX[i - iMin];
4110 : }
4111 : else
4112 : {
4113 : // Calculate & save the X weight.
4114 1206480 : padfWeightsX[i - iMin] = dfWeight2 =
4115 1206480 : (bXScaleBelow1) ? pfnGetWeight((i - dfDeltaX) * dfXScale)
4116 1600 : : pfnGetWeight(i - dfDeltaX);
4117 :
4118 1206480 : pabCalcX[i - iMin] = true;
4119 : }
4120 :
4121 : // Accumulate!
4122 6110400 : dfAccumulatorRealLocal += padfRowReal[i - iMin] * dfWeight2;
4123 6110400 : dfAccumulatorImagLocal += padfRowImag[i - iMin] * dfWeight2;
4124 6110400 : if (padfRowDensity != nullptr)
4125 76731 : dfAccumulatorDensityLocal +=
4126 76731 : padfRowDensity[i - iMin] * dfWeight2;
4127 6110400 : dfAccumulatorWeightLocal += dfWeight2;
4128 : }
4129 :
4130 1206470 : dfAccumulatorReal += dfAccumulatorRealLocal * dfWeight1;
4131 1206470 : dfAccumulatorImag += dfAccumulatorImagLocal * dfWeight1;
4132 1206470 : dfAccumulatorDensity += dfAccumulatorDensityLocal * dfWeight1;
4133 1206470 : dfAccumulatorWeight += dfAccumulatorWeightLocal * dfWeight1;
4134 : }
4135 :
4136 239383 : if (dfAccumulatorWeight < 0.000001 ||
4137 1887 : (padfRowDensity != nullptr && dfAccumulatorDensity < 0.000001))
4138 : {
4139 0 : *pdfDensity = 0.0;
4140 0 : return false;
4141 : }
4142 :
4143 : // Calculate the output taking into account weighting.
4144 239383 : if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
4145 : {
4146 239380 : *pdfReal = dfAccumulatorReal / dfAccumulatorWeight;
4147 239380 : *pdfImag = dfAccumulatorImag / dfAccumulatorWeight;
4148 239380 : if (padfRowDensity != nullptr)
4149 1884 : *pdfDensity = dfAccumulatorDensity / dfAccumulatorWeight;
4150 : else
4151 237496 : *pdfDensity = 1.0;
4152 : }
4153 : else
4154 : {
4155 3 : *pdfReal = dfAccumulatorReal;
4156 3 : *pdfImag = dfAccumulatorImag;
4157 3 : if (padfRowDensity != nullptr)
4158 3 : *pdfDensity = dfAccumulatorDensity;
4159 : else
4160 0 : *pdfDensity = 1.0;
4161 : }
4162 :
4163 239383 : return true;
4164 : }
4165 :
4166 : /************************************************************************/
4167 : /* GWKResampleOptimizedLanczos() */
4168 : /************************************************************************/
4169 :
4170 634574 : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
4171 : double dfSrcX, double dfSrcY,
4172 : double *pdfDensity, double *pdfReal,
4173 : double *pdfImag,
4174 : GWKResampleWrkStruct *psWrkStruct)
4175 :
4176 : {
4177 : // Save as local variables to avoid following pointers in loops.
4178 634574 : const int nSrcXSize = poWK->nSrcXSize;
4179 634574 : const int nSrcYSize = poWK->nSrcYSize;
4180 :
4181 634574 : double dfAccumulatorReal = 0.0;
4182 634574 : double dfAccumulatorImag = 0.0;
4183 634574 : double dfAccumulatorDensity = 0.0;
4184 634574 : double dfAccumulatorWeight = 0.0;
4185 634574 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4186 634574 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4187 634574 : const GPtrDiff_t iSrcOffset =
4188 634574 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4189 634574 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4190 634574 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4191 :
4192 634574 : const double dfXScale = poWK->dfXScale;
4193 634574 : const double dfYScale = poWK->dfYScale;
4194 :
4195 : // Space for saved X weights.
4196 634574 : double *const padfWeightsXShifted =
4197 634574 : psWrkStruct->padfWeightsX - poWK->nFiltInitX;
4198 634574 : double *const padfWeightsYShifted =
4199 634574 : psWrkStruct->padfWeightsY - poWK->nFiltInitY;
4200 :
4201 : // Space for saving a row of pixels.
4202 634574 : double *const padfRowDensity = psWrkStruct->padfRowDensity;
4203 634574 : double *const padfRowReal = psWrkStruct->padfRowReal;
4204 634574 : double *const padfRowImag = psWrkStruct->padfRowImag;
4205 :
4206 : // Skip sampling over edge of image.
4207 634574 : int jMin = poWK->nFiltInitY;
4208 634574 : int jMax = poWK->nYRadius;
4209 634574 : if (iSrcY + jMin < 0)
4210 17334 : jMin = -iSrcY;
4211 634574 : if (iSrcY + jMax >= nSrcYSize)
4212 5638 : jMax = nSrcYSize - iSrcY - 1;
4213 :
4214 634574 : int iMin = poWK->nFiltInitX;
4215 634574 : int iMax = poWK->nXRadius;
4216 634574 : if (iSrcX + iMin < 0)
4217 19595 : iMin = -iSrcX;
4218 634574 : if (iSrcX + iMax >= nSrcXSize)
4219 6817 : iMax = nSrcXSize - iSrcX - 1;
4220 :
4221 634574 : if (dfXScale < 1.0)
4222 : {
4223 462945 : while ((iMin - dfDeltaX) * dfXScale < -3.0)
4224 260083 : iMin++;
4225 263534 : while ((iMax - dfDeltaX) * dfXScale > 3.0)
4226 60672 : iMax--;
4227 :
4228 : // clang-format off
4229 : /*
4230 : Naive version:
4231 : for (int i = iMin; i <= iMax; ++i)
4232 : {
4233 : psWrkStruct->padfWeightsXShifted[i] =
4234 : GWKLanczosSinc((i - dfDeltaX) * dfXScale);
4235 : }
4236 :
4237 : but given that:
4238 :
4239 : GWKLanczosSinc(x):
4240 : if (dfX == 0.0)
4241 : return 1.0;
4242 :
4243 : const double dfPIX = M_PI * dfX;
4244 : const double dfPIXoverR = dfPIX / 3;
4245 : const double dfPIX2overR = dfPIX * dfPIXoverR;
4246 : return sin(dfPIX) * sin(dfPIXoverR) / dfPIX2overR;
4247 :
4248 : and
4249 : sin (a + b) = sin a cos b + cos a sin b.
4250 : cos (a + b) = cos a cos b - sin a sin b.
4251 :
4252 : we can skip any sin() computation within the loop
4253 : */
4254 : // clang-format on
4255 :
4256 202862 : if (iSrcX != psWrkStruct->iLastSrcX ||
4257 131072 : dfDeltaX != psWrkStruct->dfLastDeltaX)
4258 : {
4259 71790 : double dfX = (iMin - dfDeltaX) * dfXScale;
4260 :
4261 71790 : double dfPIXover3 = M_PI / 3 * dfX;
4262 71790 : double dfCosOver3 = cos(dfPIXover3);
4263 71790 : double dfSinOver3 = sin(dfPIXover3);
4264 :
4265 : // "Naive":
4266 : // double dfSin = sin( M_PI * dfX );
4267 : // double dfCos = cos( M_PI * dfX );
4268 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
4269 71790 : double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
4270 71790 : double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
4271 :
4272 71790 : const double dfCosPiXScaleOver3 = psWrkStruct->dfCosPiXScaleOver3;
4273 71790 : const double dfSinPiXScaleOver3 = psWrkStruct->dfSinPiXScaleOver3;
4274 71790 : const double dfCosPiXScale = psWrkStruct->dfCosPiXScale;
4275 71790 : const double dfSinPiXScale = psWrkStruct->dfSinPiXScale;
4276 71790 : constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
4277 71790 : padfWeightsXShifted[iMin] =
4278 71790 : dfX == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfX * dfX);
4279 683646 : for (int i = iMin + 1; i <= iMax; ++i)
4280 : {
4281 611856 : dfX += dfXScale;
4282 611856 : const double dfNewSin =
4283 611856 : dfSin * dfCosPiXScale + dfCos * dfSinPiXScale;
4284 611856 : const double dfNewSinOver3 = dfSinOver3 * dfCosPiXScaleOver3 +
4285 611856 : dfCosOver3 * dfSinPiXScaleOver3;
4286 611856 : padfWeightsXShifted[i] =
4287 : dfX == 0
4288 611856 : ? 1.0
4289 611856 : : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfX * dfX);
4290 611856 : const double dfNewCos =
4291 611856 : dfCos * dfCosPiXScale - dfSin * dfSinPiXScale;
4292 611856 : const double dfNewCosOver3 = dfCosOver3 * dfCosPiXScaleOver3 -
4293 611856 : dfSinOver3 * dfSinPiXScaleOver3;
4294 611856 : dfSin = dfNewSin;
4295 611856 : dfCos = dfNewCos;
4296 611856 : dfSinOver3 = dfNewSinOver3;
4297 611856 : dfCosOver3 = dfNewCosOver3;
4298 : }
4299 :
4300 71790 : psWrkStruct->iLastSrcX = iSrcX;
4301 71790 : psWrkStruct->dfLastDeltaX = dfDeltaX;
4302 : }
4303 : }
4304 : else
4305 : {
4306 789372 : while (iMin - dfDeltaX < -3.0)
4307 357660 : iMin++;
4308 431712 : while (iMax - dfDeltaX > 3.0)
4309 0 : iMax--;
4310 :
4311 431712 : if (iSrcX != psWrkStruct->iLastSrcX ||
4312 225330 : dfDeltaX != psWrkStruct->dfLastDeltaX)
4313 : {
4314 : // Optimisation of GWKLanczosSinc(i - dfDeltaX) based on the
4315 : // following trigonometric formulas.
4316 :
4317 : // TODO(schwehr): Move this somewhere where it can be rendered at
4318 : // LaTeX.
4319 : // clang-format off
4320 : // sin(M_PI * (dfBase + k)) = sin(M_PI * dfBase) * cos(M_PI * k) +
4321 : // cos(M_PI * dfBase) * sin(M_PI * k)
4322 : // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k) + dfCosPIBase * sin(M_PI * k)
4323 : // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k)
4324 : // sin(M_PI * (dfBase + k)) = dfSinPIBase * (((k % 2) == 0) ? 1 : -1)
4325 :
4326 : // sin(M_PI / dfR * (dfBase + k)) = sin(M_PI / dfR * dfBase) * cos(M_PI / dfR * k) +
4327 : // cos(M_PI / dfR * dfBase) * sin(M_PI / dfR * k)
4328 : // sin(M_PI / dfR * (dfBase + k)) = dfSinPIBaseOverR * cos(M_PI / dfR * k) + dfCosPIBaseOverR * sin(M_PI / dfR * k)
4329 : // clang-format on
4330 :
4331 420092 : const double dfSinPIDeltaXOver3 = sin((-M_PI / 3.0) * dfDeltaX);
4332 420092 : const double dfSin2PIDeltaXOver3 =
4333 : dfSinPIDeltaXOver3 * dfSinPIDeltaXOver3;
4334 : // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaX < PI/2.
4335 420092 : const double dfCosPIDeltaXOver3 = sqrt(1.0 - dfSin2PIDeltaXOver3);
4336 420092 : const double dfSinPIDeltaX =
4337 420092 : (3.0 - 4 * dfSin2PIDeltaXOver3) * dfSinPIDeltaXOver3;
4338 420092 : const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
4339 420092 : const double dfInvPI2Over3xSinPIDeltaX =
4340 : dfInvPI2Over3 * dfSinPIDeltaX;
4341 420092 : const double dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 =
4342 420092 : -0.5 * dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3;
4343 420092 : const double dfSinPIOver3 = 0.8660254037844386;
4344 420092 : const double dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3 =
4345 420092 : dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaX * dfCosPIDeltaXOver3;
4346 : const double padfCst[] = {
4347 420092 : dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3,
4348 420092 : dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 -
4349 : dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3,
4350 420092 : dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 +
4351 420092 : dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3};
4352 :
4353 2974940 : for (int i = iMin; i <= iMax; ++i)
4354 : {
4355 2554850 : const double dfX = i - dfDeltaX;
4356 2554850 : if (dfX == 0.0)
4357 58282 : padfWeightsXShifted[i] = 1.0;
4358 : else
4359 2496570 : padfWeightsXShifted[i] = padfCst[(i + 3) % 3] / (dfX * dfX);
4360 : #if DEBUG_VERBOSE
4361 : // TODO(schwehr): AlmostEqual.
4362 : // CPLAssert(fabs(padfWeightsX[i-poWK->nFiltInitX] -
4363 : // GWKLanczosSinc(dfX, 3.0)) < 1e-10);
4364 : #endif
4365 : }
4366 :
4367 420092 : psWrkStruct->iLastSrcX = iSrcX;
4368 420092 : psWrkStruct->dfLastDeltaX = dfDeltaX;
4369 : }
4370 : }
4371 :
4372 634574 : if (dfYScale < 1.0)
4373 : {
4374 15754 : while ((jMin - dfDeltaY) * dfYScale < -3.0)
4375 9500 : jMin++;
4376 9854 : while ((jMax - dfDeltaY) * dfYScale > 3.0)
4377 3600 : jMax--;
4378 :
4379 : // clang-format off
4380 : /*
4381 : Naive version:
4382 : for (int j = jMin; j <= jMax; ++j)
4383 : {
4384 : padfWeightsYShifted[j] =
4385 : GWKLanczosSinc((j - dfDeltaY) * dfYScale);
4386 : }
4387 : */
4388 : // clang-format on
4389 :
4390 6254 : if (iSrcY != psWrkStruct->iLastSrcY ||
4391 6127 : dfDeltaY != psWrkStruct->dfLastDeltaY)
4392 : {
4393 127 : double dfY = (jMin - dfDeltaY) * dfYScale;
4394 :
4395 127 : double dfPIYover3 = M_PI / 3 * dfY;
4396 127 : double dfCosOver3 = cos(dfPIYover3);
4397 127 : double dfSinOver3 = sin(dfPIYover3);
4398 :
4399 : // "Naive":
4400 : // double dfSin = sin( M_PI * dfY );
4401 : // double dfCos = cos( M_PI * dfY );
4402 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
4403 127 : double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
4404 127 : double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
4405 :
4406 127 : const double dfCosPiYScaleOver3 = psWrkStruct->dfCosPiYScaleOver3;
4407 127 : const double dfSinPiYScaleOver3 = psWrkStruct->dfSinPiYScaleOver3;
4408 127 : const double dfCosPiYScale = psWrkStruct->dfCosPiYScale;
4409 127 : const double dfSinPiYScale = psWrkStruct->dfSinPiYScale;
4410 127 : constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
4411 127 : padfWeightsYShifted[jMin] =
4412 127 : dfY == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfY * dfY);
4413 1210 : for (int j = jMin + 1; j <= jMax; ++j)
4414 : {
4415 1083 : dfY += dfYScale;
4416 1083 : const double dfNewSin =
4417 1083 : dfSin * dfCosPiYScale + dfCos * dfSinPiYScale;
4418 1083 : const double dfNewSinOver3 = dfSinOver3 * dfCosPiYScaleOver3 +
4419 1083 : dfCosOver3 * dfSinPiYScaleOver3;
4420 1083 : padfWeightsYShifted[j] =
4421 : dfY == 0
4422 1083 : ? 1.0
4423 1083 : : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfY * dfY);
4424 1083 : const double dfNewCos =
4425 1083 : dfCos * dfCosPiYScale - dfSin * dfSinPiYScale;
4426 1083 : const double dfNewCosOver3 = dfCosOver3 * dfCosPiYScaleOver3 -
4427 1083 : dfSinOver3 * dfSinPiYScaleOver3;
4428 1083 : dfSin = dfNewSin;
4429 1083 : dfCos = dfNewCos;
4430 1083 : dfSinOver3 = dfNewSinOver3;
4431 1083 : dfCosOver3 = dfNewCosOver3;
4432 : }
4433 :
4434 127 : psWrkStruct->iLastSrcY = iSrcY;
4435 127 : psWrkStruct->dfLastDeltaY = dfDeltaY;
4436 : }
4437 : }
4438 : else
4439 : {
4440 1106550 : while (jMin - dfDeltaY < -3.0)
4441 478232 : jMin++;
4442 628320 : while (jMax - dfDeltaY > 3.0)
4443 0 : jMax--;
4444 :
4445 628320 : if (iSrcY != psWrkStruct->iLastSrcY ||
4446 627488 : dfDeltaY != psWrkStruct->dfLastDeltaY)
4447 : {
4448 7198 : const double dfSinPIDeltaYOver3 = sin((-M_PI / 3.0) * dfDeltaY);
4449 7198 : const double dfSin2PIDeltaYOver3 =
4450 : dfSinPIDeltaYOver3 * dfSinPIDeltaYOver3;
4451 : // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaY < PI/2.
4452 7198 : const double dfCosPIDeltaYOver3 = sqrt(1.0 - dfSin2PIDeltaYOver3);
4453 7198 : const double dfSinPIDeltaY =
4454 7198 : (3.0 - 4.0 * dfSin2PIDeltaYOver3) * dfSinPIDeltaYOver3;
4455 7198 : const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
4456 7198 : const double dfInvPI2Over3xSinPIDeltaY =
4457 : dfInvPI2Over3 * dfSinPIDeltaY;
4458 7198 : const double dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 =
4459 7198 : -0.5 * dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3;
4460 7198 : const double dfSinPIOver3 = 0.8660254037844386;
4461 7198 : const double dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3 =
4462 7198 : dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaY * dfCosPIDeltaYOver3;
4463 : const double padfCst[] = {
4464 7198 : dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3,
4465 7198 : dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 -
4466 : dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3,
4467 7198 : dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 +
4468 7198 : dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3};
4469 :
4470 47777 : for (int j = jMin; j <= jMax; ++j)
4471 : {
4472 40579 : const double dfY = j - dfDeltaY;
4473 40579 : if (dfY == 0.0)
4474 468 : padfWeightsYShifted[j] = 1.0;
4475 : else
4476 40111 : padfWeightsYShifted[j] = padfCst[(j + 3) % 3] / (dfY * dfY);
4477 : #if DEBUG_VERBOSE
4478 : // TODO(schwehr): AlmostEqual.
4479 : // CPLAssert(fabs(padfWeightsYShifted[j] -
4480 : // GWKLanczosSinc(dfY, 3.0)) < 1e-10);
4481 : #endif
4482 : }
4483 :
4484 7198 : psWrkStruct->iLastSrcY = iSrcY;
4485 7198 : psWrkStruct->dfLastDeltaY = dfDeltaY;
4486 : }
4487 : }
4488 :
4489 : // If we have no density information, we can simply compute the
4490 : // accumulated weight.
4491 634574 : if (padfRowDensity == nullptr)
4492 : {
4493 634574 : double dfRowAccWeight = 0.0;
4494 5159250 : for (int i = iMin; i <= iMax; ++i)
4495 : {
4496 4524680 : dfRowAccWeight += padfWeightsXShifted[i];
4497 : }
4498 634574 : double dfColAccWeight = 0.0;
4499 4564130 : for (int j = jMin; j <= jMax; ++j)
4500 : {
4501 3929550 : dfColAccWeight += padfWeightsYShifted[j];
4502 : }
4503 634574 : dfAccumulatorWeight = dfRowAccWeight * dfColAccWeight;
4504 : }
4505 :
4506 : // Loop over pixel rows in the kernel.
4507 :
4508 634574 : if (poWK->eWorkingDataType == GDT_UInt8 && !poWK->panUnifiedSrcValid &&
4509 633954 : !poWK->papanBandSrcValid && !poWK->pafUnifiedSrcDensity &&
4510 : !padfRowDensity)
4511 : {
4512 : // Optimization for Byte case without any masking/alpha
4513 :
4514 633954 : if (dfAccumulatorWeight < 0.000001)
4515 : {
4516 0 : *pdfDensity = 0.0;
4517 0 : return false;
4518 : }
4519 :
4520 633954 : const GByte *pSrc =
4521 633954 : reinterpret_cast<const GByte *>(poWK->papabySrcImage[iBand]);
4522 633954 : pSrc += iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
4523 :
4524 : #if defined(USE_SSE2)
4525 633954 : if (iMax - iMin + 1 == 6)
4526 : {
4527 : // This is just an optimized version of the general case in
4528 : // the else clause.
4529 :
4530 359916 : pSrc += iMin;
4531 359916 : int j = jMin;
4532 : const auto fourXWeights =
4533 359916 : XMMReg4Double::Load4Val(padfWeightsXShifted + iMin);
4534 :
4535 : // Process 2 lines at the same time.
4536 1424180 : for (; j < jMax; j += 2)
4537 : {
4538 : const XMMReg4Double v_acc =
4539 1064270 : XMMReg4Double::Load4Val(pSrc) * fourXWeights;
4540 : const XMMReg4Double v_acc2 =
4541 1064270 : XMMReg4Double::Load4Val(pSrc + nSrcXSize) * fourXWeights;
4542 1064270 : const double dfRowAcc = v_acc.GetHorizSum();
4543 1064270 : const double dfRowAccEnd =
4544 1064270 : pSrc[4] * padfWeightsXShifted[iMin + 4] +
4545 1064270 : pSrc[5] * padfWeightsXShifted[iMin + 5];
4546 1064270 : dfAccumulatorReal +=
4547 1064270 : (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
4548 1064270 : const double dfRowAcc2 = v_acc2.GetHorizSum();
4549 1064270 : const double dfRowAcc2End =
4550 1064270 : pSrc[nSrcXSize + 4] * padfWeightsXShifted[iMin + 4] +
4551 1064270 : pSrc[nSrcXSize + 5] * padfWeightsXShifted[iMin + 5];
4552 1064270 : dfAccumulatorReal +=
4553 1064270 : (dfRowAcc2 + dfRowAcc2End) * padfWeightsYShifted[j + 1];
4554 1064270 : pSrc += 2 * nSrcXSize;
4555 : }
4556 359916 : if (j == jMax)
4557 : {
4558 : // Process last line if there's an odd number of them.
4559 :
4560 : const XMMReg4Double v_acc =
4561 90039 : XMMReg4Double::Load4Val(pSrc) * fourXWeights;
4562 90039 : const double dfRowAcc = v_acc.GetHorizSum();
4563 90039 : const double dfRowAccEnd =
4564 90039 : pSrc[4] * padfWeightsXShifted[iMin + 4] +
4565 90039 : pSrc[5] * padfWeightsXShifted[iMin + 5];
4566 90039 : dfAccumulatorReal +=
4567 90039 : (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
4568 : }
4569 : }
4570 : else
4571 : #endif
4572 : {
4573 1982080 : for (int j = jMin; j <= jMax; ++j)
4574 : {
4575 1708040 : int i = iMin;
4576 1708040 : double dfRowAcc1 = 0.0;
4577 1708040 : double dfRowAcc2 = 0.0;
4578 : // A bit of loop unrolling
4579 8474620 : for (; i < iMax; i += 2)
4580 : {
4581 6766580 : dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
4582 6766580 : dfRowAcc2 += pSrc[i + 1] * padfWeightsXShifted[i + 1];
4583 : }
4584 1708040 : if (i == iMax)
4585 : {
4586 : // Process last column if there's an odd number of them.
4587 1188570 : dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
4588 : }
4589 :
4590 1708040 : dfAccumulatorReal +=
4591 1708040 : (dfRowAcc1 + dfRowAcc2) * padfWeightsYShifted[j];
4592 1708040 : pSrc += nSrcXSize;
4593 : }
4594 : }
4595 :
4596 : // Calculate the output taking into account weighting.
4597 633954 : if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
4598 : {
4599 579748 : const double dfInvAcc = 1.0 / dfAccumulatorWeight;
4600 579748 : *pdfReal = dfAccumulatorReal * dfInvAcc;
4601 579748 : *pdfDensity = 1.0;
4602 : }
4603 : else
4604 : {
4605 54206 : *pdfReal = dfAccumulatorReal;
4606 54206 : *pdfDensity = 1.0;
4607 : }
4608 :
4609 633954 : return true;
4610 : }
4611 :
4612 620 : GPtrDiff_t iRowOffset =
4613 620 : iSrcOffset + static_cast<GPtrDiff_t>(jMin - 1) * nSrcXSize + iMin;
4614 :
4615 620 : int nCountValid = 0;
4616 620 : const bool bIsNonComplex = !GDALDataTypeIsComplex(poWK->eWorkingDataType);
4617 :
4618 3560 : for (int j = jMin; j <= jMax; ++j)
4619 : {
4620 2940 : iRowOffset += nSrcXSize;
4621 :
4622 : // Get pixel values.
4623 : // We can potentially read extra elements after the "normal" end of the
4624 : // source arrays, but the contract of papabySrcImage[iBand],
4625 : // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
4626 : // is to have WARP_EXTRA_ELTS reserved at their end.
4627 2940 : if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
4628 : padfRowDensity, padfRowReal, padfRowImag))
4629 0 : continue;
4630 :
4631 2940 : const double dfWeight1 = padfWeightsYShifted[j];
4632 :
4633 : // Iterate over pixels in row.
4634 2940 : if (padfRowDensity != nullptr)
4635 : {
4636 0 : for (int i = iMin; i <= iMax; ++i)
4637 : {
4638 : // Skip sampling if pixel has zero density.
4639 0 : if (padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD_DOUBLE)
4640 0 : continue;
4641 :
4642 0 : nCountValid++;
4643 :
4644 : // Use a cached set of weights for this row.
4645 0 : const double dfWeight2 = dfWeight1 * padfWeightsXShifted[i];
4646 :
4647 : // Accumulate!
4648 0 : dfAccumulatorReal += padfRowReal[i - iMin] * dfWeight2;
4649 0 : dfAccumulatorImag += padfRowImag[i - iMin] * dfWeight2;
4650 0 : dfAccumulatorDensity += padfRowDensity[i - iMin] * dfWeight2;
4651 0 : dfAccumulatorWeight += dfWeight2;
4652 : }
4653 : }
4654 2940 : else if (bIsNonComplex)
4655 : {
4656 1764 : double dfRowAccReal = 0.0;
4657 10560 : for (int i = iMin; i <= iMax; ++i)
4658 : {
4659 8796 : const double dfWeight2 = padfWeightsXShifted[i];
4660 :
4661 : // Accumulate!
4662 8796 : dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
4663 : }
4664 :
4665 1764 : dfAccumulatorReal += dfRowAccReal * dfWeight1;
4666 : }
4667 : else
4668 : {
4669 1176 : double dfRowAccReal = 0.0;
4670 1176 : double dfRowAccImag = 0.0;
4671 7040 : for (int i = iMin; i <= iMax; ++i)
4672 : {
4673 5864 : const double dfWeight2 = padfWeightsXShifted[i];
4674 :
4675 : // Accumulate!
4676 5864 : dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
4677 5864 : dfRowAccImag += padfRowImag[i - iMin] * dfWeight2;
4678 : }
4679 :
4680 1176 : dfAccumulatorReal += dfRowAccReal * dfWeight1;
4681 1176 : dfAccumulatorImag += dfRowAccImag * dfWeight1;
4682 : }
4683 : }
4684 :
4685 620 : if (dfAccumulatorWeight < 0.000001 ||
4686 0 : (padfRowDensity != nullptr &&
4687 0 : (dfAccumulatorDensity < 0.000001 ||
4688 0 : nCountValid < (jMax - jMin + 1) * (iMax - iMin + 1) / 2)))
4689 : {
4690 0 : *pdfDensity = 0.0;
4691 0 : return false;
4692 : }
4693 :
4694 : // Calculate the output taking into account weighting.
4695 620 : if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
4696 : {
4697 0 : const double dfInvAcc = 1.0 / dfAccumulatorWeight;
4698 0 : *pdfReal = dfAccumulatorReal * dfInvAcc;
4699 0 : *pdfImag = dfAccumulatorImag * dfInvAcc;
4700 0 : if (padfRowDensity != nullptr)
4701 0 : *pdfDensity = dfAccumulatorDensity * dfInvAcc;
4702 : else
4703 0 : *pdfDensity = 1.0;
4704 : }
4705 : else
4706 : {
4707 620 : *pdfReal = dfAccumulatorReal;
4708 620 : *pdfImag = dfAccumulatorImag;
4709 620 : if (padfRowDensity != nullptr)
4710 0 : *pdfDensity = dfAccumulatorDensity;
4711 : else
4712 620 : *pdfDensity = 1.0;
4713 : }
4714 :
4715 620 : return true;
4716 : }
4717 :
4718 : /************************************************************************/
4719 : /* GWKComputeWeights() */
4720 : /************************************************************************/
4721 :
4722 1091070 : static void GWKComputeWeights(GDALResampleAlg eResample, int iMin, int iMax,
4723 : double dfDeltaX, double dfXScale, int jMin,
4724 : int jMax, double dfDeltaY, double dfYScale,
4725 : double *padfWeightsHorizontal,
4726 : double *padfWeightsVertical, double &dfInvWeights)
4727 : {
4728 :
4729 1091070 : const FilterFuncType pfnGetWeight = apfGWKFilter[eResample];
4730 1091070 : CPLAssert(pfnGetWeight);
4731 1091070 : const FilterFunc4ValuesType pfnGetWeight4Values =
4732 1091070 : apfGWKFilter4Values[eResample];
4733 1091070 : CPLAssert(pfnGetWeight4Values);
4734 :
4735 1091070 : int i = iMin; // Used after for.
4736 1091070 : int iC = 0; // Used after for.
4737 : // Not zero, but as close as possible to it, to avoid potential division by
4738 : // zero at end of function
4739 1091070 : double dfAccumulatorWeightHorizontal = cpl::NumericLimits<double>::min();
4740 2403700 : for (; i + 2 < iMax; i += 4, iC += 4)
4741 : {
4742 1312620 : padfWeightsHorizontal[iC] = (i - dfDeltaX) * dfXScale;
4743 1312620 : padfWeightsHorizontal[iC + 1] = padfWeightsHorizontal[iC] + dfXScale;
4744 1312620 : padfWeightsHorizontal[iC + 2] =
4745 1312620 : padfWeightsHorizontal[iC + 1] + dfXScale;
4746 1312620 : padfWeightsHorizontal[iC + 3] =
4747 1312620 : padfWeightsHorizontal[iC + 2] + dfXScale;
4748 1312620 : dfAccumulatorWeightHorizontal +=
4749 1312620 : pfnGetWeight4Values(padfWeightsHorizontal + iC);
4750 : }
4751 1145700 : for (; i <= iMax; ++i, ++iC)
4752 : {
4753 54623 : const double dfWeight = pfnGetWeight((i - dfDeltaX) * dfXScale);
4754 54623 : padfWeightsHorizontal[iC] = dfWeight;
4755 54623 : dfAccumulatorWeightHorizontal += dfWeight;
4756 : }
4757 :
4758 1091070 : int j = jMin; // Used after for.
4759 1091070 : int jC = 0; // Used after for.
4760 : // Not zero, but as close as possible to it, to avoid potential division by
4761 : // zero at end of function
4762 1091070 : double dfAccumulatorWeightVertical = cpl::NumericLimits<double>::min();
4763 2332840 : for (; j + 2 < jMax; j += 4, jC += 4)
4764 : {
4765 1241770 : padfWeightsVertical[jC] = (j - dfDeltaY) * dfYScale;
4766 1241770 : padfWeightsVertical[jC + 1] = padfWeightsVertical[jC] + dfYScale;
4767 1241770 : padfWeightsVertical[jC + 2] = padfWeightsVertical[jC + 1] + dfYScale;
4768 1241770 : padfWeightsVertical[jC + 3] = padfWeightsVertical[jC + 2] + dfYScale;
4769 1241770 : dfAccumulatorWeightVertical +=
4770 1241770 : pfnGetWeight4Values(padfWeightsVertical + jC);
4771 : }
4772 1152230 : for (; j <= jMax; ++j, ++jC)
4773 : {
4774 61154 : const double dfWeight = pfnGetWeight((j - dfDeltaY) * dfYScale);
4775 61154 : padfWeightsVertical[jC] = dfWeight;
4776 61154 : dfAccumulatorWeightVertical += dfWeight;
4777 : }
4778 :
4779 1091070 : dfInvWeights =
4780 1091070 : 1. / (dfAccumulatorWeightHorizontal * dfAccumulatorWeightVertical);
4781 1091070 : }
4782 :
4783 : /************************************************************************/
4784 : /* GWKResampleNoMasksT() */
4785 : /************************************************************************/
4786 :
4787 : template <class T>
4788 : static bool
4789 : GWKResampleNoMasksT(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
4790 : double dfSrcY, T *pValue, double *padfWeightsHorizontal,
4791 : double *padfWeightsVertical, double &dfInvWeights)
4792 :
4793 : {
4794 : // Commonly used; save locally.
4795 : const int nSrcXSize = poWK->nSrcXSize;
4796 : const int nSrcYSize = poWK->nSrcYSize;
4797 :
4798 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4799 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4800 : const GPtrDiff_t iSrcOffset =
4801 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4802 :
4803 : const int nXRadius = poWK->nXRadius;
4804 : const int nYRadius = poWK->nYRadius;
4805 :
4806 : // Politely refuse to process invalid coordinates or obscenely small image.
4807 : if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
4808 : nYRadius > nSrcYSize)
4809 : return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
4810 : pValue);
4811 :
4812 : T *pSrcBand = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
4813 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4814 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4815 :
4816 : const double dfXScale = std::min(poWK->dfXScale, 1.0);
4817 : const double dfYScale = std::min(poWK->dfYScale, 1.0);
4818 :
4819 : int iMin = 1 - nXRadius;
4820 : if (iSrcX + iMin < 0)
4821 : iMin = -iSrcX;
4822 : int iMax = nXRadius;
4823 : if (iSrcX + iMax >= nSrcXSize - 1)
4824 : iMax = nSrcXSize - 1 - iSrcX;
4825 :
4826 : int jMin = 1 - nYRadius;
4827 : if (iSrcY + jMin < 0)
4828 : jMin = -iSrcY;
4829 : int jMax = nYRadius;
4830 : if (iSrcY + jMax >= nSrcYSize - 1)
4831 : jMax = nSrcYSize - 1 - iSrcY;
4832 :
4833 : if (iBand == 0)
4834 : {
4835 : GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
4836 : jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
4837 : padfWeightsVertical, dfInvWeights);
4838 : }
4839 :
4840 : // Loop over all rows in the kernel.
4841 : double dfAccumulator = 0.0;
4842 : for (int jC = 0, j = jMin; j <= jMax; ++j, ++jC)
4843 : {
4844 : const GPtrDiff_t iSampJ =
4845 : iSrcOffset + static_cast<GPtrDiff_t>(j) * nSrcXSize;
4846 :
4847 : // Loop over all pixels in the row.
4848 : double dfAccumulatorLocal = 0.0;
4849 : double dfAccumulatorLocal2 = 0.0;
4850 : int iC = 0;
4851 : int i = iMin;
4852 : // Process by chunk of 4 cols.
4853 : for (; i + 2 < iMax; i += 4, iC += 4)
4854 : {
4855 : // Retrieve the pixel & accumulate.
4856 : dfAccumulatorLocal +=
4857 : double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
4858 : dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
4859 : padfWeightsHorizontal[iC + 1];
4860 : dfAccumulatorLocal2 += double(pSrcBand[i + 2 + iSampJ]) *
4861 : padfWeightsHorizontal[iC + 2];
4862 : dfAccumulatorLocal2 += double(pSrcBand[i + 3 + iSampJ]) *
4863 : padfWeightsHorizontal[iC + 3];
4864 : }
4865 : dfAccumulatorLocal += dfAccumulatorLocal2;
4866 : if (i < iMax)
4867 : {
4868 : dfAccumulatorLocal +=
4869 : double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
4870 : dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
4871 : padfWeightsHorizontal[iC + 1];
4872 : i += 2;
4873 : iC += 2;
4874 : }
4875 : if (i == iMax)
4876 : {
4877 : dfAccumulatorLocal +=
4878 : double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
4879 : }
4880 :
4881 : dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
4882 : }
4883 :
4884 : *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
4885 :
4886 : return true;
4887 : }
4888 :
4889 : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
4890 : /* Could possibly be used too on 32bit, but we would need to check at runtime */
4891 : #if defined(USE_SSE2)
4892 :
4893 : /************************************************************************/
4894 : /* GWKResampleNoMasks_SSE2_T() */
4895 : /************************************************************************/
4896 :
4897 : template <class T>
4898 1382149 : static bool GWKResampleNoMasks_SSE2_T(const GDALWarpKernel *poWK, int iBand,
4899 : double dfSrcX, double dfSrcY, T *pValue,
4900 : double *padfWeightsHorizontal,
4901 : double *padfWeightsVertical,
4902 : double &dfInvWeights)
4903 : {
4904 : // Commonly used; save locally.
4905 1382149 : const int nSrcXSize = poWK->nSrcXSize;
4906 1382149 : const int nSrcYSize = poWK->nSrcYSize;
4907 :
4908 1382149 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4909 1382149 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4910 1382149 : const GPtrDiff_t iSrcOffset =
4911 1382149 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4912 1382149 : const int nXRadius = poWK->nXRadius;
4913 1382149 : const int nYRadius = poWK->nYRadius;
4914 :
4915 : // Politely refuse to process invalid coordinates or obscenely small image.
4916 1382149 : if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
4917 : nYRadius > nSrcYSize)
4918 3 : return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
4919 3 : pValue);
4920 :
4921 1382146 : const T *pSrcBand =
4922 1382146 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
4923 :
4924 1382146 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4925 1382146 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4926 1382146 : const double dfXScale = std::min(poWK->dfXScale, 1.0);
4927 1382146 : const double dfYScale = std::min(poWK->dfYScale, 1.0);
4928 :
4929 1382146 : int iMin = 1 - nXRadius;
4930 1382146 : if (iSrcX + iMin < 0)
4931 20312 : iMin = -iSrcX;
4932 1382146 : int iMax = nXRadius;
4933 1382146 : if (iSrcX + iMax >= nSrcXSize - 1)
4934 7970 : iMax = nSrcXSize - 1 - iSrcX;
4935 :
4936 1382146 : int jMin = 1 - nYRadius;
4937 1382146 : if (iSrcY + jMin < 0)
4938 22209 : jMin = -iSrcY;
4939 1382146 : int jMax = nYRadius;
4940 1382146 : if (iSrcY + jMax >= nSrcYSize - 1)
4941 9295 : jMax = nSrcYSize - 1 - iSrcY;
4942 :
4943 1382146 : if (iBand == 0)
4944 : {
4945 1091074 : GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
4946 : jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
4947 : padfWeightsVertical, dfInvWeights);
4948 : }
4949 :
4950 1382146 : GPtrDiff_t iSampJ = iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
4951 : // Process by chunk of 4 rows.
4952 1382146 : int jC = 0;
4953 1382146 : int j = jMin;
4954 1382146 : double dfAccumulator = 0.0;
4955 3068580 : for (; j + 2 < jMax; j += 4, iSampJ += 4 * nSrcXSize, jC += 4)
4956 : {
4957 : // Loop over all pixels in the row.
4958 1686436 : int iC = 0;
4959 1686436 : int i = iMin;
4960 : // Process by chunk of 4 cols.
4961 1686436 : XMMReg4Double v_acc_1 = XMMReg4Double::Zero();
4962 1686436 : XMMReg4Double v_acc_2 = XMMReg4Double::Zero();
4963 1686436 : XMMReg4Double v_acc_3 = XMMReg4Double::Zero();
4964 1686436 : XMMReg4Double v_acc_4 = XMMReg4Double::Zero();
4965 4251632 : for (; i + 2 < iMax; i += 4, iC += 4)
4966 : {
4967 : // Retrieve the pixel & accumulate.
4968 2565196 : XMMReg4Double v_pixels_1 =
4969 2565196 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
4970 2565196 : XMMReg4Double v_pixels_2 =
4971 2565196 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + nSrcXSize);
4972 2565196 : XMMReg4Double v_pixels_3 =
4973 2565196 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
4974 2565196 : XMMReg4Double v_pixels_4 =
4975 2565196 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
4976 :
4977 2565196 : XMMReg4Double v_padfWeight =
4978 2565196 : XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
4979 :
4980 2565196 : v_acc_1 += v_pixels_1 * v_padfWeight;
4981 2565196 : v_acc_2 += v_pixels_2 * v_padfWeight;
4982 2565196 : v_acc_3 += v_pixels_3 * v_padfWeight;
4983 2565196 : v_acc_4 += v_pixels_4 * v_padfWeight;
4984 : }
4985 :
4986 1686436 : if (i < iMax)
4987 : {
4988 25512 : XMMReg2Double v_pixels_1 =
4989 25512 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ);
4990 25512 : XMMReg2Double v_pixels_2 =
4991 25512 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + nSrcXSize);
4992 25512 : XMMReg2Double v_pixels_3 =
4993 25512 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
4994 25512 : XMMReg2Double v_pixels_4 =
4995 25512 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
4996 :
4997 25512 : XMMReg2Double v_padfWeight =
4998 25512 : XMMReg2Double::Load2Val(padfWeightsHorizontal + iC);
4999 :
5000 25512 : v_acc_1.AddToLow(v_pixels_1 * v_padfWeight);
5001 25512 : v_acc_2.AddToLow(v_pixels_2 * v_padfWeight);
5002 25512 : v_acc_3.AddToLow(v_pixels_3 * v_padfWeight);
5003 25512 : v_acc_4.AddToLow(v_pixels_4 * v_padfWeight);
5004 :
5005 25512 : i += 2;
5006 25512 : iC += 2;
5007 : }
5008 :
5009 1686436 : double dfAccumulatorLocal_1 = v_acc_1.GetHorizSum();
5010 1686436 : double dfAccumulatorLocal_2 = v_acc_2.GetHorizSum();
5011 1686436 : double dfAccumulatorLocal_3 = v_acc_3.GetHorizSum();
5012 1686436 : double dfAccumulatorLocal_4 = v_acc_4.GetHorizSum();
5013 :
5014 1686436 : if (i == iMax)
5015 : {
5016 27557 : dfAccumulatorLocal_1 += static_cast<double>(pSrcBand[i + iSampJ]) *
5017 27557 : padfWeightsHorizontal[iC];
5018 27557 : dfAccumulatorLocal_2 +=
5019 27557 : static_cast<double>(pSrcBand[i + iSampJ + nSrcXSize]) *
5020 27557 : padfWeightsHorizontal[iC];
5021 27557 : dfAccumulatorLocal_3 +=
5022 27557 : static_cast<double>(pSrcBand[i + iSampJ + 2 * nSrcXSize]) *
5023 27557 : padfWeightsHorizontal[iC];
5024 27557 : dfAccumulatorLocal_4 +=
5025 27557 : static_cast<double>(pSrcBand[i + iSampJ + 3 * nSrcXSize]) *
5026 27557 : padfWeightsHorizontal[iC];
5027 : }
5028 :
5029 1686436 : dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal_1;
5030 1686436 : dfAccumulator += padfWeightsVertical[jC + 1] * dfAccumulatorLocal_2;
5031 1686436 : dfAccumulator += padfWeightsVertical[jC + 2] * dfAccumulatorLocal_3;
5032 1686436 : dfAccumulator += padfWeightsVertical[jC + 3] * dfAccumulatorLocal_4;
5033 : }
5034 1456100 : for (; j <= jMax; ++j, iSampJ += nSrcXSize, ++jC)
5035 : {
5036 : // Loop over all pixels in the row.
5037 73954 : int iC = 0;
5038 73954 : int i = iMin;
5039 : // Process by chunk of 4 cols.
5040 73954 : XMMReg4Double v_acc = XMMReg4Double::Zero();
5041 172926 : for (; i + 2 < iMax; i += 4, iC += 4)
5042 : {
5043 : // Retrieve the pixel & accumulate.
5044 98972 : XMMReg4Double v_pixels =
5045 98972 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
5046 98972 : XMMReg4Double v_padfWeight =
5047 98972 : XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
5048 :
5049 98972 : v_acc += v_pixels * v_padfWeight;
5050 : }
5051 :
5052 73954 : double dfAccumulatorLocal = v_acc.GetHorizSum();
5053 :
5054 73954 : if (i < iMax)
5055 : {
5056 1862 : dfAccumulatorLocal +=
5057 1862 : double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
5058 1862 : dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
5059 1862 : padfWeightsHorizontal[iC + 1];
5060 1862 : i += 2;
5061 1862 : iC += 2;
5062 : }
5063 73954 : if (i == iMax)
5064 : {
5065 1803 : dfAccumulatorLocal += static_cast<double>(pSrcBand[i + iSampJ]) *
5066 1803 : padfWeightsHorizontal[iC];
5067 : }
5068 :
5069 73954 : dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
5070 : }
5071 :
5072 1382146 : *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
5073 :
5074 1382146 : return true;
5075 : }
5076 :
5077 : /************************************************************************/
5078 : /* GWKResampleNoMasksT<GByte>() */
5079 : /************************************************************************/
5080 :
5081 : template <>
5082 877023 : bool GWKResampleNoMasksT<GByte>(const GDALWarpKernel *poWK, int iBand,
5083 : double dfSrcX, double dfSrcY, GByte *pValue,
5084 : double *padfWeightsHorizontal,
5085 : double *padfWeightsVertical,
5086 : double &dfInvWeights)
5087 : {
5088 877023 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
5089 : padfWeightsHorizontal, padfWeightsVertical,
5090 877023 : dfInvWeights);
5091 : }
5092 :
5093 : /************************************************************************/
5094 : /* GWKResampleNoMasksT<GInt16>() */
5095 : /************************************************************************/
5096 :
5097 : template <>
5098 252563 : bool GWKResampleNoMasksT<GInt16>(const GDALWarpKernel *poWK, int iBand,
5099 : double dfSrcX, double dfSrcY, GInt16 *pValue,
5100 : double *padfWeightsHorizontal,
5101 : double *padfWeightsVertical,
5102 : double &dfInvWeights)
5103 : {
5104 252563 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
5105 : padfWeightsHorizontal, padfWeightsVertical,
5106 252563 : dfInvWeights);
5107 : }
5108 :
5109 : /************************************************************************/
5110 : /* GWKResampleNoMasksT<GUInt16>() */
5111 : /************************************************************************/
5112 :
5113 : template <>
5114 250063 : bool GWKResampleNoMasksT<GUInt16>(const GDALWarpKernel *poWK, int iBand,
5115 : double dfSrcX, double dfSrcY, GUInt16 *pValue,
5116 : double *padfWeightsHorizontal,
5117 : double *padfWeightsVertical,
5118 : double &dfInvWeights)
5119 : {
5120 250063 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
5121 : padfWeightsHorizontal, padfWeightsVertical,
5122 250063 : dfInvWeights);
5123 : }
5124 :
5125 : /************************************************************************/
5126 : /* GWKResampleNoMasksT<float>() */
5127 : /************************************************************************/
5128 :
5129 : template <>
5130 2500 : bool GWKResampleNoMasksT<float>(const GDALWarpKernel *poWK, int iBand,
5131 : double dfSrcX, double dfSrcY, float *pValue,
5132 : double *padfWeightsHorizontal,
5133 : double *padfWeightsVertical,
5134 : double &dfInvWeights)
5135 : {
5136 2500 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
5137 : padfWeightsHorizontal, padfWeightsVertical,
5138 2500 : dfInvWeights);
5139 : }
5140 :
5141 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
5142 :
5143 : /************************************************************************/
5144 : /* GWKResampleNoMasksT<double>() */
5145 : /************************************************************************/
5146 :
5147 : template <>
5148 : bool GWKResampleNoMasksT<double>(const GDALWarpKernel *poWK, int iBand,
5149 : double dfSrcX, double dfSrcY, double *pValue,
5150 : double *padfWeightsHorizontal,
5151 : double *padfWeightsVertical,
5152 : double &dfInvWeights)
5153 : {
5154 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
5155 : padfWeightsHorizontal, padfWeightsVertical,
5156 : dfInvWeights);
5157 : }
5158 :
5159 : #endif /* INSTANTIATE_FLOAT64_SSE2_IMPL */
5160 :
5161 : #endif /* defined(USE_SSE2) */
5162 :
5163 : /************************************************************************/
5164 : /* GWKRoundSourceCoordinates() */
5165 : /************************************************************************/
5166 :
5167 1000 : static void GWKRoundSourceCoordinates(
5168 : int nDstXSize, double *padfX, double *padfY, double *padfZ, int *pabSuccess,
5169 : double dfSrcCoordPrecision, double dfErrorThreshold,
5170 : GDALTransformerFunc pfnTransformer, void *pTransformerArg, double dfDstXOff,
5171 : double dfDstY)
5172 : {
5173 1000 : double dfPct = 0.8;
5174 1000 : if (dfErrorThreshold > 0 && dfSrcCoordPrecision / dfErrorThreshold >= 10.0)
5175 : {
5176 1000 : dfPct = 1.0 - 2 * 1.0 / (dfSrcCoordPrecision / dfErrorThreshold);
5177 : }
5178 1000 : const double dfExactTransformThreshold = 0.5 * dfPct * dfSrcCoordPrecision;
5179 :
5180 501000 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5181 : {
5182 500000 : const double dfXBefore = padfX[iDstX];
5183 500000 : const double dfYBefore = padfY[iDstX];
5184 500000 : padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
5185 : dfSrcCoordPrecision;
5186 500000 : padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
5187 : dfSrcCoordPrecision;
5188 :
5189 : // If we are in an uncertainty zone, go to non-approximated
5190 : // transformation.
5191 : // Due to the 80% of half-precision threshold, dfSrcCoordPrecision must
5192 : // be at least 10 times greater than the approximation error.
5193 500000 : if (fabs(dfXBefore - padfX[iDstX]) > dfExactTransformThreshold ||
5194 399914 : fabs(dfYBefore - padfY[iDstX]) > dfExactTransformThreshold)
5195 : {
5196 180090 : padfX[iDstX] = iDstX + dfDstXOff;
5197 180090 : padfY[iDstX] = dfDstY;
5198 180090 : padfZ[iDstX] = 0.0;
5199 180090 : pfnTransformer(pTransformerArg, TRUE, 1, padfX + iDstX,
5200 180090 : padfY + iDstX, padfZ + iDstX, pabSuccess + iDstX);
5201 180090 : padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
5202 : dfSrcCoordPrecision;
5203 180090 : padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
5204 : dfSrcCoordPrecision;
5205 : }
5206 : }
5207 1000 : }
5208 :
5209 : /************************************************************************/
5210 : /* GWKCheckAndComputeSrcOffsets() */
5211 : /************************************************************************/
5212 : static CPL_INLINE bool
5213 187159000 : GWKCheckAndComputeSrcOffsets(GWKJobStruct *psJob, int *_pabSuccess, int _iDstX,
5214 : int _iDstY, double *_padfX, double *_padfY,
5215 : int _nSrcXSize, int _nSrcYSize,
5216 : GPtrDiff_t &iSrcOffset)
5217 : {
5218 187159000 : const GDALWarpKernel *_poWK = psJob->poWK;
5219 193762000 : for (int iTry = 0; iTry < 2; ++iTry)
5220 : {
5221 193762000 : if (iTry == 1)
5222 : {
5223 : // If the source coordinate is slightly outside of the source raster
5224 : // retry to transform it alone, so that the exact coordinate
5225 : // transformer is used.
5226 :
5227 6603180 : _padfX[_iDstX] = _iDstX + 0.5 + _poWK->nDstXOff;
5228 6603180 : _padfY[_iDstX] = _iDstY + 0.5 + _poWK->nDstYOff;
5229 6603180 : double dfZ = 0;
5230 6603180 : _poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1,
5231 6603180 : _padfX + _iDstX, _padfY + _iDstX, &dfZ,
5232 6603180 : _pabSuccess + _iDstX);
5233 : }
5234 193762000 : if (!_pabSuccess[_iDstX])
5235 3615020 : return false;
5236 :
5237 : // If this happens this is likely the symptom of a bug somewhere.
5238 190147000 : if (std::isnan(_padfX[_iDstX]) || std::isnan(_padfY[_iDstX]))
5239 : {
5240 : static bool bNanCoordFound = false;
5241 0 : if (!bNanCoordFound)
5242 : {
5243 0 : CPLDebug("WARP",
5244 : "GWKCheckAndComputeSrcOffsets(): "
5245 : "NaN coordinate found on point %d.",
5246 : _iDstX);
5247 0 : bNanCoordFound = true;
5248 : }
5249 0 : return false;
5250 : }
5251 :
5252 : /* --------------------------------------------------------------------
5253 : */
5254 : /* Figure out what pixel we want in our source raster, and skip */
5255 : /* further processing if it is well off the source image. */
5256 : /* --------------------------------------------------------------------
5257 : */
5258 : /* We test against the value before casting to avoid the */
5259 : /* problem of asymmetric truncation effects around zero. That is */
5260 : /* -0.5 will be 0 when cast to an int. */
5261 190147000 : if (_padfX[_iDstX] < _poWK->nSrcXOff)
5262 : {
5263 : // If the source coordinate is slightly outside of the source raster
5264 : // retry to transform it alone, so that the exact coordinate
5265 : // transformer is used.
5266 16858100 : if (iTry == 0 && _padfX[_iDstX] > _poWK->nSrcXOff - 1)
5267 2889470 : continue;
5268 13968600 : return false;
5269 : }
5270 :
5271 173289000 : if (_padfY[_iDstX] < _poWK->nSrcYOff)
5272 : {
5273 : // If the source coordinate is slightly outside of the source raster
5274 : // retry to transform it alone, so that the exact coordinate
5275 : // transformer is used.
5276 7890610 : if (iTry == 0 && _padfY[_iDstX] > _poWK->nSrcYOff - 1)
5277 635435 : continue;
5278 7255180 : return false;
5279 : }
5280 :
5281 : // Check for potential overflow when casting from float to int, (if
5282 : // operating outside natural projection area, padfX/Y can be a very huge
5283 : // positive number before doing the actual conversion), as such cast is
5284 : // undefined behavior that can trigger exception with some compilers
5285 : // (see #6753)
5286 165399000 : if (_padfX[_iDstX] + 1e-10 > _nSrcXSize + _poWK->nSrcXOff)
5287 : {
5288 : // If the source coordinate is slightly outside of the source raster
5289 : // retry to transform it alone, so that the exact coordinate
5290 : // transformer is used.
5291 13193200 : if (iTry == 0 && _padfX[_iDstX] < _nSrcXSize + _poWK->nSrcXOff + 1)
5292 2712400 : continue;
5293 10480800 : return false;
5294 : }
5295 152205000 : if (_padfY[_iDstX] + 1e-10 > _nSrcYSize + _poWK->nSrcYOff)
5296 : {
5297 : // If the source coordinate is slightly outside of the source raster
5298 : // retry to transform it alone, so that the exact coordinate
5299 : // transformer is used.
5300 5680180 : if (iTry == 0 && _padfY[_iDstX] < _nSrcYSize + _poWK->nSrcYOff + 1)
5301 365873 : continue;
5302 5314300 : return false;
5303 : }
5304 :
5305 146525000 : break;
5306 : }
5307 :
5308 146525000 : int iSrcX = static_cast<int>(_padfX[_iDstX] + 1.0e-10) - _poWK->nSrcXOff;
5309 146525000 : int iSrcY = static_cast<int>(_padfY[_iDstX] + 1.0e-10) - _poWK->nSrcYOff;
5310 146525000 : if (iSrcX == _nSrcXSize)
5311 0 : iSrcX--;
5312 146525000 : if (iSrcY == _nSrcYSize)
5313 0 : iSrcY--;
5314 :
5315 : // Those checks should normally be OK given the previous ones.
5316 146525000 : CPLAssert(iSrcX >= 0);
5317 146525000 : CPLAssert(iSrcY >= 0);
5318 146525000 : CPLAssert(iSrcX < _nSrcXSize);
5319 146525000 : CPLAssert(iSrcY < _nSrcYSize);
5320 :
5321 146525000 : iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * _nSrcXSize;
5322 :
5323 146525000 : return true;
5324 : }
5325 :
5326 : /************************************************************************/
5327 : /* GWKOneSourceCornerFailsToReproject() */
5328 : /************************************************************************/
5329 :
5330 938 : static bool GWKOneSourceCornerFailsToReproject(GWKJobStruct *psJob)
5331 : {
5332 938 : GDALWarpKernel *poWK = psJob->poWK;
5333 2802 : for (int iY = 0; iY <= 1; ++iY)
5334 : {
5335 5599 : for (int iX = 0; iX <= 1; ++iX)
5336 : {
5337 3735 : double dfXTmp = poWK->nSrcXOff + iX * poWK->nSrcXSize;
5338 3735 : double dfYTmp = poWK->nSrcYOff + iY * poWK->nSrcYSize;
5339 3735 : double dfZTmp = 0;
5340 3735 : int nSuccess = FALSE;
5341 3735 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp,
5342 : &dfYTmp, &dfZTmp, &nSuccess);
5343 3735 : if (!nSuccess)
5344 7 : return true;
5345 : }
5346 : }
5347 931 : return false;
5348 : }
5349 :
5350 : /************************************************************************/
5351 : /* GWKAdjustSrcOffsetOnEdge() */
5352 : /************************************************************************/
5353 :
5354 9714 : static bool GWKAdjustSrcOffsetOnEdge(GWKJobStruct *psJob,
5355 : GPtrDiff_t &iSrcOffset)
5356 : {
5357 9714 : GDALWarpKernel *poWK = psJob->poWK;
5358 9714 : const int nSrcXSize = poWK->nSrcXSize;
5359 9714 : const int nSrcYSize = poWK->nSrcYSize;
5360 :
5361 : // Check if the computed source position slightly altered
5362 : // fails to reproject. If so, then we are at the edge of
5363 : // the validity area, and it is worth checking neighbour
5364 : // source pixels for validity.
5365 9714 : int nSuccess = FALSE;
5366 : {
5367 9714 : double dfXTmp =
5368 9714 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5369 9714 : double dfYTmp =
5370 9714 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5371 9714 : double dfZTmp = 0;
5372 9714 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5373 : &dfZTmp, &nSuccess);
5374 : }
5375 9714 : if (nSuccess)
5376 : {
5377 6996 : double dfXTmp =
5378 6996 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5379 6996 : double dfYTmp =
5380 6996 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
5381 6996 : double dfZTmp = 0;
5382 6996 : nSuccess = FALSE;
5383 6996 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5384 : &dfZTmp, &nSuccess);
5385 : }
5386 9714 : if (nSuccess)
5387 : {
5388 5624 : double dfXTmp =
5389 5624 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
5390 5624 : double dfYTmp =
5391 5624 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5392 5624 : double dfZTmp = 0;
5393 5624 : nSuccess = FALSE;
5394 5624 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5395 : &dfZTmp, &nSuccess);
5396 : }
5397 :
5398 14166 : if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
5399 4452 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + 1))
5400 : {
5401 1860 : iSrcOffset++;
5402 1860 : return true;
5403 : }
5404 10290 : else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
5405 2436 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + nSrcXSize))
5406 : {
5407 1334 : iSrcOffset += nSrcXSize;
5408 1334 : return true;
5409 : }
5410 7838 : else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
5411 1318 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - 1))
5412 : {
5413 956 : iSrcOffset--;
5414 956 : return true;
5415 : }
5416 5924 : else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
5417 360 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - nSrcXSize))
5418 : {
5419 340 : iSrcOffset -= nSrcXSize;
5420 340 : return true;
5421 : }
5422 :
5423 5224 : return false;
5424 : }
5425 :
5426 : /************************************************************************/
5427 : /* GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity() */
5428 : /************************************************************************/
5429 :
5430 0 : static bool GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(GWKJobStruct *psJob,
5431 : GPtrDiff_t &iSrcOffset)
5432 : {
5433 0 : GDALWarpKernel *poWK = psJob->poWK;
5434 0 : const int nSrcXSize = poWK->nSrcXSize;
5435 0 : const int nSrcYSize = poWK->nSrcYSize;
5436 :
5437 : // Check if the computed source position slightly altered
5438 : // fails to reproject. If so, then we are at the edge of
5439 : // the validity area, and it is worth checking neighbour
5440 : // source pixels for validity.
5441 0 : int nSuccess = FALSE;
5442 : {
5443 0 : double dfXTmp =
5444 0 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5445 0 : double dfYTmp =
5446 0 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5447 0 : double dfZTmp = 0;
5448 0 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5449 : &dfZTmp, &nSuccess);
5450 : }
5451 0 : if (nSuccess)
5452 : {
5453 0 : double dfXTmp =
5454 0 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5455 0 : double dfYTmp =
5456 0 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
5457 0 : double dfZTmp = 0;
5458 0 : nSuccess = FALSE;
5459 0 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5460 : &dfZTmp, &nSuccess);
5461 : }
5462 0 : if (nSuccess)
5463 : {
5464 0 : double dfXTmp =
5465 0 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
5466 0 : double dfYTmp =
5467 0 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5468 0 : double dfZTmp = 0;
5469 0 : nSuccess = FALSE;
5470 0 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5471 : &dfZTmp, &nSuccess);
5472 : }
5473 :
5474 0 : if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
5475 0 : poWK->pafUnifiedSrcDensity[iSrcOffset + 1] >=
5476 : SRC_DENSITY_THRESHOLD_FLOAT)
5477 : {
5478 0 : iSrcOffset++;
5479 0 : return true;
5480 : }
5481 0 : else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
5482 0 : poWK->pafUnifiedSrcDensity[iSrcOffset + nSrcXSize] >=
5483 : SRC_DENSITY_THRESHOLD_FLOAT)
5484 : {
5485 0 : iSrcOffset += nSrcXSize;
5486 0 : return true;
5487 : }
5488 0 : else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
5489 0 : poWK->pafUnifiedSrcDensity[iSrcOffset - 1] >=
5490 : SRC_DENSITY_THRESHOLD_FLOAT)
5491 : {
5492 0 : iSrcOffset--;
5493 0 : return true;
5494 : }
5495 0 : else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
5496 0 : poWK->pafUnifiedSrcDensity[iSrcOffset - nSrcXSize] >=
5497 : SRC_DENSITY_THRESHOLD_FLOAT)
5498 : {
5499 0 : iSrcOffset -= nSrcXSize;
5500 0 : return true;
5501 : }
5502 :
5503 0 : return false;
5504 : }
5505 :
5506 : /************************************************************************/
5507 : /* GWKGeneralCase() */
5508 : /* */
5509 : /* This is the most general case. It attempts to handle all */
5510 : /* possible features with relatively little concern for */
5511 : /* efficiency. */
5512 : /************************************************************************/
5513 :
5514 239 : static void GWKGeneralCaseThread(void *pData)
5515 : {
5516 239 : GWKJobStruct *psJob = reinterpret_cast<GWKJobStruct *>(pData);
5517 239 : GDALWarpKernel *poWK = psJob->poWK;
5518 239 : const int iYMin = psJob->iYMin;
5519 239 : const int iYMax = psJob->iYMax;
5520 : const double dfMultFactorVerticalShiftPipeline =
5521 239 : poWK->bApplyVerticalShift
5522 239 : ? CPLAtof(CSLFetchNameValueDef(
5523 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
5524 : "1.0"))
5525 239 : : 0.0;
5526 : const bool bAvoidNoDataSingleBand =
5527 239 : poWK->nBands == 1 ||
5528 0 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
5529 239 : "UNIFIED_SRC_NODATA", "FALSE"));
5530 :
5531 239 : int nDstXSize = poWK->nDstXSize;
5532 239 : int nSrcXSize = poWK->nSrcXSize;
5533 239 : int nSrcYSize = poWK->nSrcYSize;
5534 :
5535 : /* -------------------------------------------------------------------- */
5536 : /* Allocate x,y,z coordinate arrays for transformation ... one */
5537 : /* scanlines worth of positions. */
5538 : /* -------------------------------------------------------------------- */
5539 : // For x, 2 *, because we cache the precomputed values at the end.
5540 : double *padfX =
5541 239 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
5542 : double *padfY =
5543 239 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5544 : double *padfZ =
5545 239 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5546 239 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5547 :
5548 239 : const bool bUse4SamplesFormula = CanUse4SamplesFormula(poWK);
5549 :
5550 239 : GWKResampleWrkStruct *psWrkStruct = nullptr;
5551 239 : if (poWK->eResample != GRA_NearestNeighbour)
5552 : {
5553 220 : psWrkStruct = GWKResampleCreateWrkStruct(poWK);
5554 : }
5555 239 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
5556 239 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
5557 239 : const double dfErrorThreshold = CPLAtof(
5558 239 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
5559 :
5560 : const bool bOneSourceCornerFailsToReproject =
5561 239 : GWKOneSourceCornerFailsToReproject(psJob);
5562 :
5563 : // Precompute values.
5564 6469 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5565 6230 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
5566 :
5567 : /* ==================================================================== */
5568 : /* Loop over output lines. */
5569 : /* ==================================================================== */
5570 6469 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
5571 : {
5572 : /* --------------------------------------------------------------------
5573 : */
5574 : /* Setup points to transform to source image space. */
5575 : /* --------------------------------------------------------------------
5576 : */
5577 6230 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
5578 6230 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
5579 242390 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5580 236160 : padfY[iDstX] = dfY;
5581 6230 : memset(padfZ, 0, sizeof(double) * nDstXSize);
5582 :
5583 : /* --------------------------------------------------------------------
5584 : */
5585 : /* Transform the points from destination pixel/line coordinates */
5586 : /* to source pixel/line coordinates. */
5587 : /* --------------------------------------------------------------------
5588 : */
5589 6230 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
5590 : padfY, padfZ, pabSuccess);
5591 6230 : if (dfSrcCoordPrecision > 0.0)
5592 : {
5593 0 : GWKRoundSourceCoordinates(
5594 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
5595 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
5596 0 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
5597 : }
5598 :
5599 : /* ====================================================================
5600 : */
5601 : /* Loop over pixels in output scanline. */
5602 : /* ====================================================================
5603 : */
5604 242390 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5605 : {
5606 236160 : GPtrDiff_t iSrcOffset = 0;
5607 236160 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
5608 : padfX, padfY, nSrcXSize,
5609 : nSrcYSize, iSrcOffset))
5610 0 : continue;
5611 :
5612 : /* --------------------------------------------------------------------
5613 : */
5614 : /* Do not try to apply transparent/invalid source pixels to the
5615 : */
5616 : /* destination. This currently ignores the multi-pixel input
5617 : */
5618 : /* of bilinear and cubic resamples. */
5619 : /* --------------------------------------------------------------------
5620 : */
5621 236160 : double dfDensity = 1.0;
5622 :
5623 236160 : if (poWK->pafUnifiedSrcDensity != nullptr)
5624 : {
5625 1200 : dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
5626 1200 : if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
5627 : {
5628 0 : if (!bOneSourceCornerFailsToReproject)
5629 : {
5630 0 : continue;
5631 : }
5632 0 : else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
5633 : psJob, iSrcOffset))
5634 : {
5635 0 : dfDensity =
5636 0 : double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
5637 : }
5638 : else
5639 : {
5640 0 : continue;
5641 : }
5642 : }
5643 : }
5644 :
5645 236160 : if (poWK->panUnifiedSrcValid != nullptr &&
5646 0 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
5647 : {
5648 0 : if (!bOneSourceCornerFailsToReproject)
5649 : {
5650 0 : continue;
5651 : }
5652 0 : else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
5653 : {
5654 0 : continue;
5655 : }
5656 : }
5657 :
5658 : /* ====================================================================
5659 : */
5660 : /* Loop processing each band. */
5661 : /* ====================================================================
5662 : */
5663 236160 : bool bHasFoundDensity = false;
5664 :
5665 236160 : const GPtrDiff_t iDstOffset =
5666 236160 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
5667 472320 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
5668 : {
5669 236160 : double dfBandDensity = 0.0;
5670 236160 : double dfValueReal = 0.0;
5671 236160 : double dfValueImag = 0.0;
5672 :
5673 : /* --------------------------------------------------------------------
5674 : */
5675 : /* Collect the source value. */
5676 : /* --------------------------------------------------------------------
5677 : */
5678 236160 : if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
5679 : nSrcYSize == 1)
5680 : {
5681 : // FALSE is returned if dfBandDensity == 0, which is
5682 : // checked below.
5683 568 : CPL_IGNORE_RET_VAL(GWKGetPixelValue(
5684 : poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal,
5685 : &dfValueImag));
5686 : }
5687 235592 : else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
5688 : {
5689 248 : GWKBilinearResample4Sample(
5690 248 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5691 248 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5692 : &dfValueReal, &dfValueImag);
5693 : }
5694 235344 : else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
5695 : {
5696 248 : GWKCubicResample4Sample(
5697 248 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5698 248 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5699 : &dfValueReal, &dfValueImag);
5700 : }
5701 : else
5702 : #ifdef DEBUG
5703 : // Only useful for clang static analyzer.
5704 235096 : if (psWrkStruct != nullptr)
5705 : #endif
5706 : {
5707 235096 : psWrkStruct->pfnGWKResample(
5708 235096 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5709 235096 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5710 : &dfValueReal, &dfValueImag, psWrkStruct);
5711 : }
5712 :
5713 : // If we didn't find any valid inputs skip to next band.
5714 236160 : if (dfBandDensity < BAND_DENSITY_THRESHOLD)
5715 0 : continue;
5716 :
5717 236160 : if (poWK->bApplyVerticalShift)
5718 : {
5719 0 : if (!std::isfinite(padfZ[iDstX]))
5720 0 : continue;
5721 : // Subtract padfZ[] since the coordinate transformation is
5722 : // from target to source
5723 0 : dfValueReal =
5724 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
5725 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
5726 : }
5727 :
5728 236160 : bHasFoundDensity = true;
5729 :
5730 : /* --------------------------------------------------------------------
5731 : */
5732 : /* We have a computed value from the source. Now apply it
5733 : * to */
5734 : /* the destination pixel. */
5735 : /* --------------------------------------------------------------------
5736 : */
5737 236160 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
5738 : dfValueReal, dfValueImag,
5739 : bAvoidNoDataSingleBand);
5740 : }
5741 :
5742 236160 : if (!bHasFoundDensity)
5743 0 : continue;
5744 :
5745 236160 : if (!bAvoidNoDataSingleBand)
5746 : {
5747 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
5748 : }
5749 :
5750 : /* --------------------------------------------------------------------
5751 : */
5752 : /* Update destination density/validity masks. */
5753 : /* --------------------------------------------------------------------
5754 : */
5755 236160 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
5756 :
5757 236160 : if (poWK->panDstValid != nullptr)
5758 : {
5759 0 : CPLMaskSet(poWK->panDstValid, iDstOffset);
5760 : }
5761 : } /* Next iDstX */
5762 :
5763 : /* --------------------------------------------------------------------
5764 : */
5765 : /* Report progress to the user, and optionally cancel out. */
5766 : /* --------------------------------------------------------------------
5767 : */
5768 6230 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
5769 0 : break;
5770 : }
5771 :
5772 : /* -------------------------------------------------------------------- */
5773 : /* Cleanup and return. */
5774 : /* -------------------------------------------------------------------- */
5775 239 : CPLFree(padfX);
5776 239 : CPLFree(padfY);
5777 239 : CPLFree(padfZ);
5778 239 : CPLFree(pabSuccess);
5779 239 : if (psWrkStruct)
5780 220 : GWKResampleDeleteWrkStruct(psWrkStruct);
5781 239 : }
5782 :
5783 239 : static CPLErr GWKGeneralCase(GDALWarpKernel *poWK)
5784 : {
5785 239 : return GWKRun(poWK, "GWKGeneralCase", GWKGeneralCaseThread);
5786 : }
5787 :
5788 : /************************************************************************/
5789 : /* GWKRealCase() */
5790 : /* */
5791 : /* General case for non-complex data types. */
5792 : /************************************************************************/
5793 :
5794 223 : static void GWKRealCaseThread(void *pData)
5795 :
5796 : {
5797 223 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
5798 223 : GDALWarpKernel *poWK = psJob->poWK;
5799 223 : const int iYMin = psJob->iYMin;
5800 223 : const int iYMax = psJob->iYMax;
5801 :
5802 223 : const int nDstXSize = poWK->nDstXSize;
5803 223 : const int nSrcXSize = poWK->nSrcXSize;
5804 223 : const int nSrcYSize = poWK->nSrcYSize;
5805 : const double dfMultFactorVerticalShiftPipeline =
5806 223 : poWK->bApplyVerticalShift
5807 223 : ? CPLAtof(CSLFetchNameValueDef(
5808 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
5809 : "1.0"))
5810 223 : : 0.0;
5811 : const bool bAvoidNoDataSingleBand =
5812 305 : poWK->nBands == 1 ||
5813 82 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
5814 223 : "UNIFIED_SRC_NODATA", "FALSE"));
5815 :
5816 : /* -------------------------------------------------------------------- */
5817 : /* Allocate x,y,z coordinate arrays for transformation ... one */
5818 : /* scanlines worth of positions. */
5819 : /* -------------------------------------------------------------------- */
5820 :
5821 : // For x, 2 *, because we cache the precomputed values at the end.
5822 : double *padfX =
5823 223 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
5824 : double *padfY =
5825 223 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5826 : double *padfZ =
5827 223 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5828 223 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5829 :
5830 223 : const bool bUse4SamplesFormula = CanUse4SamplesFormula(poWK);
5831 :
5832 223 : GWKResampleWrkStruct *psWrkStruct = nullptr;
5833 223 : if (poWK->eResample != GRA_NearestNeighbour)
5834 : {
5835 181 : psWrkStruct = GWKResampleCreateWrkStruct(poWK);
5836 : }
5837 223 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
5838 223 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
5839 223 : const double dfErrorThreshold = CPLAtof(
5840 223 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
5841 :
5842 638 : const bool bSrcMaskIsDensity = poWK->panUnifiedSrcValid == nullptr &&
5843 415 : poWK->papanBandSrcValid == nullptr &&
5844 192 : poWK->pafUnifiedSrcDensity != nullptr;
5845 :
5846 : const bool bOneSourceCornerFailsToReproject =
5847 223 : GWKOneSourceCornerFailsToReproject(psJob);
5848 :
5849 : // Precompute values.
5850 24657 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5851 24434 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
5852 :
5853 : /* ==================================================================== */
5854 : /* Loop over output lines. */
5855 : /* ==================================================================== */
5856 25909 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
5857 : {
5858 : /* --------------------------------------------------------------------
5859 : */
5860 : /* Setup points to transform to source image space. */
5861 : /* --------------------------------------------------------------------
5862 : */
5863 25686 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
5864 25686 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
5865 44594200 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5866 44568500 : padfY[iDstX] = dfY;
5867 25686 : memset(padfZ, 0, sizeof(double) * nDstXSize);
5868 :
5869 : /* --------------------------------------------------------------------
5870 : */
5871 : /* Transform the points from destination pixel/line coordinates */
5872 : /* to source pixel/line coordinates. */
5873 : /* --------------------------------------------------------------------
5874 : */
5875 25686 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
5876 : padfY, padfZ, pabSuccess);
5877 25686 : if (dfSrcCoordPrecision > 0.0)
5878 : {
5879 0 : GWKRoundSourceCoordinates(
5880 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
5881 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
5882 0 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
5883 : }
5884 :
5885 : /* ====================================================================
5886 : */
5887 : /* Loop over pixels in output scanline. */
5888 : /* ====================================================================
5889 : */
5890 44594200 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5891 : {
5892 44568500 : GPtrDiff_t iSrcOffset = 0;
5893 44568500 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
5894 : padfX, padfY, nSrcXSize,
5895 : nSrcYSize, iSrcOffset))
5896 43823900 : continue;
5897 :
5898 : /* --------------------------------------------------------------------
5899 : */
5900 : /* Do not try to apply transparent/invalid source pixels to the
5901 : */
5902 : /* destination. This currently ignores the multi-pixel input
5903 : */
5904 : /* of bilinear and cubic resamples. */
5905 : /* --------------------------------------------------------------------
5906 : */
5907 31812400 : double dfDensity = 1.0;
5908 :
5909 31812400 : if (poWK->pafUnifiedSrcDensity != nullptr)
5910 : {
5911 1669560 : dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
5912 1669560 : if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
5913 : {
5914 1538480 : if (!bOneSourceCornerFailsToReproject)
5915 : {
5916 1538480 : continue;
5917 : }
5918 0 : else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
5919 : psJob, iSrcOffset))
5920 : {
5921 0 : dfDensity =
5922 0 : double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
5923 : }
5924 : else
5925 : {
5926 0 : continue;
5927 : }
5928 : }
5929 : }
5930 :
5931 59903100 : if (poWK->panUnifiedSrcValid != nullptr &&
5932 29629200 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
5933 : {
5934 29531600 : if (!bOneSourceCornerFailsToReproject)
5935 : {
5936 29529300 : continue;
5937 : }
5938 2229 : else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
5939 : {
5940 0 : continue;
5941 : }
5942 : }
5943 :
5944 : /* ====================================================================
5945 : */
5946 : /* Loop processing each band. */
5947 : /* ====================================================================
5948 : */
5949 744578 : bool bHasFoundDensity = false;
5950 :
5951 744578 : const GPtrDiff_t iDstOffset =
5952 744578 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
5953 2092550 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
5954 : {
5955 1347980 : double dfBandDensity = 0.0;
5956 1347980 : double dfValueReal = 0.0;
5957 :
5958 : /* --------------------------------------------------------------------
5959 : */
5960 : /* Collect the source value. */
5961 : /* --------------------------------------------------------------------
5962 : */
5963 1347980 : if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
5964 : nSrcYSize == 1)
5965 : {
5966 : // FALSE is returned if dfBandDensity == 0, which is
5967 : // checked below.
5968 15516 : CPL_IGNORE_RET_VAL(GWKGetPixelValueReal(
5969 : poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal));
5970 : }
5971 1332460 : else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
5972 : {
5973 2046 : double dfValueImagIgnored = 0.0;
5974 2046 : GWKBilinearResample4Sample(
5975 2046 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5976 2046 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5977 2046 : &dfValueReal, &dfValueImagIgnored);
5978 : }
5979 1330410 : else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
5980 : {
5981 691552 : if (bSrcMaskIsDensity)
5982 : {
5983 389755 : if (poWK->eWorkingDataType == GDT_UInt8)
5984 : {
5985 389755 : GWKCubicResampleSrcMaskIsDensity4SampleRealT<GByte>(
5986 389755 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5987 389755 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5988 : &dfValueReal);
5989 : }
5990 0 : else if (poWK->eWorkingDataType == GDT_UInt16)
5991 : {
5992 : GWKCubicResampleSrcMaskIsDensity4SampleRealT<
5993 0 : GUInt16>(poWK, iBand,
5994 0 : padfX[iDstX] - poWK->nSrcXOff,
5995 0 : padfY[iDstX] - poWK->nSrcYOff,
5996 : &dfBandDensity, &dfValueReal);
5997 : }
5998 : else
5999 : {
6000 0 : GWKCubicResampleSrcMaskIsDensity4SampleReal(
6001 0 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6002 0 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
6003 : &dfValueReal);
6004 : }
6005 : }
6006 : else
6007 : {
6008 301797 : double dfValueImagIgnored = 0.0;
6009 301797 : GWKCubicResample4Sample(
6010 301797 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6011 301797 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
6012 : &dfValueReal, &dfValueImagIgnored);
6013 691552 : }
6014 : }
6015 : else
6016 : #ifdef DEBUG
6017 : // Only useful for clang static analyzer.
6018 638861 : if (psWrkStruct != nullptr)
6019 : #endif
6020 : {
6021 638861 : double dfValueImagIgnored = 0.0;
6022 638861 : psWrkStruct->pfnGWKResample(
6023 638861 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6024 638861 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
6025 : &dfValueReal, &dfValueImagIgnored, psWrkStruct);
6026 : }
6027 :
6028 : // If we didn't find any valid inputs skip to next band.
6029 1347980 : if (dfBandDensity < BAND_DENSITY_THRESHOLD)
6030 0 : continue;
6031 :
6032 1347980 : if (poWK->bApplyVerticalShift)
6033 : {
6034 0 : if (!std::isfinite(padfZ[iDstX]))
6035 0 : continue;
6036 : // Subtract padfZ[] since the coordinate transformation is
6037 : // from target to source
6038 0 : dfValueReal =
6039 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
6040 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
6041 : }
6042 :
6043 1347980 : bHasFoundDensity = true;
6044 :
6045 : /* --------------------------------------------------------------------
6046 : */
6047 : /* We have a computed value from the source. Now apply it
6048 : * to */
6049 : /* the destination pixel. */
6050 : /* --------------------------------------------------------------------
6051 : */
6052 1347980 : GWKSetPixelValueReal(poWK, iBand, iDstOffset, dfBandDensity,
6053 : dfValueReal, bAvoidNoDataSingleBand);
6054 : }
6055 :
6056 744578 : if (!bHasFoundDensity)
6057 0 : continue;
6058 :
6059 744578 : if (!bAvoidNoDataSingleBand)
6060 : {
6061 100295 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
6062 : }
6063 :
6064 : /* --------------------------------------------------------------------
6065 : */
6066 : /* Update destination density/validity masks. */
6067 : /* --------------------------------------------------------------------
6068 : */
6069 744578 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
6070 :
6071 744578 : if (poWK->panDstValid != nullptr)
6072 : {
6073 104586 : CPLMaskSet(poWK->panDstValid, iDstOffset);
6074 : }
6075 : } // Next iDstX.
6076 :
6077 : /* --------------------------------------------------------------------
6078 : */
6079 : /* Report progress to the user, and optionally cancel out. */
6080 : /* --------------------------------------------------------------------
6081 : */
6082 25686 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
6083 0 : break;
6084 : }
6085 :
6086 : /* -------------------------------------------------------------------- */
6087 : /* Cleanup and return. */
6088 : /* -------------------------------------------------------------------- */
6089 223 : CPLFree(padfX);
6090 223 : CPLFree(padfY);
6091 223 : CPLFree(padfZ);
6092 223 : CPLFree(pabSuccess);
6093 223 : if (psWrkStruct)
6094 181 : GWKResampleDeleteWrkStruct(psWrkStruct);
6095 223 : }
6096 :
6097 223 : static CPLErr GWKRealCase(GDALWarpKernel *poWK)
6098 : {
6099 223 : return GWKRun(poWK, "GWKRealCase", GWKRealCaseThread);
6100 : }
6101 :
6102 : /************************************************************************/
6103 : /* GWKCubicResampleNoMasks4MultiBandT() */
6104 : /************************************************************************/
6105 :
6106 : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
6107 : /* and enough SSE registers */
6108 : #if defined(USE_SSE2)
6109 :
6110 142031000 : static inline float Convolute4x4(const __m128 row0, const __m128 row1,
6111 : const __m128 row2, const __m128 row3,
6112 : const __m128 weightsXY0,
6113 : const __m128 weightsXY1,
6114 : const __m128 weightsXY2,
6115 : const __m128 weightsXY3)
6116 : {
6117 994218000 : return XMMHorizontalAdd(_mm_add_ps(
6118 : _mm_add_ps(_mm_mul_ps(row0, weightsXY0), _mm_mul_ps(row1, weightsXY1)),
6119 : _mm_add_ps(_mm_mul_ps(row2, weightsXY2),
6120 142031000 : _mm_mul_ps(row3, weightsXY3))));
6121 : }
6122 :
6123 : template <class T>
6124 48826142 : static void GWKCubicResampleNoMasks4MultiBandT(const GDALWarpKernel *poWK,
6125 : double dfSrcX, double dfSrcY,
6126 : const GPtrDiff_t iDstOffset)
6127 : {
6128 48826142 : const double dfSrcXShifted = dfSrcX - 0.5;
6129 48826142 : const int iSrcX = static_cast<int>(dfSrcXShifted);
6130 48826142 : const double dfSrcYShifted = dfSrcY - 0.5;
6131 48826142 : const int iSrcY = static_cast<int>(dfSrcYShifted);
6132 48826142 : const GPtrDiff_t iSrcOffset =
6133 48826142 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
6134 :
6135 : // Get the bilinear interpolation at the image borders.
6136 48826142 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
6137 47479062 : iSrcY + 2 >= poWK->nSrcYSize)
6138 : {
6139 5929580 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6140 : {
6141 : T value;
6142 4447190 : GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
6143 : &value);
6144 4447190 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
6145 : value;
6146 1482400 : }
6147 : }
6148 : else
6149 : {
6150 47343762 : const float fDeltaX = static_cast<float>(dfSrcXShifted) - iSrcX;
6151 47343762 : const float fDeltaY = static_cast<float>(dfSrcYShifted) - iSrcY;
6152 :
6153 : float afCoeffsX[4];
6154 : float afCoeffsY[4];
6155 47343762 : GWKCubicComputeWeights(fDeltaX, afCoeffsX);
6156 47343762 : GWKCubicComputeWeights(fDeltaY, afCoeffsY);
6157 47343762 : const auto weightsX = _mm_loadu_ps(afCoeffsX);
6158 : const auto weightsXY0 =
6159 94687424 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[0]), weightsX);
6160 : const auto weightsXY1 =
6161 94687424 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[1]), weightsX);
6162 : const auto weightsXY2 =
6163 94687424 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[2]), weightsX);
6164 : const auto weightsXY3 =
6165 47343762 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[3]), weightsX);
6166 :
6167 47343762 : const GPtrDiff_t iOffset = iSrcOffset - poWK->nSrcXSize - 1;
6168 :
6169 47343762 : int iBand = 0;
6170 : // Process 2 bands at a time
6171 94687424 : for (; iBand + 1 < poWK->nBands; iBand += 2)
6172 : {
6173 47343762 : const T *CPL_RESTRICT pBand0 =
6174 47343762 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
6175 47343762 : const auto row0_0 = XMMLoad4Values(pBand0 + iOffset);
6176 : const auto row1_0 =
6177 47343762 : XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
6178 : const auto row2_0 =
6179 47343762 : XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
6180 : const auto row3_0 =
6181 47343762 : XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
6182 :
6183 47343762 : const T *CPL_RESTRICT pBand1 =
6184 47343762 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand + 1]);
6185 47343762 : const auto row0_1 = XMMLoad4Values(pBand1 + iOffset);
6186 : const auto row1_1 =
6187 47343762 : XMMLoad4Values(pBand1 + iOffset + poWK->nSrcXSize);
6188 : const auto row2_1 =
6189 47343762 : XMMLoad4Values(pBand1 + iOffset + 2 * poWK->nSrcXSize);
6190 : const auto row3_1 =
6191 47343762 : XMMLoad4Values(pBand1 + iOffset + 3 * poWK->nSrcXSize);
6192 :
6193 : const float fValue_0 =
6194 47343762 : Convolute4x4(row0_0, row1_0, row2_0, row3_0, weightsXY0,
6195 : weightsXY1, weightsXY2, weightsXY3);
6196 :
6197 : const float fValue_1 =
6198 47343762 : Convolute4x4(row0_1, row1_1, row2_1, row3_1, weightsXY0,
6199 : weightsXY1, weightsXY2, weightsXY3);
6200 :
6201 47343762 : T *CPL_RESTRICT pDstBand0 =
6202 47343762 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
6203 47343762 : pDstBand0[iDstOffset] = GWKClampValueT<T>(fValue_0);
6204 :
6205 47343762 : T *CPL_RESTRICT pDstBand1 =
6206 47343762 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand + 1]);
6207 47343762 : pDstBand1[iDstOffset] = GWKClampValueT<T>(fValue_1);
6208 : }
6209 47343762 : if (iBand < poWK->nBands)
6210 : {
6211 47343762 : const T *CPL_RESTRICT pBand0 =
6212 47343762 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
6213 47343762 : const auto row0 = XMMLoad4Values(pBand0 + iOffset);
6214 : const auto row1 =
6215 47343762 : XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
6216 : const auto row2 =
6217 47343762 : XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
6218 : const auto row3 =
6219 47343762 : XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
6220 :
6221 : const float fValue =
6222 47343762 : Convolute4x4(row0, row1, row2, row3, weightsXY0, weightsXY1,
6223 : weightsXY2, weightsXY3);
6224 :
6225 47343762 : T *CPL_RESTRICT pDstBand =
6226 47343762 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
6227 47343762 : pDstBand[iDstOffset] = GWKClampValueT<T>(fValue);
6228 : }
6229 : }
6230 :
6231 48826142 : if (poWK->pafDstDensity)
6232 46672101 : poWK->pafDstDensity[iDstOffset] = 1.0f;
6233 48826142 : }
6234 :
6235 : #endif // defined(USE_SSE2)
6236 :
6237 : /************************************************************************/
6238 : /* GWKResampleNoMasksOrDstDensityOnlyThreadInternal() */
6239 : /************************************************************************/
6240 :
6241 : template <class T, GDALResampleAlg eResample, int bUse4SamplesFormula>
6242 1984 : static void GWKResampleNoMasksOrDstDensityOnlyThreadInternal(void *pData)
6243 :
6244 : {
6245 1984 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6246 1984 : GDALWarpKernel *poWK = psJob->poWK;
6247 1984 : const int iYMin = psJob->iYMin;
6248 1984 : const int iYMax = psJob->iYMax;
6249 1966 : const double dfMultFactorVerticalShiftPipeline =
6250 1984 : poWK->bApplyVerticalShift
6251 18 : ? CPLAtof(CSLFetchNameValueDef(
6252 18 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
6253 : "1.0"))
6254 : : 0.0;
6255 :
6256 1984 : const int nDstXSize = poWK->nDstXSize;
6257 1984 : const int nSrcXSize = poWK->nSrcXSize;
6258 1984 : const int nSrcYSize = poWK->nSrcYSize;
6259 :
6260 : /* -------------------------------------------------------------------- */
6261 : /* Allocate x,y,z coordinate arrays for transformation ... one */
6262 : /* scanlines worth of positions. */
6263 : /* -------------------------------------------------------------------- */
6264 :
6265 : // For x, 2 *, because we cache the precomputed values at the end.
6266 : double *padfX =
6267 1984 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
6268 : double *padfY =
6269 1984 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6270 : double *padfZ =
6271 1984 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6272 1984 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
6273 :
6274 1984 : const int nXRadius = poWK->nXRadius;
6275 : double *padfWeightsX =
6276 1984 : static_cast<double *>(CPLCalloc(1 + nXRadius * 2, sizeof(double)));
6277 : double *padfWeightsY = static_cast<double *>(
6278 1984 : CPLCalloc(1 + poWK->nYRadius * 2, sizeof(double)));
6279 1984 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
6280 1984 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
6281 1984 : const double dfErrorThreshold = CPLAtof(
6282 1984 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
6283 :
6284 : // Precompute values.
6285 493219 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6286 491235 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
6287 :
6288 : /* ==================================================================== */
6289 : /* Loop over output lines. */
6290 : /* ==================================================================== */
6291 313089 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
6292 : {
6293 : /* --------------------------------------------------------------------
6294 : */
6295 : /* Setup points to transform to source image space. */
6296 : /* --------------------------------------------------------------------
6297 : */
6298 311106 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
6299 311106 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
6300 108893195 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6301 108582073 : padfY[iDstX] = dfY;
6302 311106 : memset(padfZ, 0, sizeof(double) * nDstXSize);
6303 :
6304 : /* --------------------------------------------------------------------
6305 : */
6306 : /* Transform the points from destination pixel/line coordinates */
6307 : /* to source pixel/line coordinates. */
6308 : /* --------------------------------------------------------------------
6309 : */
6310 311106 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
6311 : padfY, padfZ, pabSuccess);
6312 311106 : if (dfSrcCoordPrecision > 0.0)
6313 : {
6314 1000 : GWKRoundSourceCoordinates(
6315 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
6316 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6317 1000 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
6318 : }
6319 :
6320 : /* ====================================================================
6321 : */
6322 : /* Loop over pixels in output scanline. */
6323 : /* ====================================================================
6324 : */
6325 108893195 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6326 : {
6327 108582073 : GPtrDiff_t iSrcOffset = 0;
6328 108582073 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
6329 : padfX, padfY, nSrcXSize,
6330 : nSrcYSize, iSrcOffset))
6331 61444108 : continue;
6332 :
6333 : /* ====================================================================
6334 : */
6335 : /* Loop processing each band. */
6336 : /* ====================================================================
6337 : */
6338 95964087 : const GPtrDiff_t iDstOffset =
6339 95964087 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
6340 :
6341 : #if defined(USE_SSE2)
6342 : if constexpr (bUse4SamplesFormula && eResample == GRA_Cubic &&
6343 : (std::is_same<T, GByte>::value ||
6344 : std::is_same<T, GUInt16>::value))
6345 : {
6346 49891741 : if (poWK->nBands > 1 && !poWK->bApplyVerticalShift)
6347 : {
6348 48826142 : GWKCubicResampleNoMasks4MultiBandT<T>(
6349 48826142 : poWK, padfX[iDstX] - poWK->nSrcXOff,
6350 48826142 : padfY[iDstX] - poWK->nSrcYOff, iDstOffset);
6351 :
6352 48826142 : continue;
6353 : }
6354 : }
6355 : #endif // defined(USE_SSE2)
6356 :
6357 47137958 : [[maybe_unused]] double dfInvWeights = 0;
6358 127960488 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6359 : {
6360 80822324 : T value = 0;
6361 : if constexpr (eResample == GRA_NearestNeighbour)
6362 : {
6363 73097530 : value = reinterpret_cast<T *>(
6364 73097530 : poWK->papabySrcImage[iBand])[iSrcOffset];
6365 : }
6366 : else if constexpr (bUse4SamplesFormula)
6367 : {
6368 : if constexpr (eResample == GRA_Bilinear)
6369 4041681 : GWKBilinearResampleNoMasks4SampleT(
6370 4041681 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6371 4041681 : padfY[iDstX] - poWK->nSrcYOff, &value);
6372 : else
6373 2300964 : GWKCubicResampleNoMasks4SampleT(
6374 2300964 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6375 2300964 : padfY[iDstX] - poWK->nSrcYOff, &value);
6376 : }
6377 : else
6378 : {
6379 1382149 : GWKResampleNoMasksT(
6380 1382149 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6381 1382149 : padfY[iDstX] - poWK->nSrcYOff, &value, padfWeightsX,
6382 : padfWeightsY, dfInvWeights);
6383 : }
6384 :
6385 80822324 : if (poWK->bApplyVerticalShift)
6386 : {
6387 818 : if (!std::isfinite(padfZ[iDstX]))
6388 0 : continue;
6389 : // Subtract padfZ[] since the coordinate transformation is
6390 : // from target to source
6391 818 : value = GWKClampValueT<T>(
6392 818 : double(value) * poWK->dfMultFactorVerticalShift -
6393 818 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
6394 : }
6395 :
6396 80822324 : if (poWK->pafDstDensity)
6397 8224397 : poWK->pafDstDensity[iDstOffset] = 1.0f;
6398 :
6399 80822324 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
6400 : value;
6401 : }
6402 : }
6403 :
6404 : /* --------------------------------------------------------------------
6405 : */
6406 : /* Report progress to the user, and optionally cancel out. */
6407 : /* --------------------------------------------------------------------
6408 : */
6409 311106 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
6410 1 : break;
6411 : }
6412 :
6413 : /* -------------------------------------------------------------------- */
6414 : /* Cleanup and return. */
6415 : /* -------------------------------------------------------------------- */
6416 1984 : CPLFree(padfX);
6417 1984 : CPLFree(padfY);
6418 1984 : CPLFree(padfZ);
6419 1984 : CPLFree(pabSuccess);
6420 1984 : CPLFree(padfWeightsX);
6421 1984 : CPLFree(padfWeightsY);
6422 1984 : }
6423 :
6424 : template <class T, GDALResampleAlg eResample>
6425 960 : static void GWKResampleNoMasksOrDstDensityOnlyThread(void *pData)
6426 : {
6427 960 : GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
6428 : pData);
6429 960 : }
6430 :
6431 : template <class T, GDALResampleAlg eResample>
6432 1024 : static void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread(void *pData)
6433 :
6434 : {
6435 1024 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6436 1024 : GDALWarpKernel *poWK = psJob->poWK;
6437 : static_assert(eResample == GRA_Bilinear || eResample == GRA_Cubic);
6438 1024 : const bool bUse4SamplesFormula = CanUse4SamplesFormula(poWK);
6439 1024 : if (bUse4SamplesFormula)
6440 969 : GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, TRUE>(
6441 : pData);
6442 : else
6443 55 : GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
6444 : pData);
6445 1024 : }
6446 :
6447 909 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6448 : {
6449 909 : return GWKRun(
6450 : poWK, "GWKNearestNoMasksOrDstDensityOnlyByte",
6451 909 : GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_NearestNeighbour>);
6452 : }
6453 :
6454 128 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6455 : {
6456 128 : return GWKRun(
6457 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyByte",
6458 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte,
6459 128 : GRA_Bilinear>);
6460 : }
6461 :
6462 850 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6463 : {
6464 850 : return GWKRun(
6465 : poWK, "GWKCubicNoMasksOrDstDensityOnlyByte",
6466 850 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte, GRA_Cubic>);
6467 : }
6468 :
6469 9 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
6470 : {
6471 9 : return GWKRun(
6472 : poWK, "GWKCubicNoMasksOrDstDensityOnlyFloat",
6473 9 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, GRA_Cubic>);
6474 : }
6475 :
6476 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
6477 :
6478 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
6479 : {
6480 : return GWKRun(
6481 : poWK, "GWKCubicNoMasksOrDstDensityOnlyDouble",
6482 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double, GRA_Cubic>);
6483 : }
6484 : #endif
6485 :
6486 12 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6487 : {
6488 12 : return GWKRun(
6489 : poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyByte",
6490 12 : GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_CubicSpline>);
6491 : }
6492 :
6493 : /************************************************************************/
6494 : /* GWKNearestByte() */
6495 : /* */
6496 : /* Case for 8bit input data with nearest neighbour resampling */
6497 : /* using valid flags. Should be as fast as possible for this */
6498 : /* particular transformation type. */
6499 : /************************************************************************/
6500 :
6501 476 : template <class T> static void GWKNearestThread(void *pData)
6502 :
6503 : {
6504 476 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6505 476 : GDALWarpKernel *poWK = psJob->poWK;
6506 476 : const int iYMin = psJob->iYMin;
6507 476 : const int iYMax = psJob->iYMax;
6508 476 : const double dfMultFactorVerticalShiftPipeline =
6509 476 : poWK->bApplyVerticalShift
6510 0 : ? CPLAtof(CSLFetchNameValueDef(
6511 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
6512 : "1.0"))
6513 : : 0.0;
6514 476 : const bool bAvoidNoDataSingleBand =
6515 545 : poWK->nBands == 1 ||
6516 69 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
6517 : "UNIFIED_SRC_NODATA", "FALSE"));
6518 :
6519 476 : const int nDstXSize = poWK->nDstXSize;
6520 476 : const int nSrcXSize = poWK->nSrcXSize;
6521 476 : const int nSrcYSize = poWK->nSrcYSize;
6522 :
6523 : /* -------------------------------------------------------------------- */
6524 : /* Allocate x,y,z coordinate arrays for transformation ... one */
6525 : /* scanlines worth of positions. */
6526 : /* -------------------------------------------------------------------- */
6527 :
6528 : // For x, 2 *, because we cache the precomputed values at the end.
6529 : double *padfX =
6530 476 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
6531 : double *padfY =
6532 476 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6533 : double *padfZ =
6534 476 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6535 476 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
6536 :
6537 476 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
6538 476 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
6539 476 : const double dfErrorThreshold = CPLAtof(
6540 476 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
6541 :
6542 : const bool bOneSourceCornerFailsToReproject =
6543 476 : GWKOneSourceCornerFailsToReproject(psJob);
6544 :
6545 : // Precompute values.
6546 80555 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6547 80079 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
6548 :
6549 : /* ==================================================================== */
6550 : /* Loop over output lines. */
6551 : /* ==================================================================== */
6552 64711 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
6553 : {
6554 :
6555 : /* --------------------------------------------------------------------
6556 : */
6557 : /* Setup points to transform to source image space. */
6558 : /* --------------------------------------------------------------------
6559 : */
6560 64235 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
6561 64235 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
6562 33836597 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6563 33772441 : padfY[iDstX] = dfY;
6564 64235 : memset(padfZ, 0, sizeof(double) * nDstXSize);
6565 :
6566 : /* --------------------------------------------------------------------
6567 : */
6568 : /* Transform the points from destination pixel/line coordinates */
6569 : /* to source pixel/line coordinates. */
6570 : /* --------------------------------------------------------------------
6571 : */
6572 64235 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
6573 : padfY, padfZ, pabSuccess);
6574 64235 : if (dfSrcCoordPrecision > 0.0)
6575 : {
6576 0 : GWKRoundSourceCoordinates(
6577 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
6578 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6579 0 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
6580 : }
6581 : /* ====================================================================
6582 : */
6583 : /* Loop over pixels in output scanline. */
6584 : /* ====================================================================
6585 : */
6586 33836597 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6587 : {
6588 33772441 : GPtrDiff_t iSrcOffset = 0;
6589 33772441 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
6590 : padfX, padfY, nSrcXSize,
6591 : nSrcYSize, iSrcOffset))
6592 21383643 : continue;
6593 :
6594 : /* --------------------------------------------------------------------
6595 : */
6596 : /* Do not try to apply invalid source pixels to the dest. */
6597 : /* --------------------------------------------------------------------
6598 : */
6599 25227005 : if (poWK->panUnifiedSrcValid != nullptr &&
6600 6714445 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
6601 : {
6602 5120982 : if (!bOneSourceCornerFailsToReproject)
6603 : {
6604 5113496 : continue;
6605 : }
6606 7485 : else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
6607 : {
6608 5224 : continue;
6609 : }
6610 : }
6611 :
6612 : /* --------------------------------------------------------------------
6613 : */
6614 : /* Do not try to apply transparent source pixels to the
6615 : * destination.*/
6616 : /* --------------------------------------------------------------------
6617 : */
6618 13393880 : double dfDensity = 1.0;
6619 :
6620 13393880 : if (poWK->pafUnifiedSrcDensity != nullptr)
6621 : {
6622 1557335 : dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
6623 1557335 : if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
6624 1005075 : continue;
6625 : }
6626 :
6627 : /* ====================================================================
6628 : */
6629 : /* Loop processing each band. */
6630 : /* ====================================================================
6631 : */
6632 :
6633 12388798 : const GPtrDiff_t iDstOffset =
6634 12388798 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
6635 :
6636 27339658 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6637 : {
6638 14950960 : T value = 0;
6639 14950960 : double dfBandDensity = 0.0;
6640 :
6641 : /* --------------------------------------------------------------------
6642 : */
6643 : /* Collect the source value. */
6644 : /* --------------------------------------------------------------------
6645 : */
6646 14950960 : if (GWKGetPixelT(poWK, iBand, iSrcOffset, &dfBandDensity,
6647 : &value))
6648 : {
6649 :
6650 14950860 : if (poWK->bApplyVerticalShift)
6651 : {
6652 0 : if (!std::isfinite(padfZ[iDstX]))
6653 0 : continue;
6654 : // Subtract padfZ[] since the coordinate transformation
6655 : // is from target to source
6656 0 : value = GWKClampValueT<T>(
6657 0 : double(value) * poWK->dfMultFactorVerticalShift -
6658 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
6659 : }
6660 :
6661 14950860 : GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
6662 : dfBandDensity, value,
6663 : bAvoidNoDataSingleBand);
6664 : }
6665 : }
6666 :
6667 : /* --------------------------------------------------------------------
6668 : */
6669 : /* Mark this pixel valid/opaque in the output. */
6670 : /* --------------------------------------------------------------------
6671 : */
6672 :
6673 12388798 : if (!bAvoidNoDataSingleBand)
6674 : {
6675 424278 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
6676 : }
6677 :
6678 12388798 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
6679 :
6680 12388798 : if (poWK->panDstValid != nullptr)
6681 : {
6682 11118345 : CPLMaskSet(poWK->panDstValid, iDstOffset);
6683 : }
6684 : } /* Next iDstX */
6685 :
6686 : /* --------------------------------------------------------------------
6687 : */
6688 : /* Report progress to the user, and optionally cancel out. */
6689 : /* --------------------------------------------------------------------
6690 : */
6691 64235 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
6692 0 : break;
6693 : }
6694 :
6695 : /* -------------------------------------------------------------------- */
6696 : /* Cleanup and return. */
6697 : /* -------------------------------------------------------------------- */
6698 476 : CPLFree(padfX);
6699 476 : CPLFree(padfY);
6700 476 : CPLFree(padfZ);
6701 476 : CPLFree(pabSuccess);
6702 476 : }
6703 :
6704 363 : static CPLErr GWKNearestByte(GDALWarpKernel *poWK)
6705 : {
6706 363 : return GWKRun(poWK, "GWKNearestByte", GWKNearestThread<GByte>);
6707 : }
6708 :
6709 14 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6710 : {
6711 14 : return GWKRun(
6712 : poWK, "GWKNearestNoMasksOrDstDensityOnlyShort",
6713 14 : GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_NearestNeighbour>);
6714 : }
6715 :
6716 5 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6717 : {
6718 5 : return GWKRun(
6719 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyShort",
6720 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16,
6721 5 : GRA_Bilinear>);
6722 : }
6723 :
6724 6 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6725 : {
6726 6 : return GWKRun(
6727 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyUShort",
6728 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16,
6729 6 : GRA_Bilinear>);
6730 : }
6731 :
6732 4 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
6733 : {
6734 4 : return GWKRun(
6735 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyFloat",
6736 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float,
6737 4 : GRA_Bilinear>);
6738 : }
6739 :
6740 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
6741 :
6742 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
6743 : {
6744 : return GWKRun(
6745 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyDouble",
6746 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double,
6747 : GRA_Bilinear>);
6748 : }
6749 : #endif
6750 :
6751 5 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6752 : {
6753 5 : return GWKRun(
6754 : poWK, "GWKCubicNoMasksOrDstDensityOnlyShort",
6755 5 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16, GRA_Cubic>);
6756 : }
6757 :
6758 14 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6759 : {
6760 14 : return GWKRun(
6761 : poWK, "GWKCubicNoMasksOrDstDensityOnlyUShort",
6762 14 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16, GRA_Cubic>);
6763 : }
6764 :
6765 6 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6766 : {
6767 6 : return GWKRun(
6768 : poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyShort",
6769 6 : GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_CubicSpline>);
6770 : }
6771 :
6772 5 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6773 : {
6774 5 : return GWKRun(
6775 : poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyUShort",
6776 5 : GWKResampleNoMasksOrDstDensityOnlyThread<GUInt16, GRA_CubicSpline>);
6777 : }
6778 :
6779 9 : static CPLErr GWKNearestInt8(GDALWarpKernel *poWK)
6780 : {
6781 9 : return GWKRun(poWK, "GWKNearestInt8", GWKNearestThread<int8_t>);
6782 : }
6783 :
6784 40 : static CPLErr GWKNearestShort(GDALWarpKernel *poWK)
6785 : {
6786 40 : return GWKRun(poWK, "GWKNearestShort", GWKNearestThread<GInt16>);
6787 : }
6788 :
6789 10 : static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK)
6790 : {
6791 10 : return GWKRun(poWK, "GWKNearestUnsignedShort", GWKNearestThread<GUInt16>);
6792 : }
6793 :
6794 11 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
6795 : {
6796 11 : return GWKRun(
6797 : poWK, "GWKNearestNoMasksOrDstDensityOnlyFloat",
6798 11 : GWKResampleNoMasksOrDstDensityOnlyThread<float, GRA_NearestNeighbour>);
6799 : }
6800 :
6801 50 : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK)
6802 : {
6803 50 : return GWKRun(poWK, "GWKNearestFloat", GWKNearestThread<float>);
6804 : }
6805 :
6806 : /************************************************************************/
6807 : /* GWKAverageOrMode() */
6808 : /* */
6809 : /************************************************************************/
6810 :
// Vertical weight of source row iSrcY for the current destination pixel.
//
// Relies on the following names being in scope at the expansion site:
//   iSrcYMin, iSrcYMax : first row and one-past-last row of the source window
//   dfYMin, dfYMax     : floating-point vertical bounds of the window
//
// - single-row window (iSrcYMin + 1 == iSrcYMax): 1.0
// - first row:  1 - (dfYMin - iSrcYMin)
// - last row:   1 - (iSrcYMax - dfYMax)
// - any other row: 1.0
//
// The argument is parenthesized so that any expression may be passed; note
// that it can be evaluated twice, so it must be free of side effects.
#define COMPUTE_WEIGHT_Y(iSrcY)                                                \
    (((iSrcY) == iSrcYMin)                                                     \
         ? ((iSrcYMin + 1 == iSrcYMax) ? 1.0 : 1 - (dfYMin - iSrcYMin))        \
     : ((iSrcY) + 1 == iSrcYMax) ? 1 - (iSrcYMax - dfYMax)                     \
                                 : 1.0)
6816 :
// Combined weight of source sample (iSrcX, current row) for the current
// destination pixel: the row weight dfWeightY scaled by a horizontal factor.
//
// Relies on the following names being in scope at the expansion site:
//   iSrcXMin, iSrcXMax : first column and one-past-last column of the window
//   dfXMin, dfXMax     : floating-point horizontal bounds of the window
//
// - single-column window (iSrcXMin + 1 == iSrcXMax): dfWeightY
// - first column: dfWeightY * (1 - (dfXMin - iSrcXMin))
// - last column:  dfWeightY * (1 - (iSrcXMax - dfXMax))
// - any other column: dfWeightY
//
// Both arguments are parenthesized so that expressions such as `a + b` can be
// passed safely; they may be evaluated more than once, so they must be free of
// side effects.
#define COMPUTE_WEIGHT(iSrcX, dfWeightY)                                       \
    (((iSrcX) == iSrcXMin)                                                     \
         ? ((iSrcXMin + 1 == iSrcXMax)                                         \
                ? (dfWeightY)                                                  \
                : (dfWeightY) * (1 - (dfXMin - iSrcXMin)))                     \
     : ((iSrcX) + 1 == iSrcXMax) ? (dfWeightY) * (1 - (iSrcXMax - dfXMax))     \
                                 : (dfWeightY))
6823 :
6824 : static void GWKAverageOrModeThread(void *pData);
6825 :
6826 246 : static CPLErr GWKAverageOrMode(GDALWarpKernel *poWK)
6827 : {
6828 246 : return GWKRun(poWK, "GWKAverageOrMode", GWKAverageOrModeThread);
6829 : }
6830 :
6831 : /************************************************************************/
6832 : /* GWKAverageOrModeComputeLineCoords() */
6833 : /************************************************************************/
6834 :
6835 28663 : static void GWKAverageOrModeComputeLineCoords(
6836 : const GWKJobStruct *psJob, double *padfX, double *padfX2, double *padfY,
6837 : double *padfY2, double *padfZ, double *padfZ2, int *pabSuccess,
6838 : int *pabSuccess2, int iDstY, double dfSrcCoordPrecision,
6839 : double dfErrorThreshold)
6840 : {
6841 28663 : const GDALWarpKernel *poWK = psJob->poWK;
6842 28663 : const int nDstXSize = poWK->nDstXSize;
6843 :
6844 : // Setup points to transform to source image space.
6845 7360890 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6846 : {
6847 7332220 : padfX[iDstX] = iDstX + poWK->nDstXOff;
6848 7332220 : padfY[iDstX] = iDstY + poWK->nDstYOff;
6849 7332220 : padfZ[iDstX] = 0.0;
6850 7332220 : padfX2[iDstX] = iDstX + 1.0 + poWK->nDstXOff;
6851 7332220 : padfY2[iDstX] = iDstY + 1.0 + poWK->nDstYOff;
6852 7332220 : padfZ2[iDstX] = 0.0;
6853 : }
6854 :
6855 : /* ----------------------------------------------------------------- */
6856 : /* Transform the points from destination pixel/line coordinates */
6857 : /* to source pixel/line coordinates. */
6858 : /* ----------------------------------------------------------------- */
6859 28663 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX, padfY,
6860 : padfZ, pabSuccess);
6861 28663 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX2,
6862 : padfY2, padfZ2, pabSuccess2);
6863 :
6864 28663 : if (dfSrcCoordPrecision > 0.0)
6865 : {
6866 0 : GWKRoundSourceCoordinates(nDstXSize, padfX, padfY, padfZ, pabSuccess,
6867 : dfSrcCoordPrecision, dfErrorThreshold,
6868 0 : poWK->pfnTransformer, psJob->pTransformerArg,
6869 0 : poWK->nDstXOff, iDstY + poWK->nDstYOff);
6870 0 : GWKRoundSourceCoordinates(
6871 : nDstXSize, padfX2, padfY2, padfZ2, pabSuccess2, dfSrcCoordPrecision,
6872 0 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6873 0 : 1.0 + poWK->nDstXOff, iDstY + 1.0 + poWK->nDstYOff);
6874 : }
6875 28663 : }
6876 :
6877 : /************************************************************************/
6878 : /* GWKAverageOrModeComputeSourceCoords() */
6879 : /************************************************************************/
6880 :
6881 7332220 : static bool GWKAverageOrModeComputeSourceCoords(
6882 : const GWKJobStruct *psJob, double *padfX, double *padfX2, double *padfY,
6883 : double *padfY2, int iDstX, int iDstY, int nXMargin, int nYMargin,
6884 : // Output:
6885 : bool &bWrapOverX, double &dfXMin, double &dfYMin, double &dfXMax,
6886 : double &dfYMax, int &iSrcXMin, int &iSrcYMin, int &iSrcXMax, int &iSrcYMax)
6887 : {
6888 7332220 : const GDALWarpKernel *poWK = psJob->poWK;
6889 7332220 : const int nSrcXSize = poWK->nSrcXSize;
6890 7332220 : const int nSrcYSize = poWK->nSrcYSize;
6891 :
6892 : // Add some checks so that padfX[iDstX] - poWK->nSrcXOff is in
6893 : // reasonable range (https://github.com/OSGeo/gdal/issues/2365)
6894 7332220 : if (!(padfX[iDstX] - poWK->nSrcXOff >= -nXMargin &&
6895 6814810 : padfX2[iDstX] - poWK->nSrcXOff >= -nXMargin &&
6896 6814810 : padfY[iDstX] - poWK->nSrcYOff >= -nYMargin &&
6897 6532210 : padfY2[iDstX] - poWK->nSrcYOff >= -nYMargin &&
6898 6532210 : padfX[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
6899 5870420 : padfX2[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
6900 5865780 : padfY[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin &&
6901 5350790 : padfY2[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin))
6902 : {
6903 1985190 : return false;
6904 : }
6905 :
6906 : // Compute corners in source crs.
6907 :
6908 : // The transformation might not have preserved ordering of
6909 : // coordinates so do the necessary swapping (#5433).
6910 : // NOTE: this is really an approximative fix. To do something
6911 : // more precise we would for example need to compute the
6912 : // transformation of coordinates in the
6913 : // [iDstX,iDstY]x[iDstX+1,iDstY+1] square back to source
6914 : // coordinates, and take the bounding box of the got source
6915 : // coordinates.
6916 :
6917 5347040 : if (padfX[iDstX] > padfX2[iDstX])
6918 269148 : std::swap(padfX[iDstX], padfX2[iDstX]);
6919 :
6920 : // Detect situations where the target pixel is close to the
6921 : // antimeridian and when padfX[iDstX] and padfX2[iDstX] are very
6922 : // close to the left-most and right-most columns of the source
6923 : // raster. The 2 value below was experimentally determined to
6924 : // avoid false-positives and false-negatives.
6925 : // Addresses https://github.com/OSGeo/gdal/issues/6478
6926 5347040 : bWrapOverX = false;
6927 5347040 : const int nThresholdWrapOverX = std::min(2, nSrcXSize / 10);
6928 5347040 : if (poWK->nSrcXOff == 0 && iDstX + 1 < poWK->nDstXSize &&
6929 3298690 : 2 * padfX[iDstX] - padfX[iDstX + 1] < nThresholdWrapOverX &&
6930 55362 : nSrcXSize - padfX2[iDstX] < nThresholdWrapOverX)
6931 : {
6932 : // Check there is a discontinuity by checking at mid-pixel.
6933 : // NOTE: all this remains fragile. To confidently
6934 : // detect antimeridian warping we should probably try to access
6935 : // georeferenced coordinates, and not rely only on tests on
6936 : // image space coordinates. But accessing georeferenced
6937 : // coordinates from here is not trivial, and we would for example
6938 : // have to handle both geographic, Mercator, etc.
6939 : // Let's hope this heuristics is good enough for now.
6940 1610 : double x = iDstX + 0.5 + poWK->nDstXOff;
6941 1610 : double y = iDstY + poWK->nDstYOff;
6942 1610 : double z = 0;
6943 1610 : int bSuccess = FALSE;
6944 1610 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1, &x, &y, &z,
6945 : &bSuccess);
6946 1610 : if (bSuccess && x < padfX[iDstX])
6947 : {
6948 1596 : bWrapOverX = true;
6949 1596 : std::swap(padfX[iDstX], padfX2[iDstX]);
6950 1596 : padfX2[iDstX] += nSrcXSize;
6951 : }
6952 : }
6953 :
6954 5347040 : dfXMin = padfX[iDstX] - poWK->nSrcXOff;
6955 5347040 : dfXMax = padfX2[iDstX] - poWK->nSrcXOff;
6956 5347040 : constexpr double EPSILON = 1e-10;
6957 : // Check that [dfXMin, dfXMax] intersect with [0,nSrcXSize] with a tolerance
6958 5347040 : if (!(dfXMax > -EPSILON && dfXMin < nSrcXSize + EPSILON))
6959 15528 : return false;
6960 5331510 : iSrcXMin = static_cast<int>(std::max(floor(dfXMin + EPSILON), 0.0));
6961 5331510 : iSrcXMax = static_cast<int>(
6962 5331510 : std::min(ceil(dfXMax - EPSILON), static_cast<double>(INT_MAX)));
6963 5331510 : if (!bWrapOverX)
6964 5329910 : iSrcXMax = std::min(iSrcXMax, nSrcXSize);
6965 5331510 : if (iSrcXMin == iSrcXMax && iSrcXMax < nSrcXSize)
6966 472 : iSrcXMax++;
6967 :
6968 5331510 : if (padfY[iDstX] > padfY2[iDstX])
6969 270117 : std::swap(padfY[iDstX], padfY2[iDstX]);
6970 5331510 : dfYMin = padfY[iDstX] - poWK->nSrcYOff;
6971 5331510 : dfYMax = padfY2[iDstX] - poWK->nSrcYOff;
6972 : // Check that [dfYMin, dfYMax] intersect with [0,nSrcYSize] with a tolerance
6973 5331510 : if (!(dfYMax > -EPSILON && dfYMin < nSrcYSize + EPSILON))
6974 13334 : return false;
6975 5318180 : iSrcYMin = static_cast<int>(std::max(floor(dfYMin + EPSILON), 0.0));
6976 5318180 : iSrcYMax = std::min(static_cast<int>(ceil(dfYMax - EPSILON)), nSrcYSize);
6977 5318180 : if (iSrcYMin == iSrcYMax && iSrcYMax < nSrcYSize)
6978 0 : iSrcYMax++;
6979 :
6980 5318180 : return true;
6981 : }
6982 :
6983 : /************************************************************************/
6984 : /* GWKModeRealType() */
6985 : /************************************************************************/
6986 :
// Generic value comparison used to match samples against existing bins:
// plain operator== for type T.
template <class T> static inline bool IsSame(T a, T b)
{
    const bool bEqual = (a == b);
    return bEqual;
}
6991 :
6992 0 : template <> bool IsSame<GFloat16>(GFloat16 a, GFloat16 b)
6993 : {
6994 0 : return a == b || (CPLIsNan(a) && CPLIsNan(b));
6995 : }
6996 :
6997 18 : template <> bool IsSame<float>(float a, float b)
6998 : {
6999 18 : return a == b || (std::isnan(a) && std::isnan(b));
7000 : }
7001 :
7002 56 : template <> bool IsSame<double>(double a, double b)
7003 : {
7004 56 : return a == b || (std::isnan(a) && std::isnan(b));
7005 : }
7006 :
7007 19 : template <class T> static void GWKModeRealType(GWKJobStruct *psJob)
7008 : {
7009 19 : const GDALWarpKernel *poWK = psJob->poWK;
7010 19 : const int iYMin = psJob->iYMin;
7011 19 : const int iYMax = psJob->iYMax;
7012 19 : const int nDstXSize = poWK->nDstXSize;
7013 19 : const int nSrcXSize = poWK->nSrcXSize;
7014 19 : const int nSrcYSize = poWK->nSrcYSize;
7015 19 : const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
7016 :
7017 19 : T *pVals = nullptr;
7018 19 : float *pafCounts = nullptr;
7019 :
7020 19 : if (nSrcXSize > 0 && nSrcYSize > 0)
7021 : {
7022 : pVals = static_cast<T *>(
7023 19 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(T)));
7024 : pafCounts = static_cast<float *>(
7025 19 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
7026 19 : if (pVals == nullptr || pafCounts == nullptr)
7027 : {
7028 0 : VSIFree(pVals);
7029 0 : VSIFree(pafCounts);
7030 0 : return;
7031 : }
7032 : }
7033 :
7034 : /* -------------------------------------------------------------------- */
7035 : /* Allocate x,y,z coordinate arrays for transformation ... two */
7036 : /* scanlines worth of positions. */
7037 : /* -------------------------------------------------------------------- */
7038 :
7039 : double *padfX =
7040 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7041 : double *padfY =
7042 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7043 : double *padfZ =
7044 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7045 : double *padfX2 =
7046 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7047 : double *padfY2 =
7048 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7049 : double *padfZ2 =
7050 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7051 19 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7052 19 : int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7053 :
7054 19 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
7055 19 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
7056 19 : const double dfErrorThreshold = CPLAtof(
7057 19 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
7058 19 : const bool bAvoidNoDataSingleBand =
7059 19 : poWK->nBands == 1 ||
7060 0 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
7061 : "UNIFIED_SRC_NODATA", "FALSE"));
7062 :
7063 19 : const int nXMargin =
7064 19 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
7065 19 : const int nYMargin =
7066 19 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
7067 :
7068 : /* ==================================================================== */
7069 : /* Loop over output lines. */
7070 : /* ==================================================================== */
7071 116 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
7072 : {
7073 97 : GWKAverageOrModeComputeLineCoords(
7074 : psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
7075 : pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
7076 :
7077 : // Loop over pixels in output scanline.
7078 3514 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
7079 : {
7080 3417 : GPtrDiff_t iSrcOffset = 0;
7081 3417 : double dfDensity = 1.0;
7082 3417 : bool bHasFoundDensity = false;
7083 :
7084 3417 : bool bWrapOverX = false;
7085 3417 : double dfXMin = 0;
7086 3417 : double dfYMin = 0;
7087 3417 : double dfXMax = 0;
7088 3417 : double dfYMax = 0;
7089 3417 : int iSrcXMin = 0;
7090 3417 : int iSrcYMin = 0;
7091 3417 : int iSrcXMax = 0;
7092 3417 : int iSrcYMax = 0;
7093 3417 : if (!GWKAverageOrModeComputeSourceCoords(
7094 : psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
7095 : nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
7096 : iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
7097 : {
7098 0 : continue;
7099 : }
7100 :
7101 3417 : const GPtrDiff_t iDstOffset =
7102 3417 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
7103 :
7104 : // Loop processing each band.
7105 6834 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7106 : {
7107 3417 : double dfBandDensity = 0.0;
7108 :
7109 3417 : int nBins = 0;
7110 3417 : int iModeIndex = -1;
7111 3417 : T nVal{};
7112 :
7113 10248 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7114 : {
7115 6831 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7116 6831 : iSrcOffset =
7117 6831 : iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7118 20530 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7119 : iSrcX++, iSrcOffset++)
7120 : {
7121 13699 : if (bWrapOverX)
7122 0 : iSrcOffset =
7123 0 : (iSrcX % nSrcXSize) +
7124 0 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7125 :
7126 13699 : if (poWK->panUnifiedSrcValid != nullptr &&
7127 0 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
7128 0 : continue;
7129 :
7130 13699 : if (GWKGetPixelT(poWK, iBand, iSrcOffset,
7131 27398 : &dfBandDensity, &nVal) &&
7132 13699 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7133 : {
7134 13699 : const double dfWeight =
7135 13699 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7136 :
7137 : // Check array for existing entry.
7138 13699 : int i = 0;
7139 29194 : for (i = 0; i < nBins; ++i)
7140 : {
7141 17807 : if (IsSame(pVals[i], nVal))
7142 : {
7143 :
7144 2312 : pafCounts[i] +=
7145 2312 : static_cast<float>(dfWeight);
7146 2312 : bool bValIsMaxCount =
7147 2312 : (pafCounts[i] > pafCounts[iModeIndex]);
7148 :
7149 2312 : if (!bValIsMaxCount &&
7150 1498 : pafCounts[i] == pafCounts[iModeIndex])
7151 : {
7152 1490 : switch (eTieStrategy)
7153 : {
7154 1477 : case GWKTS_First:
7155 1477 : break;
7156 6 : case GWKTS_Min:
7157 6 : bValIsMaxCount =
7158 6 : nVal < pVals[iModeIndex];
7159 6 : break;
7160 7 : case GWKTS_Max:
7161 7 : bValIsMaxCount =
7162 7 : nVal > pVals[iModeIndex];
7163 7 : break;
7164 : }
7165 : }
7166 :
7167 2312 : if (bValIsMaxCount)
7168 : {
7169 817 : iModeIndex = i;
7170 : }
7171 :
7172 2312 : break;
7173 : }
7174 : }
7175 :
7176 : // Add to arr if entry not already there.
7177 13699 : if (i == nBins)
7178 : {
7179 11387 : pVals[i] = nVal;
7180 11387 : pafCounts[i] = static_cast<float>(dfWeight);
7181 :
7182 11387 : if (iModeIndex < 0)
7183 3417 : iModeIndex = i;
7184 :
7185 11387 : ++nBins;
7186 : }
7187 : }
7188 : }
7189 : }
7190 :
7191 3417 : if (iModeIndex != -1)
7192 : {
7193 3417 : nVal = pVals[iModeIndex];
7194 3417 : dfBandDensity = 1;
7195 3417 : bHasFoundDensity = true;
7196 : }
7197 :
7198 : // We have a computed value from the source. Now apply it
7199 : // to the destination pixel
7200 3417 : if (bHasFoundDensity)
7201 : {
7202 3417 : GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
7203 : dfBandDensity, nVal,
7204 : bAvoidNoDataSingleBand);
7205 : }
7206 : }
7207 :
7208 3417 : if (!bHasFoundDensity)
7209 0 : continue;
7210 :
7211 3417 : if (!bAvoidNoDataSingleBand)
7212 : {
7213 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
7214 : }
7215 :
7216 : /* --------------------------------------------------------------------
7217 : */
7218 : /* Update destination density/validity masks. */
7219 : /* --------------------------------------------------------------------
7220 : */
7221 3417 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
7222 :
7223 3417 : if (poWK->panDstValid != nullptr)
7224 : {
7225 0 : CPLMaskSet(poWK->panDstValid, iDstOffset);
7226 : }
7227 : } /* Next iDstX */
7228 :
7229 : /* --------------------------------------------------------------------
7230 : */
7231 : /* Report progress to the user, and optionally cancel out. */
7232 : /* --------------------------------------------------------------------
7233 : */
7234 97 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
7235 0 : break;
7236 : }
7237 :
7238 : /* -------------------------------------------------------------------- */
7239 : /* Cleanup and return. */
7240 : /* -------------------------------------------------------------------- */
7241 19 : CPLFree(padfX);
7242 19 : CPLFree(padfY);
7243 19 : CPLFree(padfZ);
7244 19 : CPLFree(padfX2);
7245 19 : CPLFree(padfY2);
7246 19 : CPLFree(padfZ2);
7247 19 : CPLFree(pabSuccess);
7248 19 : CPLFree(pabSuccess2);
7249 19 : VSIFree(pVals);
7250 19 : VSIFree(pafCounts);
7251 : }
7252 :
7253 : /************************************************************************/
7254 : /* GWKModeComplexType() */
7255 : /************************************************************************/
7256 :
7257 8 : static void GWKModeComplexType(GWKJobStruct *psJob)
7258 : {
7259 8 : const GDALWarpKernel *poWK = psJob->poWK;
7260 8 : const int iYMin = psJob->iYMin;
7261 8 : const int iYMax = psJob->iYMax;
7262 8 : const int nDstXSize = poWK->nDstXSize;
7263 8 : const int nSrcXSize = poWK->nSrcXSize;
7264 8 : const int nSrcYSize = poWK->nSrcYSize;
7265 8 : const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
7266 : const double dfMultFactorVerticalShiftPipeline =
7267 8 : poWK->bApplyVerticalShift
7268 8 : ? CPLAtof(CSLFetchNameValueDef(
7269 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
7270 : "1.0"))
7271 8 : : 0.0;
7272 : const bool bAvoidNoDataSingleBand =
7273 8 : poWK->nBands == 1 ||
7274 0 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
7275 8 : "UNIFIED_SRC_NODATA", "FALSE"));
7276 :
7277 8 : double *padfRealVals = nullptr;
7278 8 : double *padfImagVals = nullptr;
7279 8 : float *pafCounts = nullptr;
7280 :
7281 8 : if (nSrcXSize > 0 && nSrcYSize > 0)
7282 : {
7283 : padfRealVals = static_cast<double *>(
7284 8 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(double)));
7285 : padfImagVals = static_cast<double *>(
7286 8 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(double)));
7287 : pafCounts = static_cast<float *>(
7288 8 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
7289 8 : if (padfRealVals == nullptr || padfImagVals == nullptr ||
7290 : pafCounts == nullptr)
7291 : {
7292 0 : VSIFree(padfRealVals);
7293 0 : VSIFree(padfImagVals);
7294 0 : VSIFree(pafCounts);
7295 0 : return;
7296 : }
7297 : }
7298 :
7299 : /* -------------------------------------------------------------------- */
7300 : /* Allocate x,y,z coordinate arrays for transformation ... two */
7301 : /* scanlines worth of positions. */
7302 : /* -------------------------------------------------------------------- */
7303 :
7304 : double *padfX =
7305 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7306 : double *padfY =
7307 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7308 : double *padfZ =
7309 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7310 : double *padfX2 =
7311 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7312 : double *padfY2 =
7313 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7314 : double *padfZ2 =
7315 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7316 8 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7317 8 : int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7318 :
7319 8 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
7320 8 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
7321 8 : const double dfErrorThreshold = CPLAtof(
7322 8 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
7323 :
7324 : const int nXMargin =
7325 8 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
7326 : const int nYMargin =
7327 8 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
7328 :
7329 : /* ==================================================================== */
7330 : /* Loop over output lines. */
7331 : /* ==================================================================== */
7332 16 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
7333 : {
7334 8 : GWKAverageOrModeComputeLineCoords(
7335 : psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
7336 : pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
7337 :
7338 : // Loop over pixels in output scanline.
7339 16 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
7340 : {
7341 8 : GPtrDiff_t iSrcOffset = 0;
7342 8 : double dfDensity = 1.0;
7343 8 : bool bHasFoundDensity = false;
7344 :
7345 8 : bool bWrapOverX = false;
7346 8 : double dfXMin = 0;
7347 8 : double dfYMin = 0;
7348 8 : double dfXMax = 0;
7349 8 : double dfYMax = 0;
7350 8 : int iSrcXMin = 0;
7351 8 : int iSrcYMin = 0;
7352 8 : int iSrcXMax = 0;
7353 8 : int iSrcYMax = 0;
7354 8 : if (!GWKAverageOrModeComputeSourceCoords(
7355 : psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
7356 : nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
7357 : iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
7358 : {
7359 0 : continue;
7360 : }
7361 :
7362 8 : const GPtrDiff_t iDstOffset =
7363 8 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
7364 :
7365 : // Loop processing each band.
7366 16 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7367 : {
7368 8 : double dfBandDensity = 0.0;
7369 :
7370 8 : int nBins = 0;
7371 8 : int iModeIndex = -1;
7372 8 : double dfValueReal = 0;
7373 8 : double dfValueImag = 0;
7374 :
7375 16 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7376 : {
7377 8 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7378 8 : iSrcOffset =
7379 8 : iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7380 38 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7381 : iSrcX++, iSrcOffset++)
7382 : {
7383 30 : if (bWrapOverX)
7384 0 : iSrcOffset =
7385 0 : (iSrcX % nSrcXSize) +
7386 0 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7387 :
7388 30 : if (poWK->panUnifiedSrcValid != nullptr &&
7389 0 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
7390 0 : continue;
7391 :
7392 30 : if (GWKGetPixelValue(poWK, iBand, iSrcOffset,
7393 : &dfBandDensity, &dfValueReal,
7394 60 : &dfValueImag) &&
7395 30 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7396 : {
7397 30 : const double dfWeight =
7398 30 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7399 :
7400 : // Check array for existing entry.
7401 30 : int i = 0;
7402 49 : for (i = 0; i < nBins; ++i)
7403 : {
7404 47 : if (IsSame(padfRealVals[i], dfValueReal) &&
7405 14 : IsSame(padfImagVals[i], dfValueImag))
7406 : {
7407 :
7408 14 : pafCounts[i] +=
7409 14 : static_cast<float>(dfWeight);
7410 14 : bool bValIsMaxCount =
7411 14 : (pafCounts[i] > pafCounts[iModeIndex]);
7412 :
7413 14 : if (!bValIsMaxCount &&
7414 6 : pafCounts[i] == pafCounts[iModeIndex])
7415 : {
7416 3 : switch (eTieStrategy)
7417 : {
7418 3 : case GWKTS_First:
7419 3 : break;
7420 0 : case GWKTS_Min:
7421 0 : bValIsMaxCount =
7422 0 : dfValueReal <
7423 0 : padfRealVals[iModeIndex];
7424 0 : break;
7425 0 : case GWKTS_Max:
7426 0 : bValIsMaxCount =
7427 0 : dfValueReal >
7428 0 : padfRealVals[iModeIndex];
7429 0 : break;
7430 : }
7431 : }
7432 :
7433 14 : if (bValIsMaxCount)
7434 : {
7435 8 : iModeIndex = i;
7436 : }
7437 :
7438 14 : break;
7439 : }
7440 : }
7441 :
7442 : // Add to arr if entry not already there.
7443 30 : if (i == nBins)
7444 : {
7445 16 : padfRealVals[i] = dfValueReal;
7446 16 : padfImagVals[i] = dfValueImag;
7447 16 : pafCounts[i] = static_cast<float>(dfWeight);
7448 :
7449 16 : if (iModeIndex < 0)
7450 8 : iModeIndex = i;
7451 :
7452 16 : ++nBins;
7453 : }
7454 : }
7455 : }
7456 : }
7457 :
7458 8 : if (iModeIndex != -1)
7459 : {
7460 8 : dfValueReal = padfRealVals[iModeIndex];
7461 8 : dfValueImag = padfImagVals[iModeIndex];
7462 8 : dfBandDensity = 1;
7463 :
7464 8 : if (poWK->bApplyVerticalShift)
7465 : {
7466 0 : if (!std::isfinite(padfZ[iDstX]))
7467 0 : continue;
7468 : // Subtract padfZ[] since the coordinate
7469 : // transformation is from target to source
7470 0 : dfValueReal =
7471 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
7472 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
7473 : }
7474 :
7475 8 : bHasFoundDensity = true;
7476 : }
7477 :
7478 : // We have a computed value from the source. Now apply it
7479 : // to the destination pixel
7480 8 : if (bHasFoundDensity)
7481 : {
7482 8 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
7483 : dfValueReal, dfValueImag,
7484 : bAvoidNoDataSingleBand);
7485 : }
7486 : }
7487 :
7488 8 : if (!bHasFoundDensity)
7489 0 : continue;
7490 :
7491 8 : if (!bAvoidNoDataSingleBand)
7492 : {
7493 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
7494 : }
7495 :
7496 : /* --------------------------------------------------------------------
7497 : */
7498 : /* Update destination density/validity masks. */
7499 : /* --------------------------------------------------------------------
7500 : */
7501 8 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
7502 :
7503 8 : if (poWK->panDstValid != nullptr)
7504 : {
7505 0 : CPLMaskSet(poWK->panDstValid, iDstOffset);
7506 : }
7507 : } /* Next iDstX */
7508 :
7509 : /* --------------------------------------------------------------------
7510 : */
7511 : /* Report progress to the user, and optionally cancel out. */
7512 : /* --------------------------------------------------------------------
7513 : */
7514 8 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
7515 0 : break;
7516 : }
7517 :
7518 : /* -------------------------------------------------------------------- */
7519 : /* Cleanup and return. */
7520 : /* -------------------------------------------------------------------- */
7521 8 : CPLFree(padfX);
7522 8 : CPLFree(padfY);
7523 8 : CPLFree(padfZ);
7524 8 : CPLFree(padfX2);
7525 8 : CPLFree(padfY2);
7526 8 : CPLFree(padfZ2);
7527 8 : CPLFree(pabSuccess);
7528 8 : CPLFree(pabSuccess2);
7529 8 : VSIFree(padfRealVals);
7530 8 : VSIFree(padfImagVals);
7531 8 : VSIFree(pafCounts);
7532 : }
7533 :
7534 : /************************************************************************/
7535 : /* GWKAverageOrModeThread() */
7536 : /************************************************************************/
7537 :
7538 : // Overall logic based on GWKGeneralCaseThread().
7539 246 : static void GWKAverageOrModeThread(void *pData)
7540 : {
7541 246 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
7542 246 : const GDALWarpKernel *poWK = psJob->poWK;
7543 246 : const int iYMin = psJob->iYMin;
7544 246 : const int iYMax = psJob->iYMax;
7545 : const double dfMultFactorVerticalShiftPipeline =
7546 246 : poWK->bApplyVerticalShift
7547 246 : ? CPLAtof(CSLFetchNameValueDef(
7548 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
7549 : "1.0"))
7550 246 : : 0.0;
7551 : const bool bAvoidNoDataSingleBand =
7552 342 : poWK->nBands == 1 ||
7553 96 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
7554 246 : "UNIFIED_SRC_NODATA", "FALSE"));
7555 :
7556 246 : const int nDstXSize = poWK->nDstXSize;
7557 246 : const int nSrcXSize = poWK->nSrcXSize;
7558 :
7559 : /* -------------------------------------------------------------------- */
7560 : /* Find out which algorithm to use (small optim.) */
7561 : /* -------------------------------------------------------------------- */
7562 :
7563 : // Only used for GRA_Mode
7564 246 : float *pafCounts = nullptr;
7565 246 : int nBins = 0;
7566 246 : int nBinsOffset = 0;
7567 246 : const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
7568 :
7569 : // Only used with Q1, Med and Q3
7570 246 : float quant = 0.0f;
7571 :
7572 : // To control array allocation only when data type is complex
7573 246 : const bool bIsComplex = GDALDataTypeIsComplex(poWK->eWorkingDataType) != 0;
7574 :
7575 246 : if (poWK->eResample == GRA_Mode)
7576 : {
7577 45 : if (poWK->bApplyVerticalShift)
7578 : {
7579 0 : return GWKModeComplexType(psJob);
7580 : }
7581 :
7582 45 : switch (poWK->eWorkingDataType)
7583 : {
7584 7 : case GDT_UInt8:
7585 7 : nBins = 256;
7586 7 : break;
7587 :
7588 1 : case GDT_Int8:
7589 1 : nBins = 256;
7590 1 : nBinsOffset = nBins / 2;
7591 1 : break;
7592 :
7593 1 : case GDT_UInt16:
7594 1 : nBins = 65536;
7595 1 : break;
7596 :
7597 9 : case GDT_Int16:
7598 9 : nBins = 65536;
7599 9 : nBinsOffset = nBins / 2;
7600 9 : break;
7601 :
7602 10 : case GDT_Int32:
7603 10 : return GWKModeRealType<int32_t>(psJob);
7604 :
7605 1 : case GDT_UInt32:
7606 1 : return GWKModeRealType<uint32_t>(psJob);
7607 :
7608 1 : case GDT_Int64:
7609 1 : return GWKModeRealType<int64_t>(psJob);
7610 :
7611 1 : case GDT_UInt64:
7612 1 : return GWKModeRealType<uint64_t>(psJob);
7613 :
7614 0 : case GDT_Float16:
7615 0 : return GWKModeRealType<GFloat16>(psJob);
7616 :
7617 4 : case GDT_Float32:
7618 4 : return GWKModeRealType<float>(psJob);
7619 :
7620 2 : case GDT_Float64:
7621 2 : return GWKModeRealType<double>(psJob);
7622 :
7623 8 : case GDT_CInt16:
7624 : case GDT_CInt32:
7625 : case GDT_CFloat16:
7626 : case GDT_CFloat32:
7627 : case GDT_CFloat64:
7628 8 : return GWKModeComplexType(psJob);
7629 :
7630 0 : case GDT_Unknown:
7631 : case GDT_TypeCount:
7632 0 : CPLAssert(false);
7633 : return;
7634 : }
7635 :
7636 18 : if (nBins)
7637 : {
7638 : pafCounts =
7639 18 : static_cast<float *>(VSI_MALLOC_VERBOSE(nBins * sizeof(float)));
7640 18 : if (pafCounts == nullptr)
7641 0 : return;
7642 : }
7643 : }
7644 201 : else if (poWK->eResample == GRA_Med)
7645 : {
7646 6 : quant = 0.5f;
7647 : }
7648 195 : else if (poWK->eResample == GRA_Q1)
7649 : {
7650 10 : quant = 0.25f;
7651 : }
7652 185 : else if (poWK->eResample == GRA_Q3)
7653 : {
7654 5 : quant = 0.75f;
7655 : }
7656 180 : else if (poWK->eResample != GRA_Average && poWK->eResample != GRA_RMS &&
7657 11 : poWK->eResample != GRA_Min && poWK->eResample != GRA_Max)
7658 : {
7659 : // Other resample algorithms not permitted here.
7660 0 : CPLError(CE_Fatal, CPLE_AppDefined,
7661 : "GDALWarpKernel():GWKAverageOrModeThread() ERROR, "
7662 : "illegal resample");
7663 : }
7664 :
7665 219 : CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread()");
7666 :
7667 : /* -------------------------------------------------------------------- */
7668 : /* Allocate x,y,z coordinate arrays for transformation ... two */
7669 : /* scanlines worth of positions. */
7670 : /* -------------------------------------------------------------------- */
7671 :
7672 : double *padfX =
7673 219 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7674 : double *padfY =
7675 219 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7676 : double *padfZ =
7677 219 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7678 : double *padfX2 =
7679 219 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7680 : double *padfY2 =
7681 219 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7682 : double *padfZ2 =
7683 219 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7684 219 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7685 219 : int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7686 :
7687 219 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
7688 219 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
7689 219 : const double dfErrorThreshold = CPLAtof(
7690 219 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
7691 :
7692 : const double dfExcludedValuesThreshold =
7693 219 : CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
7694 : "EXCLUDED_VALUES_PCT_THRESHOLD", "50")) /
7695 219 : 100.0;
7696 : const double dfNodataValuesThreshold =
7697 219 : CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
7698 : "NODATA_VALUES_PCT_THRESHOLD", "100")) /
7699 219 : 100.0;
7700 :
7701 : const int nXMargin =
7702 219 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
7703 : const int nYMargin =
7704 219 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
7705 :
7706 : /* ==================================================================== */
7707 : /* Loop over output lines. */
7708 : /* ==================================================================== */
7709 28777 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
7710 : {
7711 28558 : GWKAverageOrModeComputeLineCoords(
7712 : psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
7713 : pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
7714 :
7715 : /* ====================================================================
7716 : */
7717 : /* Loop over pixels in output scanline. */
7718 : /* ====================================================================
7719 : */
7720 7357360 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
7721 : {
7722 7328800 : GPtrDiff_t iSrcOffset = 0;
7723 7328800 : double dfDensity = 1.0;
7724 7328800 : bool bHasFoundDensity = false;
7725 :
7726 7328800 : bool bWrapOverX = false;
7727 7328800 : double dfXMin = 0;
7728 7328800 : double dfYMin = 0;
7729 7328800 : double dfXMax = 0;
7730 7328800 : double dfYMax = 0;
7731 7328800 : int iSrcXMin = 0;
7732 7328800 : int iSrcYMin = 0;
7733 7328800 : int iSrcXMax = 0;
7734 7328800 : int iSrcYMax = 0;
7735 7328800 : if (!GWKAverageOrModeComputeSourceCoords(
7736 : psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
7737 : nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
7738 : iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
7739 : {
7740 3158560 : continue;
7741 : }
7742 :
7743 5314750 : const GPtrDiff_t iDstOffset =
7744 5314750 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
7745 :
7746 5314750 : bool bDone = false;
7747 :
7748 : // Special Average mode where we process all bands together,
7749 : // to avoid averaging tuples that match an entry of m_aadfExcludedValues
7750 5314750 : constexpr double EPSILON = 1e-10;
7751 14838200 : if (poWK->eResample == GRA_Average &&
7752 4208720 : (!poWK->m_aadfExcludedValues.empty() ||
7753 393224 : dfNodataValuesThreshold < 1 - EPSILON) &&
7754 9523480 : !poWK->bApplyVerticalShift && !bIsComplex)
7755 : {
7756 393224 : double dfTotalWeightInvalid = 0.0;
7757 393224 : double dfTotalWeightExcluded = 0.0;
7758 393224 : double dfTotalWeightRegular = 0.0;
7759 786448 : std::vector<double> adfValueReal(poWK->nBands, 0);
7760 786448 : std::vector<double> adfValueAveraged(poWK->nBands, 0);
7761 : std::vector<int> anCountExcludedValues(
7762 393224 : poWK->m_aadfExcludedValues.size(), 0);
7763 :
7764 1179670 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7765 : {
7766 786448 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7767 786448 : iSrcOffset =
7768 786448 : iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7769 2359340 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7770 : iSrcX++, iSrcOffset++)
7771 : {
7772 1572900 : if (bWrapOverX)
7773 0 : iSrcOffset =
7774 0 : (iSrcX % nSrcXSize) +
7775 0 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7776 :
7777 1572900 : const double dfWeight =
7778 1572900 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7779 1572900 : if (dfWeight <= 0)
7780 0 : continue;
7781 :
7782 1572910 : if (poWK->panUnifiedSrcValid != nullptr &&
7783 12 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
7784 : {
7785 3 : dfTotalWeightInvalid += dfWeight;
7786 3 : continue;
7787 : }
7788 :
7789 1572890 : bool bAllValid = true;
7790 2359410 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7791 : {
7792 2097230 : double dfBandDensity = 0;
7793 2097230 : double dfValueImagTmp = 0;
7794 2883740 : if (!(GWKGetPixelValue(
7795 : poWK, iBand, iSrcOffset, &dfBandDensity,
7796 2097230 : &adfValueReal[iBand], &dfValueImagTmp) &&
7797 786513 : dfBandDensity > BAND_DENSITY_THRESHOLD))
7798 : {
7799 1310720 : bAllValid = false;
7800 1310720 : break;
7801 : }
7802 : }
7803 :
7804 1572890 : if (!bAllValid)
7805 : {
7806 1310720 : dfTotalWeightInvalid += dfWeight;
7807 1310720 : continue;
7808 : }
7809 :
7810 262177 : bool bExcludedValueFound = false;
7811 393263 : for (size_t i = 0;
7812 393263 : i < poWK->m_aadfExcludedValues.size(); ++i)
7813 : {
7814 131092 : if (poWK->m_aadfExcludedValues[i] == adfValueReal)
7815 : {
7816 6 : bExcludedValueFound = true;
7817 6 : ++anCountExcludedValues[i];
7818 6 : dfTotalWeightExcluded += dfWeight;
7819 6 : break;
7820 : }
7821 : }
7822 262177 : if (!bExcludedValueFound)
7823 : {
7824 : // Weighted incremental algorithm mean
7825 : // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
7826 262171 : dfTotalWeightRegular += dfWeight;
7827 1048670 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7828 : {
7829 786495 : adfValueAveraged[iBand] +=
7830 1572990 : (dfWeight / dfTotalWeightRegular) *
7831 1572990 : (adfValueReal[iBand] -
7832 786495 : adfValueAveraged[iBand]);
7833 : }
7834 : }
7835 : }
7836 : }
7837 :
7838 393224 : const double dfTotalWeight = dfTotalWeightInvalid +
7839 : dfTotalWeightExcluded +
7840 : dfTotalWeightRegular;
7841 393224 : if (dfTotalWeightInvalid > 0 &&
7842 : dfTotalWeightInvalid >=
7843 327685 : dfNodataValuesThreshold * dfTotalWeight)
7844 : {
7845 : // Do nothing. Leave bHasFoundDensity set to false.
7846 : }
7847 65543 : else if (dfTotalWeightExcluded > 0 &&
7848 : dfTotalWeightExcluded >=
7849 6 : dfExcludedValuesThreshold * dfTotalWeight)
7850 : {
7851 : // Find the most represented excluded value tuple
7852 2 : size_t iExcludedValue = 0;
7853 2 : int nExcludedValueCount = 0;
7854 4 : for (size_t i = 0; i < poWK->m_aadfExcludedValues.size();
7855 : ++i)
7856 : {
7857 2 : if (anCountExcludedValues[i] > nExcludedValueCount)
7858 : {
7859 2 : iExcludedValue = i;
7860 2 : nExcludedValueCount = anCountExcludedValues[i];
7861 : }
7862 : }
7863 :
7864 2 : bHasFoundDensity = true;
7865 :
7866 8 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7867 : {
7868 6 : GWKSetPixelValue(
7869 : poWK, iBand, iDstOffset, /* dfBandDensity = */ 1.0,
7870 6 : poWK->m_aadfExcludedValues[iExcludedValue][iBand],
7871 : 0, bAvoidNoDataSingleBand);
7872 : }
7873 :
7874 2 : if (!bAvoidNoDataSingleBand)
7875 : {
7876 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
7877 2 : }
7878 : }
7879 65541 : else if (dfTotalWeightRegular > 0)
7880 : {
7881 65541 : bHasFoundDensity = true;
7882 :
7883 262160 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7884 : {
7885 196619 : GWKSetPixelValue(poWK, iBand, iDstOffset,
7886 : /* dfBandDensity = */ 1.0,
7887 196619 : adfValueAveraged[iBand], 0,
7888 : bAvoidNoDataSingleBand);
7889 : }
7890 :
7891 65541 : if (!bAvoidNoDataSingleBand)
7892 : {
7893 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
7894 : }
7895 : }
7896 :
7897 : // Skip below loop on bands
7898 393224 : bDone = true;
7899 : }
7900 :
7901 : /* ====================================================================
7902 : */
7903 : /* Loop processing each band. */
7904 : /* ====================================================================
7905 : */
7906 :
7907 17670500 : for (int iBand = 0; !bDone && iBand < poWK->nBands; iBand++)
7908 : {
7909 12355700 : double dfBandDensity = 0.0;
7910 12355700 : double dfValueReal = 0.0;
7911 12355700 : double dfValueImag = 0.0;
7912 12355700 : double dfValueRealTmp = 0.0;
7913 12355700 : double dfValueImagTmp = 0.0;
7914 :
7915 : /* --------------------------------------------------------------------
7916 : */
7917 : /* Collect the source value. */
7918 : /* --------------------------------------------------------------------
7919 : */
7920 :
7921 : // Loop over source lines and pixels - 3 possible algorithms.
7922 :
7923 12355700 : if (poWK->eResample == GRA_Average)
7924 : {
7925 9833240 : double dfTotalWeight = 0.0;
7926 :
7927 : // This code adapted from GDALDownsampleChunk32R_AverageT()
7928 : // in gcore/overview.cpp.
7929 25243600 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7930 : {
7931 15410300 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7932 15410300 : iSrcOffset = iSrcXMin +
7933 15410300 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7934 44761400 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7935 : iSrcX++, iSrcOffset++)
7936 : {
7937 29351100 : if (bWrapOverX)
7938 2571 : iSrcOffset =
7939 2571 : (iSrcX % nSrcXSize) +
7940 2571 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7941 :
7942 29351100 : if (poWK->panUnifiedSrcValid != nullptr &&
7943 4 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7944 : iSrcOffset))
7945 : {
7946 1 : continue;
7947 : }
7948 :
7949 29351100 : if (GWKGetPixelValue(
7950 : poWK, iBand, iSrcOffset, &dfBandDensity,
7951 48239400 : &dfValueRealTmp, &dfValueImagTmp) &&
7952 18888400 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7953 : {
7954 18888400 : const double dfWeight =
7955 18888400 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7956 18888400 : if (dfWeight > 0)
7957 : {
7958 : // Weighted incremental algorithm mean
7959 : // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
7960 18888400 : dfTotalWeight += dfWeight;
7961 18888400 : dfValueReal +=
7962 18888400 : (dfWeight / dfTotalWeight) *
7963 18888400 : (dfValueRealTmp - dfValueReal);
7964 18888400 : if (bIsComplex)
7965 : {
7966 252 : dfValueImag +=
7967 252 : (dfWeight / dfTotalWeight) *
7968 252 : (dfValueImagTmp - dfValueImag);
7969 : }
7970 : }
7971 : }
7972 : }
7973 : }
7974 :
7975 9833240 : if (dfTotalWeight > 0)
7976 : {
7977 7530420 : if (poWK->bApplyVerticalShift)
7978 : {
7979 0 : if (!std::isfinite(padfZ[iDstX]))
7980 0 : continue;
7981 : // Subtract padfZ[] since the coordinate
7982 : // transformation is from target to source
7983 0 : dfValueReal =
7984 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
7985 0 : padfZ[iDstX] *
7986 : dfMultFactorVerticalShiftPipeline;
7987 : }
7988 :
7989 7530420 : dfBandDensity = 1;
7990 7530420 : bHasFoundDensity = true;
7991 : }
7992 : } // GRA_Average.
7993 :
7994 2522460 : else if (poWK->eResample == GRA_RMS)
7995 : {
7996 300416 : double dfTotalReal = 0.0;
7997 300416 : double dfTotalImag = 0.0;
7998 300416 : double dfTotalWeight = 0.0;
7999 : // This code adapted from GDALDownsampleChunk32R_AverageT()
8000 : // in gcore/overview.cpp.
8001 630578 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
8002 : {
8003 330162 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
8004 330162 : iSrcOffset = iSrcXMin +
8005 330162 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8006 772930 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
8007 : iSrcX++, iSrcOffset++)
8008 : {
8009 442768 : if (bWrapOverX)
8010 1371 : iSrcOffset =
8011 1371 : (iSrcX % nSrcXSize) +
8012 1371 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8013 :
8014 442768 : if (poWK->panUnifiedSrcValid != nullptr &&
8015 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
8016 : iSrcOffset))
8017 : {
8018 0 : continue;
8019 : }
8020 :
8021 442768 : if (GWKGetPixelValue(
8022 : poWK, iBand, iSrcOffset, &dfBandDensity,
8023 885536 : &dfValueRealTmp, &dfValueImagTmp) &&
8024 442768 : dfBandDensity > BAND_DENSITY_THRESHOLD)
8025 : {
8026 442768 : const double dfWeight =
8027 442768 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
8028 442768 : dfTotalWeight += dfWeight;
8029 442768 : dfTotalReal +=
8030 442768 : dfValueRealTmp * dfValueRealTmp * dfWeight;
8031 442768 : if (bIsComplex)
8032 48 : dfTotalImag += dfValueImagTmp *
8033 48 : dfValueImagTmp * dfWeight;
8034 : }
8035 : }
8036 : }
8037 :
8038 300416 : if (dfTotalWeight > 0)
8039 : {
8040 300416 : dfValueReal = sqrt(dfTotalReal / dfTotalWeight);
8041 :
8042 300416 : if (poWK->bApplyVerticalShift)
8043 : {
8044 0 : if (!std::isfinite(padfZ[iDstX]))
8045 0 : continue;
8046 : // Subtract padfZ[] since the coordinate
8047 : // transformation is from target to source
8048 0 : dfValueReal =
8049 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
8050 0 : padfZ[iDstX] *
8051 : dfMultFactorVerticalShiftPipeline;
8052 : }
8053 :
8054 300416 : if (bIsComplex)
8055 12 : dfValueImag = sqrt(dfTotalImag / dfTotalWeight);
8056 :
8057 300416 : dfBandDensity = 1;
8058 300416 : bHasFoundDensity = true;
8059 : }
8060 : } // GRA_RMS.
8061 :
8062 2222040 : else if (poWK->eResample == GRA_Mode)
8063 : {
8064 496623 : float fMaxCount = 0.0f;
8065 496623 : int nMode = -1;
8066 496623 : bool bHasSourceValues = false;
8067 :
8068 496623 : memset(pafCounts, 0, nBins * sizeof(float));
8069 :
8070 1167120 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
8071 : {
8072 670495 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
8073 670495 : iSrcOffset = iSrcXMin +
8074 670495 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8075 1964680 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
8076 : iSrcX++, iSrcOffset++)
8077 : {
8078 1294190 : if (bWrapOverX)
8079 1371 : iSrcOffset =
8080 1371 : (iSrcX % nSrcXSize) +
8081 1371 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8082 :
8083 1294190 : if (poWK->panUnifiedSrcValid != nullptr &&
8084 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
8085 : iSrcOffset))
8086 0 : continue;
8087 :
8088 1294190 : if (GWKGetPixelValue(
8089 : poWK, iBand, iSrcOffset, &dfBandDensity,
8090 2588370 : &dfValueRealTmp, &dfValueImagTmp) &&
8091 1294190 : dfBandDensity > BAND_DENSITY_THRESHOLD)
8092 : {
8093 1294190 : bHasSourceValues = true;
8094 1294190 : const int nVal =
8095 1294190 : static_cast<int>(dfValueRealTmp);
8096 1294190 : const int iBin = nVal + nBinsOffset;
8097 1294190 : const double dfWeight =
8098 1294190 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
8099 :
8100 : // Sum the density.
8101 1294190 : pafCounts[iBin] += static_cast<float>(dfWeight);
8102 : // Is it the most common value so far?
8103 1294190 : bool bUpdateMode = pafCounts[iBin] > fMaxCount;
8104 1294190 : if (!bUpdateMode &&
8105 227545 : pafCounts[iBin] == fMaxCount)
8106 : {
8107 15866 : switch (eTieStrategy)
8108 : {
8109 15858 : case GWKTS_First:
8110 15858 : break;
8111 4 : case GWKTS_Min:
8112 4 : bUpdateMode = nVal < nMode;
8113 4 : break;
8114 4 : case GWKTS_Max:
8115 4 : bUpdateMode = nVal > nMode;
8116 4 : break;
8117 : }
8118 : }
8119 1294190 : if (bUpdateMode)
8120 : {
8121 1066640 : nMode = nVal;
8122 1066640 : fMaxCount = pafCounts[iBin];
8123 : }
8124 : }
8125 : }
8126 : }
8127 :
8128 496623 : if (bHasSourceValues)
8129 : {
8130 496623 : dfValueReal = nMode;
8131 496623 : dfBandDensity = 1;
8132 496623 : bHasFoundDensity = true;
8133 : }
8134 : } // GRA_Mode.
8135 :
8136 1725420 : else if (poWK->eResample == GRA_Max)
8137 : {
8138 335037 : bool bFoundValid = false;
8139 335037 : double dfTotalReal = cpl::NumericLimits<double>::lowest();
8140 : // This code adapted from nAlgo 1 method, GRA_Average.
8141 842572 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
8142 : {
8143 507535 : iSrcOffset = iSrcXMin +
8144 507535 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8145 1638060 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
8146 : iSrcX++, iSrcOffset++)
8147 : {
8148 1130520 : if (bWrapOverX)
8149 1371 : iSrcOffset =
8150 1371 : (iSrcX % nSrcXSize) +
8151 1371 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8152 :
8153 1133330 : if (poWK->panUnifiedSrcValid != nullptr &&
8154 2809 : !CPLMaskGet(poWK->panUnifiedSrcValid,
8155 : iSrcOffset))
8156 : {
8157 2446 : continue;
8158 : }
8159 :
8160 : // Returns pixel value if it is not no data.
8161 1128070 : if (GWKGetPixelValue(
8162 : poWK, iBand, iSrcOffset, &dfBandDensity,
8163 2256150 : &dfValueRealTmp, &dfValueImagTmp) &&
8164 1128070 : dfBandDensity > BAND_DENSITY_THRESHOLD)
8165 : {
8166 1128070 : bFoundValid = true;
8167 1128070 : if (dfTotalReal < dfValueRealTmp)
8168 : {
8169 463372 : dfTotalReal = dfValueRealTmp;
8170 : }
8171 : }
8172 : }
8173 : }
8174 :
8175 335037 : if (bFoundValid)
8176 : {
8177 335037 : dfValueReal = dfTotalReal;
8178 :
8179 335037 : if (poWK->bApplyVerticalShift)
8180 : {
8181 0 : if (!std::isfinite(padfZ[iDstX]))
8182 0 : continue;
8183 : // Subtract padfZ[] since the coordinate
8184 : // transformation is from target to source
8185 0 : dfValueReal =
8186 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
8187 0 : padfZ[iDstX] *
8188 : dfMultFactorVerticalShiftPipeline;
8189 : }
8190 :
8191 335037 : dfBandDensity = 1;
8192 335037 : bHasFoundDensity = true;
8193 : }
8194 : }
8195 :
8196 1390380 : else if (poWK->eResample == GRA_Min)
8197 : {
8198 335012 : bool bFoundValid = false;
8199 335012 : double dfTotalReal = cpl::NumericLimits<double>::max();
8200 : // This code adapted from nAlgo 1 method, GRA_Average.
8201 842282 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
8202 : {
8203 507270 : iSrcOffset = iSrcXMin +
8204 507270 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8205 1634980 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
8206 : iSrcX++, iSrcOffset++)
8207 : {
8208 1127710 : if (bWrapOverX)
8209 1371 : iSrcOffset =
8210 1371 : (iSrcX % nSrcXSize) +
8211 1371 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8212 :
8213 1127710 : if (poWK->panUnifiedSrcValid != nullptr &&
8214 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
8215 : iSrcOffset))
8216 : {
8217 0 : continue;
8218 : }
8219 :
8220 : // Returns pixel value if it is not no data.
8221 1127710 : if (GWKGetPixelValue(
8222 : poWK, iBand, iSrcOffset, &dfBandDensity,
8223 2255420 : &dfValueRealTmp, &dfValueImagTmp) &&
8224 1127710 : dfBandDensity > BAND_DENSITY_THRESHOLD)
8225 : {
8226 1127710 : bFoundValid = true;
8227 1127710 : if (dfTotalReal > dfValueRealTmp)
8228 : {
8229 464157 : dfTotalReal = dfValueRealTmp;
8230 : }
8231 : }
8232 : }
8233 : }
8234 :
8235 335012 : if (bFoundValid)
8236 : {
8237 335012 : dfValueReal = dfTotalReal;
8238 :
8239 335012 : if (poWK->bApplyVerticalShift)
8240 : {
8241 0 : if (!std::isfinite(padfZ[iDstX]))
8242 0 : continue;
8243 : // Subtract padfZ[] since the coordinate
8244 : // transformation is from target to source
8245 0 : dfValueReal =
8246 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
8247 0 : padfZ[iDstX] *
8248 : dfMultFactorVerticalShiftPipeline;
8249 : }
8250 :
8251 335012 : dfBandDensity = 1;
8252 335012 : bHasFoundDensity = true;
8253 : }
8254 : } // GRA_Min.
8255 :
8256 : else
8257 : // poWK->eResample == GRA_Med | GRA_Q1 | GRA_Q3.
8258 : {
8259 1055370 : CPLAssert(quant > 0.0f);
8260 :
8261 1055370 : bool bFoundValid = false;
8262 1055370 : std::vector<double> dfRealValuesTmp;
8263 :
8264 : // This code adapted from nAlgo 1 method, GRA_Average.
8265 2677810 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
8266 : {
8267 1622440 : iSrcOffset = iSrcXMin +
8268 1622440 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8269 5205220 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
8270 : iSrcX++, iSrcOffset++)
8271 : {
8272 3582770 : if (bWrapOverX)
8273 4113 : iSrcOffset =
8274 4113 : (iSrcX % nSrcXSize) +
8275 4113 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8276 :
8277 3779380 : if (poWK->panUnifiedSrcValid != nullptr &&
8278 196608 : !CPLMaskGet(poWK->panUnifiedSrcValid,
8279 : iSrcOffset))
8280 : {
8281 195449 : continue;
8282 : }
8283 :
8284 : // Returns pixel value if it is not no data.
8285 3387320 : if (GWKGetPixelValue(
8286 : poWK, iBand, iSrcOffset, &dfBandDensity,
8287 6774650 : &dfValueRealTmp, &dfValueImagTmp) &&
8288 3387320 : dfBandDensity > BAND_DENSITY_THRESHOLD)
8289 : {
8290 3387320 : bFoundValid = true;
8291 3387320 : dfRealValuesTmp.push_back(dfValueRealTmp);
8292 : }
8293 : }
8294 : }
8295 :
8296 1055370 : if (bFoundValid)
8297 : {
8298 1006150 : std::sort(dfRealValuesTmp.begin(),
8299 : dfRealValuesTmp.end());
8300 : int quantIdx = static_cast<int>(
8301 1006150 : std::ceil(quant * dfRealValuesTmp.size() - 1));
8302 1006150 : dfValueReal = dfRealValuesTmp[quantIdx];
8303 :
8304 1006150 : if (poWK->bApplyVerticalShift)
8305 : {
8306 0 : if (!std::isfinite(padfZ[iDstX]))
8307 0 : continue;
8308 : // Subtract padfZ[] since the coordinate
8309 : // transformation is from target to source
8310 0 : dfValueReal =
8311 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
8312 0 : padfZ[iDstX] *
8313 : dfMultFactorVerticalShiftPipeline;
8314 : }
8315 :
8316 1006150 : dfBandDensity = 1;
8317 1006150 : bHasFoundDensity = true;
8318 1006150 : dfRealValuesTmp.clear();
8319 : }
8320 : } // Quantile.
8321 :
8322 : /* --------------------------------------------------------------------
8323 : */
8324 : /* We have a computed value from the source. Now apply it
8325 : * to */
8326 : /* the destination pixel. */
8327 : /* --------------------------------------------------------------------
8328 : */
8329 12355700 : if (bHasFoundDensity)
8330 : {
8331 : // TODO: Should we compute dfBandDensity in fct of
8332 : // nCount/nCount2, or use as a threshold to set the dest
8333 : // value?
8334 : // dfBandDensity = (float) nCount / nCount2;
8335 : // if( (float) nCount / nCount2 > 0.1 )
8336 : // or fix gdalwarp crop_to_cutline to crop partially
8337 : // overlapping pixels.
8338 10003600 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
8339 : dfValueReal, dfValueImag,
8340 : bAvoidNoDataSingleBand);
8341 : }
8342 : }
8343 :
8344 5314750 : if (!bHasFoundDensity)
8345 1144510 : continue;
8346 :
8347 4170240 : if (!bAvoidNoDataSingleBand)
8348 : {
8349 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
8350 : }
8351 :
8352 : /* --------------------------------------------------------------------
8353 : */
8354 : /* Update destination density/validity masks. */
8355 : /* --------------------------------------------------------------------
8356 : */
8357 4170240 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
8358 :
8359 4170240 : if (poWK->panDstValid != nullptr)
8360 : {
8361 1184 : CPLMaskSet(poWK->panDstValid, iDstOffset);
8362 : }
8363 : } /* Next iDstX */
8364 :
8365 : /* --------------------------------------------------------------------
8366 : */
8367 : /* Report progress to the user, and optionally cancel out. */
8368 : /* --------------------------------------------------------------------
8369 : */
8370 28558 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
8371 0 : break;
8372 : }
8373 :
8374 : /* -------------------------------------------------------------------- */
8375 : /* Cleanup and return. */
8376 : /* -------------------------------------------------------------------- */
8377 219 : CPLFree(padfX);
8378 219 : CPLFree(padfY);
8379 219 : CPLFree(padfZ);
8380 219 : CPLFree(padfX2);
8381 219 : CPLFree(padfY2);
8382 219 : CPLFree(padfZ2);
8383 219 : CPLFree(pabSuccess);
8384 219 : CPLFree(pabSuccess2);
8385 219 : VSIFree(pafCounts);
8386 : }
8387 :
8388 : /************************************************************************/
8389 : /* getOrientation() */
8390 : /************************************************************************/
8391 :
8392 : // Returns 1 whether (p1,p2,p3) is clockwise oriented,
8393 : // -1 if it is counter-clockwise oriented,
8394 : // or 0 if it is colinear.
8395 2355910 : static int getOrientation(const XYPair &p1, const XYPair &p2, const XYPair &p3)
8396 : {
8397 2355910 : const double p1x = p1.first;
8398 2355910 : const double p1y = p1.second;
8399 2355910 : const double p2x = p2.first;
8400 2355910 : const double p2y = p2.second;
8401 2355910 : const double p3x = p3.first;
8402 2355910 : const double p3y = p3.second;
8403 2355910 : const double val = (p2y - p1y) * (p3x - p2x) - (p2x - p1x) * (p3y - p2y);
8404 2355910 : if (std::abs(val) < 1e-20)
8405 2690 : return 0;
8406 2353220 : else if (val > 0)
8407 0 : return 1;
8408 : else
8409 2353220 : return -1;
8410 : }
8411 :
8412 : /************************************************************************/
8413 : /* isConvex() */
8414 : /************************************************************************/
8415 :
8416 : // poly must be closed
8417 785302 : static bool isConvex(const XYPoly &poly)
8418 : {
8419 785302 : const size_t n = poly.size();
8420 785302 : size_t i = 0;
8421 785302 : int last_orientation = getOrientation(poly[i], poly[i + 1], poly[i + 2]);
8422 785302 : ++i;
8423 2355910 : for (; i < n - 2; ++i)
8424 : {
8425 : const int orientation =
8426 1570600 : getOrientation(poly[i], poly[i + 1], poly[i + 2]);
8427 1570600 : if (orientation != 0)
8428 : {
8429 1567910 : if (last_orientation == 0)
8430 0 : last_orientation = orientation;
8431 1567910 : else if (orientation != last_orientation)
8432 0 : return false;
8433 : }
8434 : }
8435 785302 : return true;
8436 : }
8437 :
8438 : /************************************************************************/
8439 : /* pointIntersectsConvexPoly() */
8440 : /************************************************************************/
8441 :
8442 : // Returns whether xy intersects poly, that must be closed and convex.
8443 6049100 : static bool pointIntersectsConvexPoly(const XYPair &xy, const XYPoly &poly)
8444 : {
8445 6049100 : const size_t n = poly.size();
8446 6049100 : double dx1 = xy.first - poly[0].first;
8447 6049100 : double dy1 = xy.second - poly[0].second;
8448 6049100 : double dx2 = poly[1].first - poly[0].first;
8449 6049100 : double dy2 = poly[1].second - poly[0].second;
8450 6049100 : double prevCrossProduct = dx1 * dy2 - dx2 * dy1;
8451 :
8452 : // Check if the point remains on the same side (left/right) of all edges
8453 14556400 : for (size_t i = 2; i < n; i++)
8454 : {
8455 12793100 : dx1 = xy.first - poly[i - 1].first;
8456 12793100 : dy1 = xy.second - poly[i - 1].second;
8457 :
8458 12793100 : dx2 = poly[i].first - poly[i - 1].first;
8459 12793100 : dy2 = poly[i].second - poly[i - 1].second;
8460 :
8461 12793100 : double crossProduct = dx1 * dy2 - dx2 * dy1;
8462 12793100 : if (std::abs(prevCrossProduct) < 1e-20)
8463 725558 : prevCrossProduct = crossProduct;
8464 12067500 : else if (prevCrossProduct * crossProduct < 0)
8465 4285760 : return false;
8466 : }
8467 :
8468 1763340 : return true;
8469 : }
8470 :
8471 : /************************************************************************/
8472 : /* getIntersection() */
8473 : /************************************************************************/
8474 :
8475 : /* Returns intersection of [p1,p2] with [p3,p4], if
8476 : * it is a single point, and the 2 segments are not colinear.
8477 : */
8478 11811000 : static bool getIntersection(const XYPair &p1, const XYPair &p2,
8479 : const XYPair &p3, const XYPair &p4, XYPair &xy)
8480 : {
8481 11811000 : const double x1 = p1.first;
8482 11811000 : const double y1 = p1.second;
8483 11811000 : const double x2 = p2.first;
8484 11811000 : const double y2 = p2.second;
8485 11811000 : const double x3 = p3.first;
8486 11811000 : const double y3 = p3.second;
8487 11811000 : const double x4 = p4.first;
8488 11811000 : const double y4 = p4.second;
8489 11811000 : const double t_num = (x1 - x3) * (y3 - y4) - (y1 - y3) * (x3 - x4);
8490 11811000 : const double denom = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4);
8491 11811000 : if (t_num * denom < 0 || std::abs(t_num) > std::abs(denom) || denom == 0)
8492 9260780 : return false;
8493 :
8494 2550260 : const double u_num = (x1 - x3) * (y1 - y2) - (y1 - y3) * (x1 - x2);
8495 2550260 : if (u_num * denom < 0 || std::abs(u_num) > std::abs(denom))
8496 973924 : return false;
8497 :
8498 1576340 : const double t = t_num / denom;
8499 1576340 : xy.first = x1 + t * (x2 - x1);
8500 1576340 : xy.second = y1 + t * (y2 - y1);
8501 1576340 : return true;
8502 : }
8503 :
8504 : /************************************************************************/
8505 : /* getConvexPolyIntersection() */
8506 : /************************************************************************/
8507 :
8508 : // poly1 and poly2 must be closed and convex.
8509 : // The returned intersection will not necessary be closed.
8510 785302 : static void getConvexPolyIntersection(const XYPoly &poly1, const XYPoly &poly2,
8511 : XYPoly &intersection)
8512 : {
8513 785302 : intersection.clear();
8514 :
8515 : // Add all points of poly1 inside poly2
8516 3926510 : for (size_t i = 0; i < poly1.size() - 1; ++i)
8517 : {
8518 3141210 : if (pointIntersectsConvexPoly(poly1[i], poly2))
8519 1187430 : intersection.push_back(poly1[i]);
8520 : }
8521 785302 : if (intersection.size() == poly1.size() - 1)
8522 : {
8523 : // poly1 is inside poly2
8524 119100 : return;
8525 : }
8526 :
8527 : // Add all points of poly2 inside poly1
8528 3634860 : for (size_t i = 0; i < poly2.size() - 1; ++i)
8529 : {
8530 2907890 : if (pointIntersectsConvexPoly(poly2[i], poly1))
8531 575904 : intersection.push_back(poly2[i]);
8532 : }
8533 :
8534 : // Compute the intersection of all edges of both polygons
8535 726972 : XYPair xy;
8536 3634860 : for (size_t i1 = 0; i1 < poly1.size() - 1; ++i1)
8537 : {
8538 14539400 : for (size_t i2 = 0; i2 < poly2.size() - 1; ++i2)
8539 : {
8540 11631600 : if (getIntersection(poly1[i1], poly1[i1 + 1], poly2[i2],
8541 11631600 : poly2[i2 + 1], xy))
8542 : {
8543 1576230 : intersection.push_back(xy);
8544 : }
8545 : }
8546 : }
8547 :
8548 726972 : if (intersection.empty())
8549 60770 : return;
8550 :
8551 : // Find lowest-left point in intersection set
8552 666202 : double lowest_x = cpl::NumericLimits<double>::max();
8553 666202 : double lowest_y = cpl::NumericLimits<double>::max();
8554 3772450 : for (const auto &pair : intersection)
8555 : {
8556 3106240 : const double x = pair.first;
8557 3106240 : const double y = pair.second;
8558 3106240 : if (y < lowest_y || (y == lowest_y && x < lowest_x))
8559 : {
8560 1096040 : lowest_x = x;
8561 1096040 : lowest_y = y;
8562 : }
8563 : }
8564 :
8565 5737980 : const auto sortFunc = [&](const XYPair &p1, const XYPair &p2)
8566 : {
8567 5737980 : const double p1x_diff = p1.first - lowest_x;
8568 5737980 : const double p1y_diff = p1.second - lowest_y;
8569 5737980 : const double p2x_diff = p2.first - lowest_x;
8570 5737980 : const double p2y_diff = p2.second - lowest_y;
8571 5737980 : if (p2y_diff == 0.0 && p1y_diff == 0.0)
8572 : {
8573 2655420 : if (p1x_diff >= 0)
8574 : {
8575 2655420 : if (p2x_diff >= 0)
8576 2655420 : return p1.first < p2.first;
8577 0 : return true;
8578 : }
8579 : else
8580 : {
8581 0 : if (p2x_diff >= 0)
8582 0 : return false;
8583 0 : return p1.first < p2.first;
8584 : }
8585 : }
8586 :
8587 3082560 : if (p2x_diff == 0.0 && p1x_diff == 0.0)
8588 1046960 : return p1.second < p2.second;
8589 :
8590 : double tan_p1;
8591 2035600 : if (p1x_diff == 0.0)
8592 464622 : tan_p1 = p1y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
8593 : else
8594 1570980 : tan_p1 = p1y_diff / p1x_diff;
8595 :
8596 : double tan_p2;
8597 2035600 : if (p2x_diff == 0.0)
8598 839515 : tan_p2 = p2y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
8599 : else
8600 1196080 : tan_p2 = p2y_diff / p2x_diff;
8601 :
8602 2035600 : if (tan_p1 >= 0)
8603 : {
8604 1904790 : if (tan_p2 >= 0)
8605 1881590 : return tan_p1 < tan_p2;
8606 : else
8607 23199 : return true;
8608 : }
8609 : else
8610 : {
8611 130806 : if (tan_p2 >= 0)
8612 103900 : return false;
8613 : else
8614 26906 : return tan_p1 < tan_p2;
8615 : }
8616 666202 : };
8617 :
8618 : // Sort points by increasing atan2(y-lowest_y, x-lowest_x) to form a convex
8619 : // hull
8620 666202 : std::sort(intersection.begin(), intersection.end(), sortFunc);
8621 :
8622 : // Remove duplicated points
8623 666202 : size_t j = 1;
8624 3106240 : for (size_t i = 1; i < intersection.size(); ++i)
8625 : {
8626 2440040 : if (intersection[i] != intersection[i - 1])
8627 : {
8628 1452560 : if (j < i)
8629 545275 : intersection[j] = intersection[i];
8630 1452560 : ++j;
8631 : }
8632 : }
8633 666202 : intersection.resize(j);
8634 : }
8635 :
8636 : /************************************************************************/
8637 : /* GWKSumPreserving() */
8638 : /************************************************************************/
8639 :
8640 : static void GWKSumPreservingThread(void *pData);
8641 :
8642 19 : static CPLErr GWKSumPreserving(GDALWarpKernel *poWK)
8643 : {
8644 19 : return GWKRun(poWK, "GWKSumPreserving", GWKSumPreservingThread);
8645 : }
8646 :
8647 19 : static void GWKSumPreservingThread(void *pData)
8648 : {
8649 19 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
8650 19 : GDALWarpKernel *poWK = psJob->poWK;
8651 19 : const int iYMin = psJob->iYMin;
8652 19 : const int iYMax = psJob->iYMax;
8653 : const bool bIsAffineNoRotation =
8654 19 : GDALTransformIsAffineNoRotation(poWK->pfnTransformer,
8655 28 : poWK->pTransformerArg) &&
8656 : // for debug/testing purposes
8657 9 : CPLTestBool(
8658 19 : CPLGetConfigOption("GDAL_WARP_USE_AFFINE_OPTIMIZATION", "YES"));
8659 : const bool bAvoidNoDataSingleBand =
8660 21 : poWK->nBands == 1 ||
8661 2 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
8662 19 : "UNIFIED_SRC_NODATA", "FALSE"));
8663 :
8664 19 : const int nDstXSize = poWK->nDstXSize;
8665 19 : const int nSrcXSize = poWK->nSrcXSize;
8666 19 : const int nSrcYSize = poWK->nSrcYSize;
8667 :
8668 38 : std::vector<double> adfX0(nSrcXSize + 1);
8669 38 : std::vector<double> adfY0(nSrcXSize + 1);
8670 38 : std::vector<double> adfZ0(nSrcXSize + 1);
8671 38 : std::vector<double> adfX1(nSrcXSize + 1);
8672 38 : std::vector<double> adfY1(nSrcXSize + 1);
8673 38 : std::vector<double> adfZ1(nSrcXSize + 1);
8674 38 : std::vector<int> abSuccess0(nSrcXSize + 1);
8675 38 : std::vector<int> abSuccess1(nSrcXSize + 1);
8676 :
8677 : CPLRectObj sGlobalBounds;
8678 19 : sGlobalBounds.minx = -2 * poWK->dfXScale;
8679 19 : sGlobalBounds.miny = iYMin - 2 * poWK->dfYScale;
8680 19 : sGlobalBounds.maxx = nDstXSize + 2 * poWK->dfXScale;
8681 19 : sGlobalBounds.maxy = iYMax + 2 * poWK->dfYScale;
8682 19 : CPLQuadTree *hQuadTree = CPLQuadTreeCreate(&sGlobalBounds, nullptr);
8683 :
8684 : struct SourcePixel
8685 : {
8686 : int iSrcX;
8687 : int iSrcY;
8688 :
8689 : // Coordinates of source pixel in target pixel coordinates
8690 : double dfDstX0;
8691 : double dfDstY0;
8692 : double dfDstX1;
8693 : double dfDstY1;
8694 : double dfDstX2;
8695 : double dfDstY2;
8696 : double dfDstX3;
8697 : double dfDstY3;
8698 :
8699 : // Source pixel total area (might be larger than the one described
8700 : // by above coordinates, if the pixel was crossing the antimeridian
8701 : // and split)
8702 : double dfArea;
8703 : };
8704 :
8705 38 : std::vector<SourcePixel> sourcePixels;
8706 :
8707 38 : XYPoly discontinuityLeft(5);
8708 38 : XYPoly discontinuityRight(5);
8709 :
8710 : /* ==================================================================== */
8711 : /* First pass: transform the 4 corners of each potential */
8712 : /* contributing source pixel to target pixel coordinates. */
8713 : /* ==================================================================== */
8714 :
8715 : // Special case for top line
8716 : {
8717 19 : int iY = 0;
8718 3364 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8719 : {
8720 3345 : adfX1[iX] = iX + poWK->nSrcXOff;
8721 3345 : adfY1[iX] = iY + poWK->nSrcYOff;
8722 3345 : adfZ1[iX] = 0;
8723 : }
8724 :
8725 19 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
8726 : adfX1.data(), adfY1.data(), adfZ1.data(),
8727 : abSuccess1.data());
8728 :
8729 3364 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8730 : {
8731 3345 : if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
8732 0 : abSuccess1[iX] = FALSE;
8733 : else
8734 : {
8735 3345 : adfX1[iX] -= poWK->nDstXOff;
8736 3345 : adfY1[iX] -= poWK->nDstYOff;
8737 : }
8738 : }
8739 : }
8740 :
8741 2032 : const auto getInsideXSign = [poWK, nDstXSize](double dfX)
8742 : {
8743 2032 : return dfX - poWK->nDstXOff >= -2 * poWK->dfXScale &&
8744 872 : dfX - poWK->nDstXOff <= nDstXSize + 2 * poWK->dfXScale
8745 2032 : ? 1
8746 1160 : : -1;
8747 19 : };
8748 :
8749 : const auto FindDiscontinuity =
8750 80 : [poWK, psJob, getInsideXSign](
8751 : double dfXLeft, double dfXRight, double dfY,
8752 : int XLeftReprojectedInsideSign, double &dfXMidReprojectedLeft,
8753 800 : double &dfXMidReprojectedRight, double &dfYMidReprojected)
8754 : {
8755 880 : for (int i = 0; i < 10 && dfXRight - dfXLeft > 1e-8; ++i)
8756 : {
8757 800 : double dfXMid = (dfXLeft + dfXRight) / 2;
8758 800 : double dfXMidReprojected = dfXMid;
8759 800 : dfYMidReprojected = dfY;
8760 800 : double dfZ = 0;
8761 800 : int nSuccess = 0;
8762 800 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1,
8763 : &dfXMidReprojected, &dfYMidReprojected, &dfZ,
8764 : &nSuccess);
8765 800 : if (XLeftReprojectedInsideSign != getInsideXSign(dfXMidReprojected))
8766 : {
8767 456 : dfXRight = dfXMid;
8768 456 : dfXMidReprojectedRight = dfXMidReprojected;
8769 : }
8770 : else
8771 : {
8772 344 : dfXLeft = dfXMid;
8773 344 : dfXMidReprojectedLeft = dfXMidReprojected;
8774 : }
8775 : }
8776 80 : };
8777 :
8778 2685 : for (int iY = 0; iY < nSrcYSize; ++iY)
8779 : {
8780 2666 : std::swap(adfX0, adfX1);
8781 2666 : std::swap(adfY0, adfY1);
8782 2666 : std::swap(adfZ0, adfZ1);
8783 2666 : std::swap(abSuccess0, abSuccess1);
8784 :
8785 4836120 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8786 : {
8787 4833460 : adfX1[iX] = iX + poWK->nSrcXOff;
8788 4833460 : adfY1[iX] = iY + 1 + poWK->nSrcYOff;
8789 4833460 : adfZ1[iX] = 0;
8790 : }
8791 :
8792 2666 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
8793 : adfX1.data(), adfY1.data(), adfZ1.data(),
8794 : abSuccess1.data());
8795 :
8796 4836120 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8797 : {
8798 4833460 : if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
8799 0 : abSuccess1[iX] = FALSE;
8800 : else
8801 : {
8802 4833460 : adfX1[iX] -= poWK->nDstXOff;
8803 4833460 : adfY1[iX] -= poWK->nDstYOff;
8804 : }
8805 : }
8806 :
8807 4833460 : for (int iX = 0; iX < nSrcXSize; ++iX)
8808 : {
8809 9661580 : if (abSuccess0[iX] && abSuccess0[iX + 1] && abSuccess1[iX] &&
8810 4830790 : abSuccess1[iX + 1])
8811 : {
8812 : /* --------------------------------------------------------------------
8813 : */
8814 : /* Do not try to apply transparent source pixels to the
8815 : * destination.*/
8816 : /* --------------------------------------------------------------------
8817 : */
8818 4830790 : const auto iSrcOffset =
8819 4830790 : iX + static_cast<GPtrDiff_t>(iY) * nSrcXSize;
8820 9560570 : if (poWK->panUnifiedSrcValid != nullptr &&
8821 4729780 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
8822 : {
8823 4738340 : continue;
8824 : }
8825 :
8826 103415 : if (poWK->pafUnifiedSrcDensity != nullptr)
8827 : {
8828 0 : if (poWK->pafUnifiedSrcDensity[iSrcOffset] <
8829 : SRC_DENSITY_THRESHOLD_FLOAT)
8830 0 : continue;
8831 : }
8832 :
8833 : SourcePixel sp;
8834 103415 : sp.dfArea = 0;
8835 103415 : sp.dfDstX0 = adfX0[iX];
8836 103415 : sp.dfDstY0 = adfY0[iX];
8837 103415 : sp.dfDstX1 = adfX0[iX + 1];
8838 103415 : sp.dfDstY1 = adfY0[iX + 1];
8839 103415 : sp.dfDstX2 = adfX1[iX + 1];
8840 103415 : sp.dfDstY2 = adfY1[iX + 1];
8841 103415 : sp.dfDstX3 = adfX1[iX];
8842 103415 : sp.dfDstY3 = adfY1[iX];
8843 :
8844 : // Detect pixel that likely cross the anti-meridian and
8845 : // introduce a discontinuity when reprojected.
8846 :
8847 103415 : if (std::fabs(adfX0[iX] - adfX0[iX + 1]) > 2 * poWK->dfXScale &&
8848 80 : std::fabs(adfX1[iX] - adfX1[iX + 1]) > 2 * poWK->dfXScale &&
8849 40 : getInsideXSign(adfX0[iX]) !=
8850 80 : getInsideXSign(adfX0[iX + 1]) &&
8851 80 : getInsideXSign(adfX0[iX]) == getInsideXSign(adfX1[iX]) &&
8852 40 : getInsideXSign(adfX0[iX + 1]) ==
8853 103495 : getInsideXSign(adfX1[iX + 1]) &&
8854 40 : (adfY1[iX] - adfY0[iX]) * (adfY1[iX + 1] - adfY0[iX + 1]) >
8855 : 0)
8856 : {
8857 : #ifdef DEBUG_VERBOSE
8858 : CPLDebug(
8859 : "WARP",
8860 : "Discontinuity for iSrcX=%d, iSrcY=%d, dest corners:"
8861 : "X0[iX]=%f X0[iX+1]=%f X1[iX]=%f X1[iX+1]=%f,"
8862 : "Y0[iX]=%f Y0[iX+1]=%f Y1[iX]=%f Y1[iX+1]=%f",
8863 : iX + poWK->nSrcXOff, iY + poWK->nSrcYOff, adfX0[iX],
8864 : adfX0[iX + 1], adfX1[iX], adfX1[iX + 1], adfY0[iX],
8865 : adfY0[iX + 1], adfY1[iX], adfY1[iX + 1]);
8866 : #endif
8867 40 : double dfXMidReprojectedLeftTop = 0;
8868 40 : double dfXMidReprojectedRightTop = 0;
8869 40 : double dfYMidReprojectedTop = 0;
8870 40 : FindDiscontinuity(
8871 40 : iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
8872 80 : iY + poWK->nSrcYOff, getInsideXSign(adfX0[iX]),
8873 : dfXMidReprojectedLeftTop, dfXMidReprojectedRightTop,
8874 : dfYMidReprojectedTop);
8875 40 : double dfXMidReprojectedLeftBottom = 0;
8876 40 : double dfXMidReprojectedRightBottom = 0;
8877 40 : double dfYMidReprojectedBottom = 0;
8878 40 : FindDiscontinuity(
8879 40 : iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
8880 80 : iY + poWK->nSrcYOff + 1, getInsideXSign(adfX1[iX]),
8881 : dfXMidReprojectedLeftBottom,
8882 : dfXMidReprojectedRightBottom, dfYMidReprojectedBottom);
8883 :
8884 40 : discontinuityLeft[0] = XYPair(adfX0[iX], adfY0[iX]);
8885 40 : discontinuityLeft[1] =
8886 80 : XYPair(dfXMidReprojectedLeftTop, dfYMidReprojectedTop);
8887 40 : discontinuityLeft[2] = XYPair(dfXMidReprojectedLeftBottom,
8888 40 : dfYMidReprojectedBottom);
8889 40 : discontinuityLeft[3] = XYPair(adfX1[iX], adfY1[iX]);
8890 40 : discontinuityLeft[4] = XYPair(adfX0[iX], adfY0[iX]);
8891 :
8892 40 : discontinuityRight[0] =
8893 80 : XYPair(adfX0[iX + 1], adfY0[iX + 1]);
8894 40 : discontinuityRight[1] =
8895 80 : XYPair(dfXMidReprojectedRightTop, dfYMidReprojectedTop);
8896 40 : discontinuityRight[2] = XYPair(dfXMidReprojectedRightBottom,
8897 40 : dfYMidReprojectedBottom);
8898 40 : discontinuityRight[3] =
8899 80 : XYPair(adfX1[iX + 1], adfY1[iX + 1]);
8900 40 : discontinuityRight[4] =
8901 80 : XYPair(adfX0[iX + 1], adfY0[iX + 1]);
8902 :
8903 40 : sp.dfArea = getArea(discontinuityLeft) +
8904 40 : getArea(discontinuityRight);
8905 40 : if (getInsideXSign(adfX0[iX]) >= 1)
8906 : {
8907 20 : sp.dfDstX1 = dfXMidReprojectedLeftTop;
8908 20 : sp.dfDstY1 = dfYMidReprojectedTop;
8909 20 : sp.dfDstX2 = dfXMidReprojectedLeftBottom;
8910 20 : sp.dfDstY2 = dfYMidReprojectedBottom;
8911 : }
8912 : else
8913 : {
8914 20 : sp.dfDstX0 = dfXMidReprojectedRightTop;
8915 20 : sp.dfDstY0 = dfYMidReprojectedTop;
8916 20 : sp.dfDstX3 = dfXMidReprojectedRightBottom;
8917 20 : sp.dfDstY3 = dfYMidReprojectedBottom;
8918 : }
8919 : }
8920 :
8921 : // Bounding box of source pixel (expressed in target pixel
8922 : // coordinates)
8923 : CPLRectObj sRect;
8924 103415 : sRect.minx = std::min(std::min(sp.dfDstX0, sp.dfDstX1),
8925 103415 : std::min(sp.dfDstX2, sp.dfDstX3));
8926 103415 : sRect.miny = std::min(std::min(sp.dfDstY0, sp.dfDstY1),
8927 103415 : std::min(sp.dfDstY2, sp.dfDstY3));
8928 103415 : sRect.maxx = std::max(std::max(sp.dfDstX0, sp.dfDstX1),
8929 103415 : std::max(sp.dfDstX2, sp.dfDstX3));
8930 103415 : sRect.maxy = std::max(std::max(sp.dfDstY0, sp.dfDstY1),
8931 103415 : std::max(sp.dfDstY2, sp.dfDstY3));
8932 103415 : if (!(sRect.minx < nDstXSize && sRect.maxx > 0 &&
8933 101355 : sRect.miny < iYMax && sRect.maxy > iYMin))
8934 : {
8935 10852 : continue;
8936 : }
8937 :
8938 92563 : sp.iSrcX = iX;
8939 92563 : sp.iSrcY = iY;
8940 :
8941 92563 : if (!bIsAffineNoRotation)
8942 : {
8943 : // Check polygon validity (no self-crossing)
8944 89745 : XYPair xy;
8945 89745 : if (getIntersection(XYPair(sp.dfDstX0, sp.dfDstY0),
8946 89745 : XYPair(sp.dfDstX1, sp.dfDstY1),
8947 89745 : XYPair(sp.dfDstX2, sp.dfDstY2),
8948 269235 : XYPair(sp.dfDstX3, sp.dfDstY3), xy) ||
8949 89745 : getIntersection(XYPair(sp.dfDstX1, sp.dfDstY1),
8950 89745 : XYPair(sp.dfDstX2, sp.dfDstY2),
8951 89745 : XYPair(sp.dfDstX0, sp.dfDstY0),
8952 179490 : XYPair(sp.dfDstX3, sp.dfDstY3), xy))
8953 : {
8954 113 : continue;
8955 : }
8956 : }
8957 :
8958 92450 : CPLQuadTreeInsertWithBounds(
8959 : hQuadTree,
8960 : reinterpret_cast<void *>(
8961 92450 : static_cast<uintptr_t>(sourcePixels.size())),
8962 : &sRect);
8963 :
8964 92450 : sourcePixels.push_back(sp);
8965 : }
8966 : }
8967 : }
8968 :
8969 38 : std::vector<double> adfRealValue(poWK->nBands);
8970 38 : std::vector<double> adfImagValue(poWK->nBands);
8971 38 : std::vector<double> adfBandDensity(poWK->nBands);
8972 38 : std::vector<double> adfWeight(poWK->nBands);
8973 :
8974 : #ifdef CHECK_SUM_WITH_GEOS
8975 : auto hGEOSContext = OGRGeometry::createGEOSContext();
8976 : auto seq1 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
8977 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 0, 0.0, 0.0);
8978 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 1, 1.0, 0.0);
8979 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 2, 1.0, 1.0);
8980 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 3, 0.0, 1.0);
8981 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 4, 0.0, 0.0);
8982 : auto hLR1 = GEOSGeom_createLinearRing_r(hGEOSContext, seq1);
8983 : auto hP1 = GEOSGeom_createPolygon_r(hGEOSContext, hLR1, nullptr, 0);
8984 :
8985 : auto seq2 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
8986 : auto hLR2 = GEOSGeom_createLinearRing_r(hGEOSContext, seq2);
8987 : auto hP2 = GEOSGeom_createPolygon_r(hGEOSContext, hLR2, nullptr, 0);
8988 : #endif
8989 :
8990 : const XYPoly xy1{
8991 38 : {0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}};
8992 38 : XYPoly xy2(5);
8993 38 : XYPoly xy2_triangle(4);
8994 38 : XYPoly intersection;
8995 :
8996 : /* ==================================================================== */
8997 : /* Loop over output lines. */
8998 : /* ==================================================================== */
8999 1951 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
9000 : {
9001 : CPLRectObj sRect;
9002 1932 : sRect.miny = iDstY;
9003 1932 : sRect.maxy = iDstY + 1;
9004 :
9005 : /* ====================================================================
9006 : */
9007 : /* Loop over pixels in output scanline. */
9008 : /* ====================================================================
9009 : */
9010 1403940 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
9011 : {
9012 1402010 : sRect.minx = iDstX;
9013 1402010 : sRect.maxx = iDstX + 1;
9014 1402010 : int nSourcePixels = 0;
9015 : void **pahSourcePixel =
9016 1402010 : CPLQuadTreeSearch(hQuadTree, &sRect, &nSourcePixels);
9017 1402010 : if (nSourcePixels == 0)
9018 : {
9019 1183090 : CPLFree(pahSourcePixel);
9020 1183100 : continue;
9021 : }
9022 :
9023 218919 : std::fill(adfRealValue.begin(), adfRealValue.end(), 0);
9024 218919 : std::fill(adfImagValue.begin(), adfImagValue.end(), 0);
9025 218919 : std::fill(adfBandDensity.begin(), adfBandDensity.end(), 0);
9026 218919 : std::fill(adfWeight.begin(), adfWeight.end(), 0);
9027 218919 : double dfDensity = 0;
9028 : // Just above zero to please Coveriy Scan
9029 218919 : double dfTotalWeight = std::numeric_limits<double>::min();
9030 :
9031 : /* ====================================================================
9032 : */
9033 : /* Iterate over each contributing source pixel to add its
9034 : */
9035 : /* value weighed by the ratio of the area of its
9036 : * intersection */
9037 : /* with the target pixel divided by the area of the source
9038 : */
9039 : /* pixel. */
9040 : /* ====================================================================
9041 : */
9042 1020550 : for (int i = 0; i < nSourcePixels; ++i)
9043 : {
9044 801628 : const int iSourcePixel = static_cast<int>(
9045 801628 : reinterpret_cast<uintptr_t>(pahSourcePixel[i]));
9046 801628 : auto &sp = sourcePixels[iSourcePixel];
9047 :
9048 801628 : double dfWeight = 0.0;
9049 801628 : if (bIsAffineNoRotation)
9050 : {
9051 : // Optimization since the source pixel is a rectangle in
9052 : // target pixel coordinates
9053 16326 : double dfSrcMinX = std::min(sp.dfDstX0, sp.dfDstX2);
9054 16326 : double dfSrcMaxX = std::max(sp.dfDstX0, sp.dfDstX2);
9055 16326 : double dfSrcMinY = std::min(sp.dfDstY0, sp.dfDstY2);
9056 16326 : double dfSrcMaxY = std::max(sp.dfDstY0, sp.dfDstY2);
9057 16326 : double dfIntersMinX = std::max<double>(dfSrcMinX, iDstX);
9058 16326 : double dfIntersMaxX = std::min(dfSrcMaxX, iDstX + 1.0);
9059 16326 : double dfIntersMinY = std::max<double>(dfSrcMinY, iDstY);
9060 16326 : double dfIntersMaxY = std::min(dfSrcMaxY, iDstY + 1.0);
9061 16326 : dfWeight =
9062 16326 : ((dfIntersMaxX - dfIntersMinX) *
9063 16326 : (dfIntersMaxY - dfIntersMinY)) /
9064 16326 : ((dfSrcMaxX - dfSrcMinX) * (dfSrcMaxY - dfSrcMinY));
9065 : }
9066 : else
9067 : {
9068 : // Compute the polygon of the source pixel in target pixel
9069 : // coordinates, and shifted to the target pixel (unit square
9070 : // coordinates)
9071 :
9072 785302 : xy2[0] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
9073 785302 : xy2[1] = {sp.dfDstX1 - iDstX, sp.dfDstY1 - iDstY};
9074 785302 : xy2[2] = {sp.dfDstX2 - iDstX, sp.dfDstY2 - iDstY};
9075 785302 : xy2[3] = {sp.dfDstX3 - iDstX, sp.dfDstY3 - iDstY};
9076 785302 : xy2[4] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
9077 :
9078 785302 : if (isConvex(xy2))
9079 : {
9080 785302 : getConvexPolyIntersection(xy1, xy2, intersection);
9081 785302 : if (intersection.size() >= 3)
9082 : {
9083 468849 : dfWeight = getArea(intersection);
9084 : }
9085 : }
9086 : else
9087 : {
9088 : // Split xy2 into 2 triangles.
9089 0 : xy2_triangle[0] = xy2[0];
9090 0 : xy2_triangle[1] = xy2[1];
9091 0 : xy2_triangle[2] = xy2[2];
9092 0 : xy2_triangle[3] = xy2[0];
9093 0 : getConvexPolyIntersection(xy1, xy2_triangle,
9094 : intersection);
9095 0 : if (intersection.size() >= 3)
9096 : {
9097 0 : dfWeight = getArea(intersection);
9098 : }
9099 :
9100 0 : xy2_triangle[1] = xy2[2];
9101 0 : xy2_triangle[2] = xy2[3];
9102 0 : getConvexPolyIntersection(xy1, xy2_triangle,
9103 : intersection);
9104 0 : if (intersection.size() >= 3)
9105 : {
9106 0 : dfWeight += getArea(intersection);
9107 : }
9108 : }
9109 785302 : if (dfWeight > 0.0)
9110 : {
9111 468828 : if (sp.dfArea == 0)
9112 89592 : sp.dfArea = getArea(xy2);
9113 468828 : dfWeight /= sp.dfArea;
9114 : }
9115 :
9116 : #ifdef CHECK_SUM_WITH_GEOS
9117 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 0,
9118 : sp.dfDstX0 - iDstX,
9119 : sp.dfDstY0 - iDstY);
9120 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 1,
9121 : sp.dfDstX1 - iDstX,
9122 : sp.dfDstY1 - iDstY);
9123 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 2,
9124 : sp.dfDstX2 - iDstX,
9125 : sp.dfDstY2 - iDstY);
9126 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 3,
9127 : sp.dfDstX3 - iDstX,
9128 : sp.dfDstY3 - iDstY);
9129 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 4,
9130 : sp.dfDstX0 - iDstX,
9131 : sp.dfDstY0 - iDstY);
9132 :
9133 : double dfWeightGEOS = 0.0;
9134 : auto hIntersection =
9135 : GEOSIntersection_r(hGEOSContext, hP1, hP2);
9136 : if (hIntersection)
9137 : {
9138 : double dfIntersArea = 0.0;
9139 : if (GEOSArea_r(hGEOSContext, hIntersection,
9140 : &dfIntersArea) &&
9141 : dfIntersArea > 0)
9142 : {
9143 : double dfSourceArea = 0.0;
9144 : if (GEOSArea_r(hGEOSContext, hP2, &dfSourceArea))
9145 : {
9146 : dfWeightGEOS = dfIntersArea / dfSourceArea;
9147 : }
9148 : }
9149 : GEOSGeom_destroy_r(hGEOSContext, hIntersection);
9150 : }
9151 : if (fabs(dfWeight - dfWeightGEOS) > 1e-5 * dfWeightGEOS)
9152 : {
9153 : /* ok */ printf("dfWeight=%f dfWeightGEOS=%f\n",
9154 : dfWeight, dfWeightGEOS);
9155 : printf("xy2: "); // ok
9156 : for (const auto &xy : xy2)
9157 : printf("[%f, %f], ", xy.first, xy.second); // ok
9158 : printf("\n"); // ok
9159 : printf("intersection: "); // ok
9160 : for (const auto &xy : intersection)
9161 : printf("[%f, %f], ", xy.first, xy.second); // ok
9162 : printf("\n"); // ok
9163 : }
9164 : #endif
9165 : }
9166 801628 : if (dfWeight > 0.0)
9167 : {
9168 : #ifdef DEBUG_VERBOSE
9169 : #if defined(DST_X) && defined(DST_Y)
9170 : if (iDstX + poWK->nDstXOff == DST_X &&
9171 : iDstY + poWK->nDstYOff == DST_Y)
9172 : {
9173 : CPLDebug("WARP",
9174 : "iSrcX = %d, iSrcY = %d, weight =%.17g",
9175 : sp.iSrcX + poWK->nSrcXOff,
9176 : sp.iSrcY + poWK->nSrcYOff, dfWeight);
9177 : }
9178 : #endif
9179 : #endif
9180 :
9181 474104 : const GPtrDiff_t iSrcOffset =
9182 474104 : sp.iSrcX +
9183 474104 : static_cast<GPtrDiff_t>(sp.iSrcY) * nSrcXSize;
9184 474104 : dfTotalWeight += dfWeight;
9185 :
9186 474104 : if (poWK->pafUnifiedSrcDensity != nullptr)
9187 : {
9188 0 : dfDensity +=
9189 0 : dfWeight *
9190 0 : double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
9191 : }
9192 : else
9193 : {
9194 474104 : dfDensity += dfWeight;
9195 : }
9196 :
9197 1818730 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
9198 : {
9199 : // Returns pixel value if it is not no data.
9200 : double dfBandDensity;
9201 : double dfRealValue;
9202 : double dfImagValue;
9203 2689250 : if (!(GWKGetPixelValue(poWK, iBand, iSrcOffset,
9204 : &dfBandDensity, &dfRealValue,
9205 : &dfImagValue) &&
9206 1344620 : dfBandDensity > BAND_DENSITY_THRESHOLD))
9207 : {
9208 0 : continue;
9209 : }
9210 : #ifdef DEBUG_VERBOSE
9211 : #if defined(DST_X) && defined(DST_Y)
9212 : if (iDstX + poWK->nDstXOff == DST_X &&
9213 : iDstY + poWK->nDstYOff == DST_Y)
9214 : {
9215 : CPLDebug("WARP", "value * weight = %.17g",
9216 : dfRealValue * dfWeight);
9217 : }
9218 : #endif
9219 : #endif
9220 :
9221 1344620 : adfRealValue[iBand] += dfRealValue * dfWeight;
9222 1344620 : adfImagValue[iBand] += dfImagValue * dfWeight;
9223 1344620 : adfBandDensity[iBand] += dfBandDensity * dfWeight;
9224 1344620 : adfWeight[iBand] += dfWeight;
9225 : }
9226 : }
9227 : }
9228 :
9229 218919 : CPLFree(pahSourcePixel);
9230 :
9231 : /* --------------------------------------------------------------------
9232 : */
9233 : /* Update destination pixel value. */
9234 : /* --------------------------------------------------------------------
9235 : */
9236 218919 : bool bHasFoundDensity = false;
9237 218919 : const GPtrDiff_t iDstOffset =
9238 218919 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
9239 827838 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
9240 : {
9241 608919 : if (adfWeight[iBand] > 0)
9242 : {
9243 : const double dfBandDensity =
9244 608909 : adfBandDensity[iBand] / adfWeight[iBand];
9245 608909 : if (dfBandDensity > BAND_DENSITY_THRESHOLD)
9246 : {
9247 608909 : bHasFoundDensity = true;
9248 608909 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
9249 608909 : adfRealValue[iBand],
9250 608909 : adfImagValue[iBand],
9251 : bAvoidNoDataSingleBand);
9252 : }
9253 : }
9254 : }
9255 :
9256 218919 : if (!bHasFoundDensity)
9257 10 : continue;
9258 :
9259 218909 : if (!bAvoidNoDataSingleBand)
9260 : {
9261 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
9262 : }
9263 :
9264 : /* --------------------------------------------------------------------
9265 : */
9266 : /* Update destination density/validity masks. */
9267 : /* --------------------------------------------------------------------
9268 : */
9269 218909 : GWKOverlayDensity(poWK, iDstOffset, dfDensity / dfTotalWeight);
9270 :
9271 218909 : if (poWK->panDstValid != nullptr)
9272 : {
9273 11752 : CPLMaskSet(poWK->panDstValid, iDstOffset);
9274 : }
9275 : }
9276 :
9277 : /* --------------------------------------------------------------------
9278 : */
9279 : /* Report progress to the user, and optionally cancel out. */
9280 : /* --------------------------------------------------------------------
9281 : */
9282 1932 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
9283 0 : break;
9284 : }
9285 :
9286 : #ifdef CHECK_SUM_WITH_GEOS
9287 : GEOSGeom_destroy_r(hGEOSContext, hP1);
9288 : GEOSGeom_destroy_r(hGEOSContext, hP2);
9289 : OGRGeometry::freeGEOSContext(hGEOSContext);
9290 : #endif
9291 19 : CPLQuadTreeDestroy(hQuadTree);
9292 19 : }
|